From 9dd47ebbd7cbb341483c26031a72229cace1a461 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:01:20 +0100 Subject: [PATCH 01/10] - added vidello extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidello.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/vidello.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b2a4893fe..40a66ea03 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1206,6 +1206,7 @@ from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE +from .vidello import VidelloIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py new file mode 100644 index 000000000..cdbd0da85 --- /dev/null +++ b/youtube_dl/extractor/vidello.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + clean_html +) + +class VidelloIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?embed.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player.html' + _TEST = { + 'url': 'https://embed.vidello.com/2/t1umm637xb1ylgw4/player.html', + 'md5': '7a4d76ac74ef7724af4c6c3ecb5e0042', + 'info_dict': { + 'id': 't1umm637xb1ylgw4', + 'ext': 'mp4', + 'title': 'Vidello Hosting & Marketing', + 'description': "Start marketing your videos more effectively on \x03the web utilising vidello's premium hosting, streaming, \x03analytics & marketing features to grow your \x03online business fast." 
+ } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + vidello_settings = self._parse_json(self._search_regex( + r'vidello_'+video_id+'_settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) + + video_url = "" + video_sources = vidello_settings.get('player').get('clip').get('sources') or {} + for curr_entry in video_sources: + if curr_entry['type'] == "video/mp4": + video_url = "http://"+curr_entry["src"][2:] + title = vidello_settings.get('cta')[0].get('values').get('product_title') + description = clean_html(vidello_settings.get('cta')[0].get('values').get('product_desc')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'url': video_url + } From 8ef2625c2e1057effe027dac8831db9b3a4cb38c Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:03:33 +0100 Subject: [PATCH 02/10] - flake8 --- youtube_dl/extractor/vidello.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index cdbd0da85..f547f0d2a 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -7,6 +7,7 @@ from ..utils import ( clean_html ) + class VidelloIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?embed.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player.html' _TEST = { @@ -25,13 +26,13 @@ class VidelloIE(InfoExtractor): webpage = self._download_webpage(url, video_id) vidello_settings = self._parse_json(self._search_regex( - r'vidello_'+video_id+'_settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) + r'vidello_' + video_id + '_settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) video_url = "" video_sources = vidello_settings.get('player').get('clip').get('sources') or {} for curr_entry in video_sources: if curr_entry['type'] == "video/mp4": - video_url = "http://"+curr_entry["src"][2:] + video_url = "http://" + curr_entry["src"][2:] 
title = vidello_settings.get('cta')[0].get('values').get('product_title') description = clean_html(vidello_settings.get('cta')[0].get('values').get('product_desc')) From f9990314efe28f1c50a009559bbfb23af83ca323 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:11:21 +0100 Subject: [PATCH 03/10] - changed title gathering since it is mandatory/expected --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index f547f0d2a..7fdd0ebac 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -33,7 +33,7 @@ class VidelloIE(InfoExtractor): for curr_entry in video_sources: if curr_entry['type'] == "video/mp4": video_url = "http://" + curr_entry["src"][2:] - title = vidello_settings.get('cta')[0].get('values').get('product_title') + title = vidello_settings['cta'][0]['values']['product_title'] description = clean_html(vidello_settings.get('cta')[0].get('values').get('product_desc')) return { From dfa72918849324d5e5abebbf56109ec2a476b83a Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:13:06 +0100 Subject: [PATCH 04/10] - changed video settings to be mandatory --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index 7fdd0ebac..958507278 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -29,7 +29,7 @@ class VidelloIE(InfoExtractor): r'vidello_' + video_id + '_settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) video_url = "" - video_sources = vidello_settings.get('player').get('clip').get('sources') or {} + video_sources = vidello_settings['player']['clip']['sources'] for curr_entry in video_sources: if curr_entry['type'] == "video/mp4": video_url = "http://" + curr_entry["src"][2:] From 2fa1d205c7170952d4bb9e5f095df6eb15655422 
Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:23:37 +0100 Subject: [PATCH 05/10] - relaxed regex --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index 958507278..3fab4fed3 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -26,7 +26,7 @@ class VidelloIE(InfoExtractor): webpage = self._download_webpage(url, video_id) vidello_settings = self._parse_json(self._search_regex( - r'vidello_' + video_id + '_settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) + r'var\s*.+' + video_id + '[^=]+=\s*({.+});', webpage, 'vidello settings'), video_id) video_url = "" video_sources = vidello_settings['player']['clip']['sources'] From 91ccc8836279a6897c9f33c75b2464ff7e3cd9c7 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:31:32 +0100 Subject: [PATCH 06/10] - removed video_id from regex --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index 3fab4fed3..a53dbeab1 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -26,7 +26,7 @@ class VidelloIE(InfoExtractor): webpage = self._download_webpage(url, video_id) vidello_settings = self._parse_json(self._search_regex( - r'var\s*.+' + video_id + '[^=]+=\s*({.+});', webpage, 'vidello settings'), video_id) + r'settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) video_url = "" video_sources = vidello_settings['player']['clip']['sources'] From 72d7391d95aaccd02b7e35c545ffd6f9a3259582 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:52:41 +0100 Subject: [PATCH 07/10] - fixed escape of dot - changed regex --- youtube_dl/extractor/vidello.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vidello.py 
b/youtube_dl/extractor/vidello.py index a53dbeab1..801870509 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -9,7 +9,7 @@ from ..utils import ( class VidelloIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?embed.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player.html' + _VALID_URL = r'https?://(?:www\.)?embed\.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player.html' _TEST = { 'url': 'https://embed.vidello.com/2/t1umm637xb1ylgw4/player.html', 'md5': '7a4d76ac74ef7724af4c6c3ecb5e0042', @@ -26,7 +26,7 @@ class VidelloIE(InfoExtractor): webpage = self._download_webpage(url, video_id) vidello_settings = self._parse_json(self._search_regex( - r'settings\s*=\s*({.+});', webpage, 'vidello settings'), video_id) + r'settings=({.+});', webpage, 'vidello settings'), video_id) video_url = "" video_sources = vidello_settings['player']['clip']['sources'] From 8041289f19f6adae70488ef16cd511a017494f4c Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 20:55:06 +0100 Subject: [PATCH 08/10] - changed greedy to lazy --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index 801870509..2d772a155 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -26,7 +26,7 @@ class VidelloIE(InfoExtractor): webpage = self._download_webpage(url, video_id) vidello_settings = self._parse_json(self._search_regex( - r'settings=({.+});', webpage, 'vidello settings'), video_id) + r'settings\s*=\s*({.+?});', webpage, 'vidello settings'), video_id) video_url = "" video_sources = vidello_settings['player']['clip']['sources'] From 79e006584ff272b9a16223a0e53f953a37df2313 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Fri, 23 Feb 2018 21:26:45 +0100 Subject: [PATCH 09/10] - changed " to ' --- youtube_dl/extractor/vidello.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index 2d772a155..f2eb3bb02 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -28,11 +28,11 @@ class VidelloIE(InfoExtractor): vidello_settings = self._parse_json(self._search_regex( r'settings\s*=\s*({.+?});', webpage, 'vidello settings'), video_id) - video_url = "" + video_url = '' video_sources = vidello_settings['player']['clip']['sources'] for curr_entry in video_sources: - if curr_entry['type'] == "video/mp4": - video_url = "http://" + curr_entry["src"][2:] + if curr_entry['type'] == 'video/mp4': + video_url = 'http://' + curr_entry['src'][2:] title = vidello_settings['cta'][0]['values']['product_title'] description = clean_html(vidello_settings.get('cta')[0].get('values').get('product_desc')) From 026d8c9ee9d6e1e4217a1d3e4d51a480c0770afc Mon Sep 17 00:00:00 2001 From: carsten demming Date: Sat, 3 Mar 2018 14:23:30 +0100 Subject: [PATCH 10/10] escaped dots --- youtube_dl/extractor/vidello.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidello.py b/youtube_dl/extractor/vidello.py index f2eb3bb02..17840494d 100644 --- a/youtube_dl/extractor/vidello.py +++ b/youtube_dl/extractor/vidello.py @@ -9,7 +9,7 @@ from ..utils import ( class VidelloIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?embed\.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player.html' + _VALID_URL = r'https?://(?:www\.)?embed\.vidello\.com/[0-9]/(?P<id>[a-zA-Z0-9]+)/player\.html' _TEST = { 'url': 'https://embed.vidello.com/2/t1umm637xb1ylgw4/player.html', 'md5': '7a4d76ac74ef7724af4c6c3ecb5e0042',