From f8340f7d1da4b77baa3f2498f2880bcf2b76981d Mon Sep 17 00:00:00 2001 From: Vijayanand Nandam Date: Mon, 18 Mar 2019 23:04:49 +0530 Subject: [PATCH 1/5] fixes spankwire --- youtube_dl/extractor/spankwire.py | 67 +++++++++++-------------------- 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 44d8fa52f..85295fca2 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -10,6 +10,7 @@ from ..compat import ( from ..utils import ( sanitized_Request, str_to_int, + int_or_none, unified_strdate, ) from ..aes import aes_decrypt_text @@ -54,62 +55,40 @@ class SpankwireIE(InfoExtractor): req = sanitized_Request('http://www.' + mobj.group('url')) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) + + json_req = sanitized_Request('https://www.spankwire.com/api/video/'+video_id+'.json') + video_data = self._download_json(json_req, video_id) - title = self._html_search_regex( - r'

([^<]+)', webpage, 'title') - description = self._html_search_regex( - r'(?s)(.+?)', - webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', - webpage, 'thumbnail', fatal=False) + title = video_data['title'] + description = video_data['description'] + thumbnail = video_data['poster'] - uploader = self._html_search_regex( - r'by:\s*]*>(.+?)', - webpage, 'uploader', fatal=False) + uploader = self._search_regex( + r']+class="uploaded__by"[^>]*>(.+?)', + webpage, 'uploader', flags=re.DOTALL, fatal=False) uploader_id = self._html_search_regex( - r'by:\s* on (.+?) at \d+:\d+', + # r' on (.+?) at \d+:\d+', + r'(.+?) at \d+:\d+ (AM|PM) by', webpage, 'upload date', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'
([\d,\.]+) views
', - webpage, 'view count', fatal=False)) - comment_count = str_to_int(self._html_search_regex( - r']*>([\d,\.]+)', - webpage, 'comment count', fatal=False)) - - videos = re.findall( - r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) - heights = [int(video[0]) for video in videos] - video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos])) - if webpage.find(r'flashvars\.encrypted = "true"') != -1: - password = self._search_regex( - r'flashvars\.video_title = "([^"]+)', - webpage, 'password').replace('+', ' ') - video_urls = list(map( - lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), - video_urls)) + view_count = int_or_none(video_data['viewed']) + comment_count = int_or_none(video_data['comments']) + formats = [] - for height, video_url in zip(heights, video_urls): - path = compat_urllib_parse_urlparse(video_url).path - m = re.search(r'/(?P\d+)[pP]_(?P\d+)[kK]', path) - if m: - tbr = int(m.group('tbr')) - height = int(m.group('height')) - else: - tbr = None + videos = video_data['videos'] + for quality, video_url in videos.items(): + height = quality.split('_')[1].replace('p','') + self.to_screen(height) formats.append({ 'url': video_url, - 'format_id': '%dp' % height, - 'height': height, - 'tbr': tbr, + 'format_id': quality, + 'height': int_or_none(height), + 'tbr': None }) - self._sort_formats(formats) - age_limit = self._rta_search(webpage) return { From 083945d0715ef8199ff0a6192fda6663d79faac8 Mon Sep 17 00:00:00 2001 From: Vijayanand Nandam Date: Mon, 18 Mar 2019 23:12:34 +0530 Subject: [PATCH 2/5] lint by flake8 --- youtube_dl/extractor/spankwire.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 85295fca2..f3214856b 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -55,8 +55,7 @@ class SpankwireIE(InfoExtractor): req = sanitized_Request('http://www.' + mobj.group('url')) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) - - json_req = sanitized_Request('https://www.spankwire.com/api/video/'+video_id+'.json') + json_req = sanitized_Request('https://www.spankwire.com/api/video/' + video_id + '.json') video_data = self._download_json(json_req, video_id) title = video_data['title'] @@ -77,11 +76,10 @@ class SpankwireIE(InfoExtractor): view_count = int_or_none(video_data['viewed']) comment_count = int_or_none(video_data['comments']) - formats = [] videos = video_data['videos'] for quality, video_url in videos.items(): - height = quality.split('_')[1].replace('p','') + height = quality.split('_')[1].replace('p', '') self.to_screen(height) formats.append({ 'url': video_url, From 8dbab4fd012a000e77d7f1a58e87cc3ca3688c31 Mon Sep 17 00:00:00 2001 From: Vijayanand Nandam Date: Mon, 18 Mar 2019 23:16:02 +0530 Subject: [PATCH 3/5] resolves code review --- youtube_dl/extractor/spankwire.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index f3214856b..a8178381c 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -55,12 +55,11 @@ class SpankwireIE(InfoExtractor): req = sanitized_Request('http://www.' + mobj.group('url')) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) - json_req = sanitized_Request('https://www.spankwire.com/api/video/' + video_id + '.json') - video_data = self._download_json(json_req, video_id) + video_data = self._download_json(sanitized_Request('https://www.spankwire.com/api/video/' + video_id + '.json'), video_id) - title = video_data['title'] - description = video_data['description'] - thumbnail = video_data['poster'] + title = video_data.get('title') + description = video_data.get('description') + thumbnail = video_data.get('poster') uploader = self._search_regex( r']+class="uploaded__by"[^>]*>(.+?)', @@ -73,12 +72,11 @@ class SpankwireIE(InfoExtractor): r'(.+?) at \d+:\d+ (AM|PM) by', webpage, 'upload date', fatal=False)) - view_count = int_or_none(video_data['viewed']) - comment_count = int_or_none(video_data['comments']) + view_count = int_or_none(video_data.get('viewed')) + comment_count = int_or_none(video_data.get('comments')) formats = [] - videos = video_data['videos'] - for quality, video_url in videos.items(): + for quality, video_url in video_data.get('videos').items(): height = quality.split('_')[1].replace('p', '') self.to_screen(height) formats.append({ From eeac2ca31cef4477942cc635b62b4a0e14a28e43 Mon Sep 17 00:00:00 2001 From: Vijayanand Nandam Date: Mon, 18 Mar 2019 23:21:32 +0530 Subject: [PATCH 4/5] removes commented out regex --- youtube_dl/extractor/spankwire.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index a8178381c..df2fc1ec8 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -68,7 +68,6 @@ class SpankwireIE(InfoExtractor): r'by\s* on (.+?) at \d+:\d+', r'(.+?) at \d+:\d+ (AM|PM) by', webpage, 'upload date', fatal=False)) From 23da8566c5db49287ccfa8fc59b1f00d1486dd50 Mon Sep 17 00:00:00 2001 From: Vijayanand Nandam Date: Thu, 21 Mar 2019 15:08:59 +0530 Subject: [PATCH 5/5] resolves comments --- youtube_dl/extractor/spankwire.py | 50 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index df2fc1ec8..87ec595d4 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -3,17 +3,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) from ..utils import ( sanitized_Request, - str_to_int, int_or_none, + str_to_int, unified_strdate, ) -from ..aes import aes_decrypt_text class SpankwireIE(InfoExtractor): @@ -48,6 +43,25 @@ class SpankwireIE(InfoExtractor): }, }] + _MEDIA_FILE_SLOTS = { + 'quality_180p': { + 'width': 320, + 'height': 180, + }, + 'quality_240p': { + 'width': 426, + 'height': 240, + }, + 'quality_480p': { + 'width': 854, + 'height': 480, + }, + 'quality_720p': { + 'width': 1280, + 'height': 720, + }, + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -55,15 +69,15 @@ class SpankwireIE(InfoExtractor): req = sanitized_Request('http://www.' + mobj.group('url')) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) - video_data = self._download_json(sanitized_Request('https://www.spankwire.com/api/video/' + video_id + '.json'), video_id) + video_data = self._download_json('https://www.spankwire.com/api/video/' + video_id + '.json', video_id) - title = video_data.get('title') + title = video_data['title'] description = video_data.get('description') thumbnail = video_data.get('poster') - uploader = self._search_regex( - r']+class="uploaded__by"[^>]*>(.+?)', - webpage, 'uploader', flags=re.DOTALL, fatal=False) + uploader = self._html_search_regex( + r'(?s)]+class="uploaded__by"[^>]*>(.+?)', + webpage, 'uploader', fatal=False) uploader_id = self._html_search_regex( r'by\s*(.+?) at \d+:\d+ (AM|PM) by', webpage, 'upload date', fatal=False)) - view_count = int_or_none(video_data.get('viewed')) + view_count = str_to_int(video_data.get('viewed')) comment_count = int_or_none(video_data.get('comments')) formats = [] - for quality, video_url in video_data.get('videos').items(): - height = quality.split('_')[1].replace('p', '') - self.to_screen(height) + for quality, video_url in video_data['videos'].items(): + resolution = self._MEDIA_FILE_SLOTS.get(quality) + if not resolution: + continue + formats.append({ 'url': video_url, 'format_id': quality, - 'height': int_or_none(height), - 'tbr': None + 'height': resolution.get('height'), + 'width': resolution.get('width'), }) age_limit = self._rta_search(webpage)