From 17c35221ba2e87d5638c3baf83a7ada2aeafa8ae Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 10 Apr 2018 18:23:38 -0500 Subject: [PATCH 1/8] [KeezMovies] Correctly fetch video formats --- youtube_dl/extractor/keezmovies.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index e83115e2a..30e451065 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -55,7 +55,7 @@ class KeezMoviesIE(InfoExtractor): encrypted = False def extract_format(format_url, height=None): - if not isinstance(format_url, compat_str) or not format_url.startswith('http'): + if not isinstance(format_url, compat_str): return if format_url in format_urls: return @@ -105,7 +105,8 @@ class KeezMoviesIE(InfoExtractor): raise ExtractorError( 'Video %s is no longer available' % video_id, expected=True) - self._sort_formats(formats) + if len(formats) > 0: + self._sort_formats(formats) if not title: title = self._html_search_regex( @@ -123,6 +124,10 @@ class KeezMoviesIE(InfoExtractor): def _real_extract(self, url): webpage, info = self._extract_info(url) + if len(info['formats']) == 0: + embed_url = self._search_regex( + r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) return info From ecd4c1ddf64ec46dea68862471bc2f145d0c5c8d Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 10 Apr 2018 18:34:41 -0500 Subject: [PATCH 2/8] [KeezMovies] Replaced 404 test --- youtube_dl/extractor/keezmovies.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 30e451065..b031ce4ea 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -20,19 +20,19 @@ from ..utils import ( class KeezMoviesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)' _TESTS = [{ - 'url': 
'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', - 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0', + 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681', + 'md5': '2ac69cdb882055f71d82db4311732a1a', 'info_dict': { - 'id': '1214711', - 'display_id': 'petite-asian-lady-mai-playing-in-bathtub', + 'id': '18070681', + 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money', 'ext': 'mp4', - 'title': 'Petite Asian Lady Mai Playing In Bathtub', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.', + 'thumbnail': None, 'view_count': int, 'age_limit': 18, } }, { - 'url': 'http://www.keezmovies.com/video/1214711', + 'url': 'http://www.keezmovies.com/video/18070681', 'only_matching': True, }] From c8c602469027eaec0473197936eb5975f79230b6 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 12 Apr 2018 11:07:12 -0500 Subject: [PATCH 3/8] [KeezMovies] Applied requested fixes for PR #16154 --- youtube_dl/extractor/keezmovies.py | 41 +++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index b031ce4ea..43bfdc2c1 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -36,16 +36,22 @@ class KeezMoviesIE(InfoExtractor): 'only_matching': True, }] - def _extract_info(self, url): + def _get_ids(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = (mobj.group('display_id') if 'display_id' in mobj.groupdict() else None) or mobj.group('id') + return video_id, display_id - webpage = self._download_webpage( + def _download_webpage_age_verified(self, url, display_id): + return self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) + def _extract_info(self, url, require_http_prefix=True): + video_id, 
display_id = self._get_ids(url) + webpage = self._download_webpage_age_verified(url, display_id) + formats = [] format_urls = set() @@ -54,9 +60,13 @@ class KeezMoviesIE(InfoExtractor): duration = None encrypted = False - def extract_format(format_url, height=None): - if not isinstance(format_url, compat_str): - return + def extract_format(format_url, height=None, require_http_prefix=True): + if require_http_prefix: + if not isinstance(format_url, compat_str) or not format_url.startswith('http'): + return + else: + if not isinstance(format_url, compat_str): + return if format_url in format_urls: return format_urls.add(format_url) @@ -89,24 +99,23 @@ class KeezMoviesIE(InfoExtractor): for key, value in flashvars.items(): mobj = re.search(r'quality_(\d+)[pP]', key) if mobj: - extract_format(value, int(mobj.group(1))) + extract_format(value, int(mobj.group(1)), require_http_prefix=require_http_prefix) video_url = flashvars.get('video_url') if video_url and determine_ext(video_url, None): - extract_format(video_url) + extract_format(video_url, require_http_prefix=require_http_prefix) video_url = self._html_search_regex( r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', default=None, group='url') if video_url: - extract_format(compat_urllib_parse_unquote(video_url)) + extract_format(compat_urllib_parse_unquote(video_url), require_http_prefix=require_http_prefix) if not formats: if 'title="This video is no longer available"' in webpage: raise ExtractorError( 'Video %s is no longer available' % video_id, expected=True) - if len(formats) > 0: - self._sort_formats(formats) + self._sort_formats(formats) if not title: title = self._html_search_regex( @@ -123,11 +132,13 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - webpage, info = self._extract_info(url) - if len(info['formats']) == 0: - embed_url = self._search_regex( - r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) return info From 
37168ccb64c8c609c44886c03c2d81c168d2cb48 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 12 Apr 2018 12:10:32 -0500 Subject: [PATCH 4/8] [KeezMovies] Use default None for embedded url regex --- youtube_dl/extractor/keezmovies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 43bfdc2c1..1567fe956 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -135,7 +135,7 @@ class KeezMoviesIE(InfoExtractor): video_id, display_id = self._get_ids(url) webpage = self._download_webpage_age_verified(url, display_id) embed_url = self._search_regex( - r' Date: Tue, 17 Apr 2018 12:52:49 -0500 Subject: [PATCH 5/8] [KeezMovies] Applied second requested fixes for PR #16154 --- youtube_dl/extractor/common.py | 9 +++++-- youtube_dl/extractor/keezmovies.py | 40 +++++++++++------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 59b9d3739..0a3776a43 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1078,9 +1078,14 @@ class InfoExtractor(object): html, '%s form' % form_id, group='form') return self._hidden_inputs(form) - def _sort_formats(self, formats, field_preference=None): + def _sort_formats(self, formats, field_preference=None, fatal=True): if not formats: - raise ExtractorError('No video formats found') + msg = 'No video formats found' + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return None for f in formats: # Automatically determine tbr when missing based on abr and vbr (improves diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 1567fe956..a8b4ddcfb 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -36,22 +36,16 @@ class KeezMoviesIE(InfoExtractor): 'only_matching': True, }] - def 
_get_ids(self, url): + def _extract_info(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = (mobj.group('display_id') if 'display_id' in mobj.groupdict() else None) or mobj.group('id') - return video_id, display_id - def _download_webpage_age_verified(self, url, display_id): - return self._download_webpage( + webpage = self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) - def _extract_info(self, url, require_http_prefix=True): - video_id, display_id = self._get_ids(url) - webpage = self._download_webpage_age_verified(url, display_id) - formats = [] format_urls = set() @@ -60,13 +54,9 @@ class KeezMoviesIE(InfoExtractor): duration = None encrypted = False - def extract_format(format_url, height=None, require_http_prefix=True): - if require_http_prefix: - if not isinstance(format_url, compat_str) or not format_url.startswith('http'): - return - else: - if not isinstance(format_url, compat_str): - return + def extract_format(format_url, height=None): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')): + return if format_url in format_urls: return format_urls.add(format_url) @@ -99,23 +89,23 @@ class KeezMoviesIE(InfoExtractor): for key, value in flashvars.items(): mobj = re.search(r'quality_(\d+)[pP]', key) if mobj: - extract_format(value, int(mobj.group(1)), require_http_prefix=require_http_prefix) + extract_format(value, int(mobj.group(1))) video_url = flashvars.get('video_url') if video_url and determine_ext(video_url, None): - extract_format(video_url, require_http_prefix=require_http_prefix) + extract_format(video_url) video_url = self._html_search_regex( r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', default=None, group='url') if video_url: - extract_format(compat_urllib_parse_unquote(video_url), require_http_prefix=require_http_prefix) + extract_format(compat_urllib_parse_unquote(video_url)) if not formats: if 'title="This video is 
no longer available"' in webpage: raise ExtractorError( 'Video %s is no longer available' % video_id, expected=True) - self._sort_formats(formats) + self._sort_formats(formats, fatal=False) if not title: title = self._html_search_regex( @@ -132,13 +122,11 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - video_id, display_id = self._get_ids(url) - webpage = self._download_webpage_age_verified(url, display_id) - embed_url = self._search_regex( - r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) return info From 7e8ca819c307ad187d7d94c360241b45f4c66e6e Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 17 Apr 2018 14:47:26 -0500 Subject: [PATCH 6/8] [KeezMovies] Applied third requested fixes for PR #16154 --- youtube_dl/extractor/common.py | 9 ++------- youtube_dl/extractor/keezmovies.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0a3776a43..59b9d3739 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1078,14 +1078,9 @@ class InfoExtractor(object): html, '%s form' % form_id, group='form') return self._hidden_inputs(form) - def _sort_formats(self, formats, field_preference=None, fatal=True): + def _sort_formats(self, formats, field_preference=None): if not formats: - msg = 'No video formats found' - if fatal: - raise ExtractorError(msg) - else: - self._downloader.report_warning(msg) - return None + raise ExtractorError('No video formats found') for f in formats: # Automatically determine tbr when missing based on abr and vbr (improves diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index a8b4ddcfb..9b1bdf8d4 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -36,7 +36,7 @@ class KeezMoviesIE(InfoExtractor): 'only_matching': True, }] - def _extract_info(self, url): + def _extract_info(self, url, fatal=True): mobj = 
re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = (mobj.group('display_id') @@ -105,7 +105,13 @@ class KeezMoviesIE(InfoExtractor): raise ExtractorError( 'Video %s is no longer available' % video_id, expected=True) - self._sort_formats(formats, fatal=False) + try: + self._sort_formats(formats) + except ExtractorError as e: + if fatal: + raise ExtractorError(e, expected=True) + else: + self._downloader.report_warning(e) if not title: title = self._html_search_regex( From e643b7ebf56990b11d0762047f6917b2a8a3f8cd Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Tue, 17 Apr 2018 14:53:57 -0500 Subject: [PATCH 7/8] [KeezMovies] Set _extract_info as non-fatal --- youtube_dl/extractor/keezmovies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 9b1bdf8d4..96275549f 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -128,7 +128,7 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - webpage, info = self._extract_info(url) + webpage, info = self._extract_info(url, fatal=False) if not info['formats']: embed_url = self._search_regex( r' Date: Tue, 17 Apr 2018 17:39:28 -0500 Subject: [PATCH 8/8] [KeezMovies] Applied fourth requested fixes for PR #16154 --- youtube_dl/extractor/keezmovies.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 96275549f..d4e6f7ac1 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -107,11 +107,9 @@ class KeezMoviesIE(InfoExtractor): try: self._sort_formats(formats) - except ExtractorError as e: + except ExtractorError: if fatal: - raise ExtractorError(e, expected=True) - else: - self._downloader.report_warning(e) + raise if not title: title = self._html_search_regex( @@ -130,9 +128,7 @@ class KeezMoviesIE(InfoExtractor): def 
_real_extract(self, url): webpage, info = self._extract_info(url, fatal=False) if not info['formats']: - embed_url = self._search_regex( - r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) return info