From ce2fe4c01cceef4b636995275b573baf51587fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 20 May 2019 23:23:18 +0700 Subject: [PATCH 01/12] [extractor/common] Add doc string for _apply_first_set_cookie_header --- youtube_dl/extractor/common.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f994953bc..937237b3f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2818,15 +2818,19 @@ class InfoExtractor(object): return compat_cookies.SimpleCookie(req.get_header('Cookie')) def _apply_first_set_cookie_header(self, url_handle, cookie): - # Some sites (e.g. [1-3]) may serve two cookies under the same name - # in Set-Cookie header and expect the first (old) one to be set rather - # than second (new). However, as of RFC6265 the newer one cookie - # should be set into cookie store what actually happens. - # We will workaround this issue by resetting the cookie to - # the first one manually. - # 1. https://new.vk.com/ - # 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201 - # 3. https://learning.oreilly.com/ + """ + Apply first Set-Cookie header instead of the last. Experimental. + + Some sites (e.g. [1-3]) may serve two cookies under the same name + in Set-Cookie header and expect the first (old) one to be set rather + than second (new). However, as of RFC6265 the newer one cookie + should be set into cookie store what actually happens. + We will workaround this issue by resetting the cookie to + the first one manually. + 1. https://new.vk.com/ + 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201 + 3. 
https://learning.oreilly.com/ + """ for header, cookies in url_handle.headers.items(): if header.lower() != 'set-cookie': continue From 42c971341b804b758d12b7a85547be05160f1b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 20 May 2019 23:24:27 +0700 Subject: [PATCH 02/12] [ChangeLog] Actualize [ci skip] --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13cb6288d..eba7202dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Core ++ [extractor/common] Move workaround for applying first Set-Cookie header + into a separate _apply_first_set_cookie_header method + +Extractors +* [safari] Fix authentication (#21090) +* [vk] Use _apply_first_set_cookie_header +* [vrt] Fix extraction (#20527) ++ [canvas] Add support for vrtnieuws and sporza site ids and extract + AES HLS formats ++ [vrv] Extract captions (#19238) +* [tele5] Improve video id extraction +* [tele5] Relax URL regular expression (#21020, #21063) +* [svtplay] Update API URL (#21075) ++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071) + + version 2019.05.11 Core From 6ab30ff50bf6bd0585927cb73c7421bef184f87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 20 May 2019 23:29:49 +0700 Subject: [PATCH 03/12] release 2019.05.20 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 6b931b3cf..dc303946e 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ 
] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2019.05.11** +- [ ] I've verified that I'm running youtube-dl version **2019.05.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.05.11 + [debug] youtube-dl version 2019.05.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index f2dc784a2..46e143c8a 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2019.05.11** +- [ ] I've verified that I'm running youtube-dl version **2019.05.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 39a0af13f..bc6c4694b 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm 
reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2019.05.11** +- [ ] I've verified that I'm running youtube-dl version **2019.05.20** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 139f36ab8..bcc51f986 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2019.05.11** +- [ ] I've verified that I'm running youtube-dl version **2019.05.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2019.05.11 + [debug] youtube-dl version 2019.05.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index d60da6db9..c8d16960e 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2019.05.11** +- [ ] I've verified that I'm running youtube-dl version **2019.05.20** - [ ] I've searched the bugtracker for similar 
feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index eba7202dd..3babb6f48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.05.20 Core + [extractor/common] Move workaround for applying first Set-Cookie header diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a8a9224cb..404a2f0a4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1071,7 +1071,7 @@ - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be + - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - **VrtNU**: VrtNU.be - **vrv** - **vrv:series** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e63527dbb..8df77378b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.05.11' +__version__ = '2019.05.20' From 0e6f914b3b40ef2ca78d82051a194faaad64dd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20H=C3=B6pfl?= Date: Wed, 13 Feb 2019 16:29:43 +0100 Subject: [PATCH 04/12] [vivo] Fix extraction (closes #18906) --- youtube_dl/extractor/shared.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 931a0f70e..eade8fd9e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( @@ -7,6 +9,7 @@ from ..utils import ( int_or_none, url_or_none, urlencode_postdata, + unescapeHTML, ) @@ -22,8 +25,7 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) - title = compat_b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') + title = self._extract_title(webpage) 
filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) @@ -35,6 +37,10 @@ class SharedBaseIE(InfoExtractor): 'title': title, } + def _extract_title(self, webpage): + return compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') + class SharedIE(SharedBaseIE): IE_DESC = 'shared.sx' @@ -86,6 +92,14 @@ class VivoIE(SharedBaseIE): }, } + def _extract_title(self, webpage): + data_title = self._search_regex( + r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, + 'title', default=None, group='title') + if data_title: + return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title)) + return self._og_search_title(webpage) + def _extract_video_url(self, webpage, video_id, *args): def decode_url(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') From e438e8146965d2c650c1575dc97809bcc9504f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 03:04:58 +0700 Subject: [PATCH 05/12] [vivo] Improve extraction (closes #19217) --- youtube_dl/extractor/shared.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index eade8fd9e..ff575f592 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,15 +1,15 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( + determine_ext, ExtractorError, int_or_none, + KNOWN_EXTENSIONS, + parse_filesize, url_or_none, urlencode_postdata, - unescapeHTML, ) @@ -26,8 +26,7 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) title = self._extract_title(webpage) - filesize = int_or_none(self._extract_filesize(webpage)) - 'full:size', webpage, 'file size', fatal=False)) + filesize = int_or_none(self._extract_filesize(webpage)) return { 
'id': video_id, @@ -41,6 +40,10 @@ class SharedBaseIE(InfoExtractor): return compat_b64decode(self._html_search_meta( 'full:title', webpage, 'title')).decode('utf-8') + def _extract_filesize(self, webpage): + return self._html_search_meta( + 'full:size', webpage, 'file size', fatal=False) + class SharedIE(SharedBaseIE): IE_DESC = 'shared.sx' @@ -88,19 +91,27 @@ class VivoIE(SharedBaseIE): 'id': 'd7ddda0e78', 'ext': 'mp4', 'title': 'Chicken', - 'filesize': 528031, + 'filesize': 515659, }, } def _extract_title(self, webpage): - data_title = self._search_regex( + title = self._html_search_regex( r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage, 'title', default=None, group='title') - if data_title: - return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title)) + if title: + ext = determine_ext(title) + if ext.lower() in KNOWN_EXTENSIONS: + title = title.rpartition('.' + ext)[0] + return title return self._og_search_title(webpage) - def _extract_video_url(self, webpage, video_id, *args): + def _extract_filesize(self, webpage): + return parse_filesize(self._search_regex( + r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)', + webpage, 'filesize', fatal=False)) + + def _extract_video_url(self, webpage, video_id, url): def decode_url(encoded_url): return compat_b64decode(encoded_url).decode('utf-8') From ea7538209468f630075d08d44ef7b0119f78d2eb Mon Sep 17 00:00:00 2001 From: smed79 <1873139+smed79@users.noreply.github.com> Date: Wed, 22 May 2019 21:30:17 +0100 Subject: [PATCH 06/12] [openload] Add support for oload.press (#21135) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a8e906858..b96be6f64 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -244,7 +244,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _DOMAINS = 
r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)' + _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)' _VALID_URL = r'''(?x) https?:// (?P<host> @@ -357,6 +357,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.services/embed/bs1NWj1dCag/', 'only_matching': True, + }, { + 'url': 'https://oload.press/embed/drTBl1aOTvk/', + 'only_matching': True, }, { 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'only_matching': True, From 612300a686fd83d475b7fddc17cb2ccd8ca0b5ef Mon Sep 17 00:00:00 2001 From: ealgase <mostdigitsofpi@gmail.com> Date: Wed, 22 May 2019 16:38:48 -0400 Subject: [PATCH 07/12] [novamov] Remove extractors (#21077) Sites no longer exist --- youtube_dl/extractor/extractors.py | 7 - youtube_dl/extractor/generic.py | 13 -- youtube_dl/extractor/novamov.py | 212 ----------------------------- 3 files changed, 232 deletions(-) delete mode 100644 youtube_dl/extractor/novamov.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3037b5a45..e5aee96c2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -772,13 +772,6 @@ from .nova import ( NovaEmbedIE, NovaIE, ) -from .novamov import ( - AuroraVidIE, - CloudTimeIE, - NowVideoIE, - VideoWeedIE, - WholeCloudIE, -) from .nowness import ( NownessIE, NownessPlaylistIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3a13c62eb..eeb0d25f6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2583,19 +2583,6 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group(1), 'Mpora') - # Look for embedded NovaMov-based player - mobj = re.search( - 
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\']) - (?P<url>http://(?:(?:embed|www)\.)? - (?:novamov\.com| - nowvideo\.(?:ch|sx|eu|at|ag|co)| - videoweed\.(?:es|com)| - movshare\.(?:net|sx|ag)| - divxstage\.(?:eu|net|ch|co|at|ag)) - /embed\.php.+?)\1''', webpage) - if mobj is not None: - return self.url_result(mobj.group('url')) - # Look for embedded Facebook player facebook_urls = FacebookIE._extract_urls(webpage) if facebook_urls: diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py deleted file mode 100644 index 829c71960..000000000 --- a/youtube_dl/extractor/novamov.py +++ /dev/null @@ -1,212 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - NO_DEFAULT, - sanitized_Request, - urlencode_postdata, -) - - -class NovaMovIE(InfoExtractor): - IE_NAME = 'novamov' - IE_DESC = 'NovaMov' - - _VALID_URL_TEMPLATE = r'''(?x) - http:// - (?: - (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/| - (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv= - ) - (?P<id>[a-z\d]{13}) - ''' - _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'} - - _HOST = 'www.novamov.com' - - _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>' - _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);' - _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>' - _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>' - _URL_TEMPLATE = 'http://%s/video/%s' - - _TEST = None - - def _check_existence(self, webpage, video_id): - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, expected=True) - - def _real_extract(self, url): - video_id = self._match_id(url) - - url = self._URL_TEMPLATE % (self._HOST, video_id) - - webpage = self._download_webpage( - 
url, video_id, 'Downloading video page') - - self._check_existence(webpage, video_id) - - def extract_filekey(default=NO_DEFAULT): - filekey = self._search_regex( - self._FILEKEY_REGEX, webpage, 'filekey', default=default) - if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'): - return self._search_regex( - r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default) - else: - return filekey - - filekey = extract_filekey(default=None) - - if not filekey: - fields = self._hidden_inputs(webpage) - post_url = self._search_regex( - r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, - 'post url', default=url, group='url') - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(url, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(fields)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - request.add_header('Referer', post_url) - webpage = self._download_webpage( - request, video_id, 'Downloading continue to the video page') - self._check_existence(webpage, video_id) - - filekey = extract_filekey() - - title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title') - description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) - - api_response = self._download_webpage( - 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, - 'Downloading video api response') - - response = compat_urlparse.parse_qs(api_response) - - if 'error_msg' in response: - raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) - - video_url = response['url'][0] - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description - } - - -class WholeCloudIE(NovaMovIE): - IE_NAME = 'wholecloud' - IE_DESC = 'WholeCloud' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 
r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'} - - _HOST = 'www.wholecloud.net' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>' - _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>' - - _TEST = { - 'url': 'http://www.wholecloud.net/video/559e28be54d96', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', - 'info_dict': { - 'id': '559e28be54d96', - 'ext': 'flv', - 'title': 'dissapeared image', - 'description': 'optical illusion dissapeared image magic illusion', - } - } - - -class NowVideoIE(NovaMovIE): - IE_NAME = 'nowvideo' - IE_DESC = 'NowVideo' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'} - - _HOST = 'www.nowvideo.to' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<h4>([^<]+)</h4>' - _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>' - - _TEST = { - 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b', - 'md5': '12c82cad4f2084881d8bc60ee29df092', - 'info_dict': { - 'id': 'f1d6fce9a968b', - 'ext': 'flv', - 'title': 'youtubedl test video BaWjenozKc', - 'description': 'Description', - }, - } - - -class VideoWeedIE(NovaMovIE): - IE_NAME = 'videoweed' - IE_DESC = 'VideoWeed' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'} - - _HOST = 'www.videoweed.es' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>' - _URL_TEMPLATE = 'http://%s/file/%s' - - _TEST = { - 'url': 'http://www.videoweed.es/file/b42178afbea14', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', - 'info_dict': { - 'id': 'b42178afbea14', - 'ext': 'flv', - 'title': 'optical illusion dissapeared image magic illusion', - 'description': '' - }, - } - - -class CloudTimeIE(NovaMovIE): - IE_NAME = 'cloudtime' - IE_DESC = 'CloudTime' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'} - - _HOST = 
'www.cloudtime.to' - - _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>' - - _TEST = None - - -class AuroraVidIE(NovaMovIE): - IE_NAME = 'auroravid' - IE_DESC = 'AuroraVid' - - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'} - - _HOST = 'www.auroravid.to' - - _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' - - _TESTS = [{ - 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', - 'md5': '7205f346a52bbeba427603ba10d4b935', - 'info_dict': { - 'id': '4rurhn9x446jj', - 'ext': 'flv', - 'title': 'search engine optimization', - 'description': 'search engine optimization is used to rank the web page in the google search engine' - }, - 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' - }, { - 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj', - 'only_matching': True, - }] From 186d185b6ecdee102866777121d6abe9ed7f59ba Mon Sep 17 00:00:00 2001 From: Malte Kiefer <malte.kiefer@mailgermania.de> Date: Wed, 22 May 2019 22:46:20 +0200 Subject: [PATCH 08/12] [streamcloud] Reduce waiting time to 6 seconds (#21092) --- youtube_dl/extractor/streamcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 4a410611d..b97bb4374 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor): value="([^"]*)" ''', orig_webpage) - self._sleep(12, video_id) + self._sleep(6, video_id) webpage = self._download_webpage( url, video_id, data=urlencode_postdata(fields), headers={ From bbf1defe586f4b4cb7b35aa3da67c5dc786d9a2c Mon Sep 17 00:00:00 2001 From: Georgi Saev <georgi.saev@gmail.com> Date: Wed, 22 May 2019 23:51:50 +0300 Subject: [PATCH 09/12] [bitchute] Fix uploader extraction (#21076) --- 
youtube_dl/extractor/bitchute.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 4f39424f5..1d69dafbd 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -65,8 +65,9 @@ class BitChuteIE(InfoExtractor): webpage, default=None) or self._html_search_meta( 'twitter:image:src', webpage, 'thumbnail') uploader = self._html_search_regex( - r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage, - 'uploader', fatal=False) + (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', + r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), + webpage, 'uploader', fatal=False) return { 'id': video_id, From 2c53c0ebc63b7fbb36d05491d5d3796d3e511e26 Mon Sep 17 00:00:00 2001 From: NRTICN <50528161+NRTICN@users.noreply.github.com> Date: Wed, 22 May 2019 20:56:54 +0000 Subject: [PATCH 10/12] [pornhub] Use https (#21061) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index bf8f0be88..cb59d526f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -170,7 +170,7 @@ class PornHubIE(PornHubBaseIE): def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( - 'http://www.%s/view_video.php?viewkey=%s' % (host, video_id), + 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), video_id, 'Downloading %s webpage' % platform) webpage = dl_webpage('pc') From afd4985f72a6641907aee1cd0b4b42da524b0ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 06:06:49 +0700 Subject: [PATCH 11/12] [travis] Force dist to Ubuntu Trusty by default According to https://blog.travis-ci.com/2019-04-15-xenial-default-build-environment Ubuntu Xenial is now default, but it lacks python 2.6, 3.2 
and 3.3 support needed by tests --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 82e81d078..6d16c2955 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ python: - "3.6" - "pypy" - "pypy3" +dist: trusty env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download From 9c5f2988b91609d49f7010ac580376f42e01d4f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 May 2019 23:38:01 +0700 Subject: [PATCH 12/12] [criterion] Remove extractor (closes #21195) --- youtube_dl/extractor/criterion.py | 39 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/criterion.py diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py deleted file mode 100644 index f7815b905..000000000 --- a/youtube_dl/extractor/criterion.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+' - _TEST = { - 'url': 'http://www.criterion.com/films/184-le-samourai', - 'md5': 'bc51beba55685509883a9a7830919ec3', - 'info_dict': { - 'id': '184', - 'ext': 'mp4', - 'title': 'Le Samouraï', - 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - final_url = self._search_regex( - r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') - title = self._og_search_title(webpage) - description = self._html_search_meta('description', webpage) - thumbnail = self._search_regex( - r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') - - return { - 'id': video_id, - 'url': final_url, - 'title': title, - 'description': description, - 
'thumbnail': thumbnail, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e5aee96c2..7705f9bdd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -240,7 +240,6 @@ from .condenast import CondeNastIE from .corus import CorusIE from .cracked import CrackedIE from .crackle import CrackleIE -from .criterion import CriterionIE from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE,