From 7bee705d8f110f09d8e72b1c863ff197ccc1d4f1 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Thu, 14 Feb 2019 11:28:16 -0400 Subject: [PATCH 01/83] [openload] Add support for oload.live --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a2ae25272..c1dcbb7eb 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) ) )/ (?:f|embed)/ @@ -346,6 +346,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.pw/f/WyKgK8s94N0', 'only_matching': True, + }, { + 'url': 'https://oload.live/f/-Z58UZ-GR4M', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From 794c1b6e02591b04da931fa59745bc47bfae7492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Feb 2019 23:40:46 +0700 Subject: [PATCH 02/83] [vshare] Pass Referer to download request (closes #19205, closes #19221) --- youtube_dl/extractor/vshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index e4ec77889..c631ac1fa 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -48,7 +48,7 @@ class VShareIE(InfoExtractor): webpage = self._download_webpage( 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, - video_id) + video_id, headers={'Referer': url}) title = self._html_search_regex( r'([^<]+)', webpage, 'title') From 2b2da3ba10cc325d00b665aae87f0fa8508bccdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Feb 2019 23:56:29 +0700 
Subject: [PATCH 03/83] [rai] Relax _VALID_URL (closes #19232) --- youtube_dl/extractor/rai.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 548a6553b..149153b8f 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor): class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ # var uniquename = "ContentItem-..." # data-id="ContentItem-..." @@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE): # Direct MMS URL 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, }] def _extract_from_content_id(self, content_id, url): From ba2e3730d125eab952eded3bb7749d479a2262d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Feb 2019 22:45:53 +0700 Subject: [PATCH 04/83] [noovo] Fix extraction (closes #19230) --- youtube_dl/extractor/noovo.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/noovo.py b/youtube_dl/extractor/noovo.py index 974de3c3e..b40770d07 100644 --- a/youtube_dl/extractor/noovo.py +++ b/youtube_dl/extractor/noovo.py @@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - bc_url = BrightcoveNewIE._extract_url(self, webpage) + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') data = self._parse_json( self._search_regex( @@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor): return { '_type': 'url_transparent', 'ie_key': 
BrightcoveNewIE.ie_key(), - 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, 'title': title, 'description': description, 'series': series, From ae65c93a26f2b3cf806477a3ee891aa461b5c6b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 00:58:13 +0700 Subject: [PATCH 05/83] [udemy] Update User-Agent and detect captcha (closes #14713, closes #15839, closes #18126) --- youtube_dl/extractor/udemy.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 105826e9b..89a7f6ade 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -123,10 +123,22 @@ class UdemyIE(InfoExtractor): def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}).copy() - headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers - return super(UdemyIE, self)._download_webpage_handle( + ret = super(UdemyIE, self)._download_webpage_handle( *args, **compat_kwargs(kwargs)) + if not ret: + return ret + webpage, _ = ret + if any(p in webpage for p in ( + '>Please verify you are a human', + 'Access to this page has been denied because we believe you are using automation tools to browse the website', + '"_pxCaptcha"')): + raise ExtractorError( + 'Udemy asks you to solve a CAPTCHA. 
Login with browser, ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies.', expected=True) + return ret def _download_json(self, url_or_request, *args, **kwargs): headers = { From d7d513891b7e63337218c5cb0bf743c8f7044381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 01:05:01 +0700 Subject: [PATCH 06/83] [udemy] Extend _VALID_URLs (closes #14330, closes #15883) --- youtube_dl/extractor/udemy.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89a7f6ade..ae8de9897 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -29,7 +29,7 @@ class UdemyIE(InfoExtractor): IE_NAME = 'udemy' _VALID_URL = r'''(?x) https?:// - www\.udemy\.com/ + (?:[^/]+\.)?udemy\.com/ (?: [^#]+\#/lecture/| lecture/view/?\?lectureId=| @@ -64,6 +64,9 @@ class UdemyIE(InfoExtractor): # only outputs rendition 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0', 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757', + 'only_matching': True, }] def _extract_course_info(self, webpage, video_id): @@ -415,8 +418,14 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P[^/?#&]+)' - _TESTS = [] + _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.udemy.com/java-tutorial/', + 'only_matching': True, + }, { + 'url': 'https://wipro.udemy.com/java-tutorial/', + 'only_matching': True, + }] @classmethod def suitable(cls, url): From c9a0ea6e51eff28b9a383a47215870fd5875fc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 05:00:16 +0700 Subject: [PATCH 07/83] [bilibili] Update keys (closes #19233) --- youtube_dl/extractor/bilibili.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4d6b051fe..3746671d3 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor): }] }] - _APP_KEY = '84956560bc028eb7' - _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def _report_error(self, result): if 'message' in result: From 659e93fcf5c0480ac461cda412335cecf6a5595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 07:12:10 +0700 Subject: [PATCH 08/83] [linuxacademy] Add extractor (closes #12207) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/linuxacademy.py | 174 +++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 youtube_dl/extractor/linuxacademy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3e1b63b4b..c70452dcd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -593,6 +593,7 @@ from .linkedin import ( LinkedInLearningIE, LinkedInLearningCourseIE, ) +from .linuxacademy import LinuxAcademyIE from .litv import LiTVIE from .liveleak import ( LiveLeakIE, diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py new file mode 100644 index 000000000..a78c6556e --- /dev/null +++ b/youtube_dl/extractor/linuxacademy.py @@ -0,0 +1,174 @@ +from __future__ import unicode_literals + +import json +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + orderedSet, + unescapeHTML, + urlencode_postdata, + urljoin, +) + + +class LinuxAcademyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?linuxacademy\.com/cp/ + (?: + 
courses/lesson/course/(?P\d+)/lesson/(?P\d+)| + modules/view/id/(?P\d+) + ) + ''' + _TESTS = [{ + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', + 'info_dict': { + 'id': '1498-2', + 'ext': 'mp4', + 'title': "Introduction to the Practitioner's Brief", + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Requires Linux Academy account credentials', + }, { + 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', + 'only_matching': True, + }, { + 'url': 'https://linuxacademy.com/cp/modules/view/id/154', + 'info_dict': { + 'id': '154', + 'title': 'AWS Certified Cloud Practitioner', + 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', + }, + 'playlist_count': 41, + 'skip': 'Requires Linux Academy account credentials', + }] + + _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' + _ORIGIN_URL = 'https://linuxacademy.com' + _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' + _NETRC_MACHINE = 'linuxacademy' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + def random_string(): + return ''.join([ + random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') + for _ in range(32)]) + + webpage, urlh = self._download_webpage_handle( + self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ + 'client_id': self._CLIENT_ID, + 'response_type': 'token id_token', + 'redirect_uri': self._ORIGIN_URL, + 'scope': 'openid email user_impersonation profile', + 'audience': self._ORIGIN_URL, + 'state': random_string(), + 'nonce': random_string(), + }) + + login_data = self._parse_json( + self._search_regex( + r'atob\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'login info', group='value'), None, + transform_source=lambda x: compat_b64decode(x).decode('utf-8') + )['extraParams'] + + login_data.update({ + 'client_id': self._CLIENT_ID, + 'redirect_uri': self._ORIGIN_URL, + 'tenant': 
'lacausers', + 'connection': 'Username-Password-Authentication', + 'username': username, + 'password': password, + 'sso': 'true', + }) + + login_state_url = compat_str(urlh.geturl()) + + try: + login_page = self._download_webpage( + 'https://login.linuxacademy.com/usernamepassword/login', None, + 'Downloading login page', data=json.dumps(login_data).encode(), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read(), None) + message = error.get('description') or error['code'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) + raise + + callback_page, urlh = self._download_webpage_handle( + 'https://login.linuxacademy.com/login/callback', None, + 'Downloading callback page', + data=urlencode_postdata(self._hidden_inputs(login_page)), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Origin': 'https://login.linuxacademy.com', + 'Referer': login_state_url, + }) + + access_token = self._search_regex( + r'access_token=([^=&]+)', compat_str(urlh.geturl()), + 'access token') + + self._download_webpage( + 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' + % access_token, None, 'Downloading token validation page') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') + item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) + + webpage = self._download_webpage(url, item_id) + + # course path + if course_id: + entries = [ + self.url_result( + urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) + for lesson_url in orderedSet(re.findall( + r']+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', + webpage))] + title = unescapeHTML(self._html_search_regex( + 
(r'class=["\']course-title["\'][^>]*>(?P[^<]+)', + r'var\s+title\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'title', default=None, group='value')) + description = unescapeHTML(self._html_search_regex( + r'var\s+description\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'description', default=None, group='value')) + return self.playlist_result(entries, course_id, title, description) + + # single video path + info = self._extract_jwplayer_data( + webpage, item_id, require_title=False, m3u8_id='hls',) + title = self._search_regex( + (r'>Lecture\s*:\s*(?P[^<]+)', + r'lessonName\s*=\s*(["\'])(?P(?:(?!\1).)+)\1'), webpage, + 'title', group='value') + info.update({ + 'id': item_id, + 'title': title, + }) + return info From 3c9647372e78134777d201e157a5ef42345c9da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:38:21 +0700 Subject: [PATCH 09/83] [tvp] Fix description extraction, make thumbnail optional and fix tests --- youtube_dl/extractor/tvp.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 3954f0b93..f9bf600b0 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -19,12 +19,12 @@ class TVPIE(InfoExtractor): _TESTS = [{ 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', - 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', 'info_dict': { 'id': '194536', 'ext': 'mp4', - 'title': 'Czas honoru, I seria – odc. 13', - 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d', + 'title': 'Czas honoru, odc. 13 – Władek', + 'description': 'md5:437f48b93558370b031740546b696e24', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -45,6 +45,7 @@ class TVPIE(InfoExtractor): 'title': 'Wiadomości, 28.09.2017, 19:30', 'description': 'Wydanie główne codziennego serwisu informacyjnego.' 
}, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', 'only_matching': True, @@ -75,8 +76,10 @@ class TVPIE(InfoExtractor): return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -87,6 +90,14 @@ class TVPEmbedIE(InfoExtractor): _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P\d+)' _TESTS = [{ + 'url': 'tvp:194536', + 'md5': 'a21eb0aa862f25414430f15fdfb9e76c', + 'info_dict': { + 'id': '194536', + 'ext': 'mp4', + 'title': 'Czas honoru, odc. 13 – Władek', + }, + }, { 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { From 34568dc2967d227630ed9d7150deaa62a689b937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Feb 2019 13:39:00 +0700 Subject: [PATCH 10/83] [tvp] Detect unavailable videos --- youtube_dl/extractor/tvp.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f9bf600b0..536b580fc 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -98,6 +98,7 @@ class TVPEmbedIE(InfoExtractor): 'title': 'Czas honoru, odc. 
13 – Władek', }, }, { + # not available 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', 'md5': '8c9cd59d16edabf39331f93bf8a766c7', 'info_dict': { @@ -105,6 +106,7 @@ class TVPEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': 'Panorama, 07.12.2015, 15:40', }, + 'skip': 'Transmisja została zakończona lub materiał niedostępny', }, { 'url': 'tvp:22670268', 'only_matching': True, @@ -116,10 +118,13 @@ class TVPEmbedIE(InfoExtractor): webpage = self._download_webpage( 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) - error_massage = get_element_by_attribute('class', 'msg error', webpage) - if error_massage: + error = self._html_search_regex( + r'(?s)]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)

', + webpage, 'error', default=None) or clean_html( + get_element_by_attribute('class', 'msg error', webpage)) + if error: raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error_massage)), expected=True) + self.IE_NAME, clean_html(error)), expected=True) title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P.+?)\1', From d93083789bf9c318b18d52ac132e9495345b9ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:09:30 +0700 Subject: [PATCH 11/83] [tvp:series] Fix extraction --- youtube_dl/extractor/tvp.py | 67 ++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 536b580fc..05669a366 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor from ..utils import ( - determine_ext, clean_html, - get_element_by_attribute, + determine_ext, ExtractorError, + get_element_by_attribute, + orderedSet, ) @@ -198,46 +200,35 @@ class TVPEmbedIE(InfoExtractor): class TVPSeriesIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem', + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { - 'title': 'Ogniem i mieczem', - 'id': '4278026', + 'id': '38678312', }, - 'playlist_count': 4, - }, { - 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat', - 'info_dict': { - 'title': 'Boso przez świat', - 'id': '9329207', - }, - 'playlist_count': 86, + 'playlist_count': 115, }] + def _entries(self, url, display_id): + for page_num in itertools.count(1): + page = 
self._download_webpage( + url, display_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + + video_ids = orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id, + page)) + + if not video_ids: + break + + for video_id in video_ids: + yield self.url_result( + 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(), + video_id=video_id) + def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id, tries=5) - - title = self._html_search_regex( - r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series') - playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id') - playlist = self._download_webpage( - 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend' - 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5, - note='Downloading playlist') - - videos_paths = re.findall( - '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) - entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) - for v_path in videos_paths] - - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': display_id, - 'title': title, - 'entries': entries, - } + mobj = re.match(self._VALID_URL, url) + display_id, playlist_id = mobj.group('display_id', 'id') + return self.playlist_result(self._entries(url, display_id), playlist_id) From 388cfbd3d8915ebb99714ac8e7ce4151edf96d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 17 Feb 2019 14:27:00 +0700 Subject: [PATCH 12/83] [tvp:website] Improve support --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/tvp.py | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c70452dcd..923dfe7f4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1218,7 +1218,7 @@ 
from .tvnow import ( from .tvp import ( TVPEmbedIE, TVPIE, - TVPSeriesIE, + TVPWebsiteIE, ) from .tvplay import ( TVPlayIE, diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 05669a366..accff75b5 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -198,19 +198,36 @@ class TVPEmbedIE(InfoExtractor): } -class TVPSeriesIE(InfoExtractor): +class TVPWebsiteIE(InfoExtractor): IE_NAME = 'tvp:series' - _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)/video' + _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)' _TESTS = [{ + # series 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', 'info_dict': { 'id': '38678312', }, 'playlist_count': 115, + }, { + # film + 'url': 'https://vod.tvp.pl/website/gloria,35139666', + 'info_dict': { + 'id': '36637049', + 'ext': 'mp4', + 'title': 'Gloria, Gloria', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['TVPEmbed'], + }, { + 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312', + 'only_matching': True, }] - def _entries(self, url, display_id): + def _entries(self, display_id, playlist_id): + url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id) for page_num in itertools.count(1): page = self._download_webpage( url, display_id, 'Downloading page %d' % page_num, @@ -231,4 +248,5 @@ class TVPSeriesIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id, playlist_id = mobj.group('display_id', 'id') - return self.playlist_result(self._entries(url, display_id), playlist_id) + return self.playlist_result( + self._entries(display_id, playlist_id), playlist_id) From c76fc5b22a70f9ac24fe7e34c37aa8ef82e85c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:10:06 +0700 Subject: [PATCH 13/83] [ChangeLog] Actualize [ci skip] --- ChangeLog | 25 +++++++++++++++++++++++++ 1 file changed, 25 
insertions(+) diff --git a/ChangeLog b/ChangeLog index 398528f76..adbdf166d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +version <unreleased> + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* [imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + version 2019.02.08 Core From 77a842c8926625fe791ed36613f183bb195394cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 18 Feb 2019 02:11:11 +0700 Subject: [PATCH 14/83] release 2019.02.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7128d998f..ff626883d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.08*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.08 +[debug] youtube-dl version 2019.02.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index adbdf166d..f9dd7928f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.02.18 Extractors * [tvp:website] Fix and improve extraction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 32fe6b647..d8a8d7ede 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -458,6 +458,7 @@ - **LineTV** - **linkedin:learning** - **linkedin:learning:course** + - **LinuxAcademy** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -915,6 +916,7 @@ - 
**ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **TruNews** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4dc5a611e..ea1d5a4a5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.08' +__version__ = '2019.02.18' From caf48f557a8f4f904c88346bcfc462069b8745bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 21 Feb 2019 05:59:07 +0700 Subject: [PATCH 15/83] [metacafe] Fix family filter bypass (closes #19287) --- youtube_dl/extractor/metacafe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 28f59f63c..9e92416d1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_urllib_parse, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor): headers = { # Disable family filter - 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False})) } # AnyClip videos require the flashversion cookie so that we get the link From 37b239b3b66ea9e2a71bae41e9da6dba8ee5554c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:43:29 +0700 Subject: [PATCH 16/83] [downloader/external] Fix infinite retries for curl (closes #19303) --- youtube_dl/downloader/external.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 
958d00aac..0b88bfd94 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -121,7 +121,11 @@ class CurlFD(ExternalFD): cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') - cmd += self._option('--retry', 'retries') + retry = self._option('--retry', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '2147483647' + cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') From 8c80603f1adea843d96c0598b902106c7a3efb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 23 Feb 2019 00:58:56 +0700 Subject: [PATCH 17/83] [downloader/external] Add support for rate limit and retries for wget --- youtube_dl/downloader/external.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0b88bfd94..22e6093b3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -164,6 +164,12 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--tries', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '0' + cmd += retry cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') From f0228f56fb2441510aa966ba9298e388b209cde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 21:01:25 +0700 Subject: [PATCH 18/83] [bbccouk] Make subtitles non fatal (#19651) --- 
youtube_dl/extractor/bbc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index eac9a5a46..13340ec64 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import itertools +import re +import xml from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, + url_or_none, urlencode_postdata, urljoin, ) @@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor): def _get_subtitles(self, media, programme_id): subtitles = {} for connection in self._extract_connections(media): - captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + cc_url = url_or_none(connection.get('href')) + if not cc_url: + continue + captions = self._download_xml( + cc_url, programme_id, 'Downloading captions', fatal=False) + if not isinstance(captions, xml.etree.ElementTree.Element): + continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ { From 55b8588f0e4dd9597b6da5c46d05b9dd1e9f5960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 Feb 2019 23:19:15 +0700 Subject: [PATCH 19/83] [servus] Fix extraction (closes #19297) --- youtube_dl/extractor/servus.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py index 264e1dd8b..e579d42cf 100644 --- a/youtube_dl/extractor/servus.py +++ b/youtube_dl/extractor/servus.py @@ -1,31 +1,44 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class ServusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)' + _VALID_URL = 
r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)' _TESTS = [{ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', - 'md5': '046dee641cda1c4cabe13baef3be2c1c', + 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', 'info_dict': { 'id': 'AA-1T6VBU5PW1W12', 'ext': 'mp4', - 'title': 'Die Grünen aus Volkssicht', - 'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Die Grünen aus Sicht des Volkes', + 'description': 'md5:1247204d85783afe3682644398ff2ec4', + 'thumbnail': r're:^https?://.*\.jpg', } }, { 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/', + 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + video_id = self._match_id(url).upper() webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = self._search_regex( + (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) From db1c3a9d3f202cc6f3fd83a2a918869e7c0d147f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 Feb 2019 03:41:15 +0700 Subject: [PATCH 20/83] [periscope] Extract width and height (closes #20015) --- youtube_dl/extractor/periscope.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 
8afe541ec..b337a56c0 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, parse_iso8601, unescapeHTML, ) @@ -75,6 +76,14 @@ class PeriscopeIE(PeriscopeBaseIE): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + video_urls = set() formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): @@ -83,16 +92,21 @@ class PeriscopeIE(PeriscopeBaseIE): continue video_urls.add(video_url) if format_id != 'rtmp': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, token, 'mp4', entry_protocol='m3u8_native' if state in ('ended', 'timed_out') else 'm3u8', - m3u8_id=format_id, fatal=False)) + m3u8_id=format_id, fatal=False) + if len(m3u8_formats) == 1: + add_width_and_height(m3u8_formats[0]) + formats.extend(m3u8_formats) continue - formats.append({ + rtmp_format = { 'url': video_url, 'ext': 'flv' if format_id == 'rtmp' else 'mp4', - }) + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) self._sort_formats(formats) return { From 9d9a8676dc02101069cf5fa9862500d39352538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Feb 2019 23:26:52 +0700 Subject: [PATCH 21/83] [francetv:site] Extend video id regex (closes #20029, closes #20071) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2ffe83a78..3c4ef08a8 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -271,7 +271,7 @@ class 
FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From ff60ec8f029d12c119855ec82d7ce9ecda388651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 00:47:18 +0700 Subject: [PATCH 22/83] [npo] Fix extraction (#20084) --- youtube_dl/extractor/npo.py | 120 +++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5a427c396..857845d35 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -12,11 +12,16 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, int_or_none, + merge_dicts, orderedSet, parse_duration, qualities, + str_or_none, strip_jsonp, unified_strdate, + unified_timestamp, + url_or_none, + urlencode_postdata, ) @@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - return self._get_info(video_id) + try: + return self._get_info(url, video_id) + except ExtractorError: + return self._get_old_info(video_id) - def _get_info(self, video_id): + def _get_info(self, url, video_id): + token = self._download_json( + 'https://www.npostart.nl/api/token', video_id, + 'Downloading token', headers={ + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + })['token'] + + player = self._download_json( + 'https://www.npostart.nl/player/%s' % video_id, video_id, + 'Downloading player JSON', data=urlencode_postdata({ + 'autoplay': 0, + 'share': 1, + 'pageUrl': url, + 'hasAdConsent': 0, + '_token': token, + })) + + player_token = player['token'] + + format_urls = set() + formats = [] + for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): + streams = self._download_json( + 
'https://start-player.npo.nl/video/%s/streams' % video_id, + video_id, 'Downloading %s profile JSON' % profile, fatal=False, + query={ + 'profile': profile, + 'quality': 'npo', + 'tokenId': player_token, + 'streamType': 'broadcast', + }) + if not streams: + continue + stream = streams.get('stream') + if not isinstance(stream, dict): + continue + stream_url = url_or_none(stream.get('src')) + if not stream_url or stream_url in format_urls: + continue + format_urls.add(stream_url) + if stream.get('protection') is not None: + continue + stream_type = stream.get('type') + stream_ext = determine_ext(stream_url) + if stream_type == 'application/dash+xml' or stream_ext == 'mpd': + formats.extend(self._extract_mpd_formats( + stream_url, video_id, mpd_id='dash', fatal=False)) + elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + elif '.ism/Manifest' in stream_url: + formats.extend(self._extract_ism_formats( + stream_url, video_id, ism_id='mss', fatal=False)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + embed_url = url_or_none(player.get('embedUrl')) + if embed_url: + webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed page', fatal=False) + if webpage: + video = self._parse_json( + self._search_regex( + r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video', + default='{}'), video_id) + if video: + title = video.get('episodeTitle') + subtitles = {} + subtitles_list = video.get('subtitles') + if isinstance(subtitles_list, list): + for cc in subtitles_list: + cc_url = url_or_none(cc.get('src')) + if not cc_url: + continue + lang = str_or_none(cc.get('language')) or 'nl' + subtitles.setdefault(lang, []).append({ + 'url': cc_url, + }) + return merge_dicts({ + 'title': title, + 
'description': video.get('description'), + 'thumbnail': url_or_none( + video.get('still_image_url') or video.get('orig_image_url')), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('broadcastDate')), + 'creator': video.get('channel'), + 'series': video.get('title'), + 'episode': title, + 'episode_number': int_or_none(video.get('episodeNumber')), + 'subtitles': subtitles, + }, info) + + return info + + def _get_old_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/%s' % video_id, video_id, @@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE): # JSON else: video_url = stream_info.get('url') - if not video_url or video_url in urls: + if not video_url or 'vodnotavailable.' in video_url or video_url in urls: continue urls.add(video_url) if determine_ext(video_url) == 'm3u8': From 333f617b1207cb53efaa5e2f7af174cfa87deee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:02:36 +0700 Subject: [PATCH 23/83] [ChangeLog] Actualize [ci skip] --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index f9dd7928f..f717f99a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Core ++ [downloader/external] Add support for rate limit and retries for wget +* [downloader/external] Fix infinite retries for curl (#19303) + +Extractors +* [npo] Fix extraction (#20084) +* [francetv:site] Extend video id regex (#20029, #20071) ++ [periscope] Extract width and height (#20015) +* [servus] Fix extraction (#19297) +* [bbccouk] Make subtitles non fatal (#19651) +* [metacafe] Fix family filter bypass (#19287) + + version 2019.02.18 Extractors From 04c33bdfb3cd73e71bf0788f02998cab30cf1da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Mar 2019 01:03:51 +0700 Subject: [PATCH 24/83] release 2019.03.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog 
| 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ff626883d..71a500f04 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.02.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.02.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.02.18 +[debug] youtube-dl version 2019.03.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f717f99a8..018a30641 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2019.03.01 Core 
+ [downloader/external] Add support for rate limit and retries for wget diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ea1d5a4a5..42ba37f15 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.02.18' +__version__ = '2019.03.01' From 06242d44fe261999e2424d9ecb00f20ff30ccb9b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 08:14:34 +0100 Subject: [PATCH 25/83] [vimeo] add support for Vimeo Pro portfolio protected videos(closes #20070) --- youtube_dl/extractor/vimeo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6215b3258..6f32ea6f1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -502,7 +502,11 @@ class VimeoIE(VimeoBaseInfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') orig_url = url - if mobj.group('pro') or mobj.group('player'): + if mobj.group('pro'): + # some videos require portfolio_id to be present in player url + # https://github.com/rg3/youtube-dl/issues/20070 + url = self._extract_url(url, self._download_webpage(url, video_id)) + elif mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id From c5b02efe20cff1612104fd731c7f02cbbce4f5f3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:08:11 +0100 Subject: [PATCH 26/83] [sixplay] handle videos with empty assets(closes #20016) --- youtube_dl/extractor/sixplay.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index 0c4f865ef..35bc9fa50 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -61,7 +61,8 @@ class SixPlayIE(InfoExtractor): 
quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] subtitles = {} - for asset in clip_data['assets']: + assets = clip_data.get('assets') or [] + for asset in assets: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls: From 398e1e21d6cbf6eb1e8e7e84de4fad30b7d59613 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 1 Mar 2019 15:34:05 +0100 Subject: [PATCH 27/83] [espn] extend _VALID_URL regex(closes #20013) --- youtube_dl/extractor/espn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 127c69b2e..8cc9bd165 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -29,7 +29,8 @@ class ESPNIE(OnceIE): (?: .*?\?.*?\bid=| /_/id/ - ) + )| + [^/]+/video/ ) )| (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ @@ -94,6 +95,9 @@ class ESPNIE(OnceIE): }, { 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', + 'only_matching': True, }] def _real_extract(self, url): From dca0e0040ae97b2fc0cd54d5e819a5a278937350 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:01:42 +0100 Subject: [PATCH 28/83] Revert "use older login method(closes #11572)" This reverts commit cc6a960e134614f8af2a42dcd8bf146d63638a3c. 
--- youtube_dl/extractor/crunchyroll.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 5e2cbe41d..ce2e2d3ba 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -56,17 +56,6 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return - self._download_webpage( - 'https://www.crunchyroll.com/?a=formhandler', - None, 'Logging in', 'Wrong login info', - data=urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'name': username, - 'password': password, - })) - - ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - ''' def _real_initialize(self): self._login() From a8f83f0c56e81b871a46c18fa9ebc6643370fa48 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 2 Mar 2019 08:25:47 +0100 Subject: [PATCH 29/83] [crunchyroll] fix is_logged check --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ce2e2d3ba..fd1e7afad 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -60,7 +60,7 @@ class CrunchyrollBaseIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login page') def is_logged(webpage): - return '<title>Redirecting' in webpage + return 'href="/logout"' in webpage # Already logged in if is_logged(login_page): From 7465e0aee2301c3e86fe38d6e0ef5ad01c16ec79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 Mar 2019 06:25:45 +0700 Subject: [PATCH 30/83] [spankbang] Fix extraction (closes #20023) --- 
youtube_dl/extractor/spankbang.py | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index fbe6ef31a..f11d728ca 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -9,6 +9,8 @@ from ..utils import ( parse_duration, parse_resolution, str_to_int, + url_or_none, + urlencode_postdata, ) @@ -64,16 +66,49 @@ class SpankBangIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) formats = [] - for mobj in re.finditer( - r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', - webpage): - format_id, format_url = mobj.group('id', 'url') + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return f = parse_resolution(format_id) f.update({ - 'url': format_url, + 'url': f_url, 'format_id': format_id, }) formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + extract_format(mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + self._sort_formats(formats) title = self._html_search_regex( From 7aeb788e564d397face83b580362189753edd9dd Mon Sep 17 00:00:00 2001 From: 
cclauss <cclauss@me.com> Date: Sun, 3 Mar 2019 02:16:48 +0100 Subject: [PATCH 31/83] [travis] Remove sudo: false Travis now recommends removing `sudo: false` from configuration: https://blog.travis-ci.com/2018-11-19-required-linux-infrastructure-migration. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 79287ccf6..82e81d078 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ python: - "3.6" - "pypy" - "pypy3" -sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download From 8ae113ca9df0abd790e3391cd529bac42fce304f Mon Sep 17 00:00:00 2001 From: dimqua <dimqua@users.noreply.github.com> Date: Sun, 3 Mar 2019 04:19:36 +0300 Subject: [PATCH 32/83] [youtube] Add more invidious instances See [Invidious-Instances](https://github.com/omarroth/invidious/wiki/Invidious-Instances) for the reference. --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c8bf98b58..457e2acea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -352,6 +352,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| (?:www\.)?invidio\.us/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? 
# handle anchor (#/) redirect urls (?: # the various things that can precede the ID: From 0a5baf9c210df9f492ae48dd8fdae90561c971bd Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 3 Mar 2019 06:18:15 +0100 Subject: [PATCH 33/83] [libsyn] improve extraction(closes #20229) --- youtube_dl/extractor/libsyn.py | 64 +++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index f7311f483..2cf444258 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_duration, + strip_or_none, unified_strdate, ) @@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor): 'id': '6385796', 'ext': 'mp3', 'title': "Champion Minded - Developing a Growth Mindset", - 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', + # description fetched using another request: + # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 + # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', 'upload_date': '20180320', 'thumbnail': 're:^https?://.*', }, @@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor): }] def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - url = m.group('mainurl') + url, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, video_id) - podcast_title = self._search_regex( - r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) - if podcast_title: - podcast_title = podcast_title.strip() - episode_title = self._search_regex( - r'(?:<div class="episode-title">|<h4>)([^<]+)</', 
webpage, 'episode title') - if episode_title: - episode_title = episode_title.strip() + data = self._parse_json(self._search_regex( + r'var\s+playlistItem\s*=\s*({.+?});', + webpage, 'JSON data block'), video_id) + + episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage) + if not episode_title: + self._search_regex( + [r'data-title="([^"]+)"', r'<title>(.+?)'], + webpage, 'episode title') + episode_title = episode_title.strip() + + podcast_title = strip_or_none(clean_html(self._search_regex( + r'

([^<]+)

', webpage, 'podcast title', + default=None) or get_element_by_class('podcast-title', webpage))) title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title + formats = [] + for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')): + f_url = data.get(k) + if not f_url: + continue + formats.append({ + 'url': f_url, + 'format_id': format_id, + }) + description = self._html_search_regex( r'(.+?)

', webpage, 'description', default=None) @@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor): # Strip non-breaking and normal spaces description = description.replace('\u00A0', ' ').strip() release_date = unified_strdate(self._search_regex( - r'
Released: ([^<]+)<', webpage, 'release date', fatal=False)) - - data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') - data = json.loads(data_json) - - formats = [{ - 'url': data['media_url'], - 'format_id': 'main', - }, { - 'url': data['media_url_libsyn'], - 'format_id': 'libsyn', - }] - thumbnail = data.get('thumbnail_url') - duration = parse_duration(data.get('duration')) + r'
Released: ([^<]+)<', + webpage, 'release date', default=None) or data.get('release_date')) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': data.get('thumbnail_url'), 'upload_date': release_date, - 'duration': duration, + 'duration': parse_duration(data.get('duration')), 'formats': formats, } From e7e62441cdde6dca6211c073be73677f195a0dff Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 3 Mar 2019 13:23:59 +0100 Subject: [PATCH 34/83] [utils] strip #HttpOnly_ prefix from cookies files (#20219) --- test/test_YoutubeDLCookieJar.py | 10 ++++++++++ test/testdata/cookies/httponly_cookies.txt | 6 ++++++ youtube_dl/utils.py | 18 +++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test/testdata/cookies/httponly_cookies.txt diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 6a8243590..f959798de 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase): tf.close() os.remove(tf.name) + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt new file mode 100644 index 000000000..c46541d6b --- /dev/null +++ b/test/testdata/cookies/httponly_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. 
+ +#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE +www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5a0bb4b0..a71eda85d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + _HTTPONLY_PREFIX = '#HttpOnly_' + def save(self, filename=None, ignore_discard=False, ignore_expires=False): # Store session cookies with `expires` set to 0 instead of an empty # string @@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) def load(self, filename=None, ignore_discard=False, ignore_expires=False): - compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires) + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + cf = io.StringIO() + with open(filename) as f: + for line in f: + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + cf.write(compat_str(line)) + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to # an empty string or 0. MozillaCookieJar only recognizes the former # (see [1]). 
So we need force the latter to be recognized as session From 39c780fdec2c62135f37e3565efedf7dcad605ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:37:39 +0700 Subject: [PATCH 35/83] [extractor/common] Return MPD manifest as format's url meta field (#20242) For symmetry with other segmented media --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c3b0586a0..1fa8048b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2120,7 +2120,7 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': base_url, + 'url': mpd_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From c790e93ab5db5f318fb094b8a45f9160cdf4bd9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 00:39:15 +0700 Subject: [PATCH 36/83] [extractor/common] Clarify url and manifest_url meta fields --- youtube_dl/extractor/common.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1fa8048b8..641e50f3c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -102,10 +102,20 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url The mandatory URL representing the media: + for plain file media - HTTP URL of this file, + for RTMP - RTMP URL, + for HLS - URL of the M3U8 media playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. 
* manifest_url The URL of the manifest file in case of - fragmented media (DASH, hls, hds) + fragmented media: + for HLS - URL of the M3U8 master playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). From 5dcd630dca9b75ec2ca920ae7799252e0e0bb599 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Mar 2019 22:26:32 +0100 Subject: [PATCH 37/83] [paramountnetwork] fix mgid extraction(closes #20241) --- youtube_dl/extractor/spike.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 6090e0066..21b93a5b3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,8 +46,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): _GEO_COUNTRIES = ['US'] def _extract_mgid(self, webpage): - cs = self._parse_json(self._search_regex( + root_data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+})', - webpage, 'data'), None)['children'] - c = next(c for c in cs if c.get('type') == 'VideoPlayer') + webpage, 'data'), None) + + def find_sub_data(data, data_type): + return next(c for c in data['children'] if c.get('type') == data_type) + + c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer') return c['props']['media']['video']['config']['uri'] From d9eb580a796ef6c9a248fdd8896ccf85349c35eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:45:40 +0700 Subject: [PATCH 38/83] [extractor/common] Do not fail on invalid data while parsing F4M manifest in non fatal mode --- youtube_dl/extractor/common.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 641e50f3c..55ce1a888 100644 --- a/youtube_dl/extractor/common.py +++ 
b/youtube_dl/extractor/common.py @@ -13,6 +13,7 @@ import socket import sys import time import math +import xml from ..compat import ( compat_cookiejar, @@ -1464,6 +1465,9 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + return [] + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: From c17eb5b4b06cfa2c8bffb378b0a5c84d4c5a6834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:54:25 +0700 Subject: [PATCH 39/83] [rai] Improve extraction (closes #20253) --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 149153b8f..207a6c247 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor): if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): continue - if ext == 'm3u8': + if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': + elif ext == 'f4m' or platform == 'flash': manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) From bb6f112d9d57d7c6260de132cad604c1c05bc5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Mar 2019 23:57:39 +0700 Subject: [PATCH 40/83] [npo] Improve ISM extraction --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 857845d35..ad62f8ec6 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -238,7 +238,7 @@ class NPOIE(NPOBaseIE): formats.extend(self._extract_m3u8_formats( stream_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - elif '.ism/Manifest' in stream_url: + elif re.search(r'\.isml?/Manifest', stream_url): formats.extend(self._extract_ism_formats( stream_url, video_id, ism_id='mss', fatal=False)) else: From e5ada4f3ad771d4cf3f533efb2597a3f1618ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 00:33:08 +0700 Subject: [PATCH 41/83] [extractor/common] Fallback url to base URL for DASH formats --- youtube_dl/extractor/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 55ce1a888..a17f7cbc4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,10 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest, + for DASH - URL of the MPD manifest or + base URL representing the media + if MPD manifest is parsed from + a string, for MSS - URL of the ISM manifest. 
* manifest_url The URL of the manifest file in case of @@ -2134,7 +2137,8 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': mpd_url, + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), From 399f76870d7dc72631e7da1f54a46ed8a039c838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:18:52 +0700 Subject: [PATCH 42/83] [compat] Introduce compat_etree_Element --- test/test_compat.py | 7 +++++++ youtube_dl/compat.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/test/test_compat.py b/test/test_compat.py index 51fe6aa0b..4822260ac 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.compat import ( compat_getenv, compat_setenv, + compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) + def test_compat_etree_Element(self): + try: + compat_etree_Element.text + except AttributeError: + self.fail('compat_etree_Element is not a type') + def test_compat_etree_fromstring(self): xml = ''' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7b770340f..b2fe62f12 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder): pass +try: + # xml.etree.ElementTree.Element is a method in Python <=2.6 and + # the following will crash with: + # TypeError: isinstance() arg 2 must be a class, type, 
or tuple of classes and types + isinstance(None, xml.etree.ElementTree.Element) + from xml.etree.ElementTree import Element as compat_etree_Element +except TypeError: # Python <=2.6 + from xml.etree.ElementTree import _ElementInterface as compat_etree_Element + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2969,6 +2978,7 @@ __all__ = [ 'compat_cookiejar', 'compat_cookies', 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_Element', 'compat_etree_fromstring', 'compat_etree_register_namespace', 'compat_expanduser', From ee0ba927aac067dec533a618540e43ed3deebaba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:21:57 +0700 Subject: [PATCH 43/83] Use compat_etree_Element --- youtube_dl/extractor/bbc.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/extractor/crunchyroll.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 13340ec64..d479d2577 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import itertools import re -import xml from .common import InfoExtractor from ..utils import ( @@ -23,6 +22,7 @@ from ..utils import ( urljoin, ) from ..compat import ( + compat_etree_Element, compat_HTTPError, compat_urlparse, ) @@ -317,7 +317,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, xml.etree.ElementTree.Element): + if not isinstance(captions, compat_etree_Element): continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a17f7cbc4..4839edbf7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py 
@@ -13,11 +13,11 @@ import socket import sys import time import math -import xml from ..compat import ( compat_cookiejar, compat_cookies, + compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, @@ -802,7 +802,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). + Return a tuple (xml as an compat_etree_Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -823,7 +823,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an xml.etree.ElementTree.Element. + Return the xml as an compat_etree_Element. See _download_webpage docstring for arguments specification. """ @@ -1468,7 +1468,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: + if not isinstance(manifest, compat_etree_Element) and not fatal: return [] # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index fd1e7afad..5948154f8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re import json -import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -12,6 +11,7 @@ from .common import InfoExtractor from .vrv import VRVIE from ..compat import ( compat_b64decode, + compat_etree_Element, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -390,7 +390,7 @@ Format: Layer, Start, End, Style, 
Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, etree.Element): + if not isinstance(sub_doc, compat_etree_Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -507,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, etree.Element): + if isinstance(streamdata, compat_etree_Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, etree.Element): + if isinstance(stream_info, compat_etree_Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -593,7 +593,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = thumbnail = None - if isinstance(metadata, etree.Element): + if isinstance(metadata, compat_etree_Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) From a551768acfd177e425f518c43a2992a50a2ff69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 01:27:22 +0700 Subject: [PATCH 44/83] [facebook] Improve uploader extraction (closes #20250) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 74954049d..789dd79d5 100644 --- a/youtube_dl/extractor/facebook.py +++ 
b/youtube_dl/extractor/facebook.py @@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor): uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False) + default=None) or self._og_search_title(webpage, fatal=False) timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) From 97157c692c94e3853a6ad1b8a220f064815b6957 Mon Sep 17 00:00:00 2001 From: yonaikerlol Date: Tue, 5 Mar 2019 14:34:34 -0400 Subject: [PATCH 45/83] [openload] Add support for oload.space --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c1dcbb7eb..bae7c7ee7 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor): (?:www\.)? 
(?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space) ) )/ (?:f|embed)/ @@ -349,6 +349,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.live/f/-Z58UZ-GR4M', 'only_matching': True, + }, { + 'url': 'https://oload.space/f/IY4eZSst3u8/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' From d347b52b63282b3276815fd03fc63a1bc8b82cf5 Mon Sep 17 00:00:00 2001 From: 0x9fff00 <0x9fff00+git@protonmail.ch> Date: Tue, 5 Mar 2019 20:11:32 +0100 Subject: [PATCH 46/83] [urplay] Extract timestamp (#20235) --- youtube_dl/extractor/urplay.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py index 8e6fd4731..6030b7cb5 100644 --- a/youtube_dl/extractor/urplay.py +++ b/youtube_dl/extractor/urplay.py @@ -2,18 +2,31 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import unified_timestamp class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', - 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb', + 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', + 'md5': 'ff5b0c89928f8083c74bbd5099c9292d', + 'info_dict': { + 'id': '203704', + 'ext': 'mp4', + 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', + 'timestamp': 1513512768, + 'upload_date': '20171217', + }, + }, { + 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 
'info_dict': { 'id': '190031', 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + 'timestamp': 1440093600, + 'upload_date': '20150820', }, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', @@ -51,6 +64,7 @@ class URPlayIE(InfoExtractor): 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), 'thumbnail': urplayer_data.get('image'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), 'series': urplayer_data.get('series_title'), 'subtitles': subtitles, 'formats': formats, From fca9baf0da9720bac25d160924204395930191fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 6 Mar 2019 02:45:33 +0700 Subject: [PATCH 47/83] [test] Fix test_compat_etree_Element --- test/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index 4822260ac..86ff389fd 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -93,7 +93,7 @@ class TestCompat(unittest.TestCase): def test_compat_etree_Element(self): try: - compat_etree_Element.text + compat_etree_Element.items except AttributeError: self.fail('compat_etree_Element is not a type') From 829685b88a0c7610a874b980bc25b308c4f34590 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:20:27 +0100 Subject: [PATCH 48/83] [toutv] fix authentication(closes #20261) --- youtube_dl/extractor/toutv.py | 53 +++++++++-------------------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index f1ab91cf2..124ca064c 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,14 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import json from .radiocanada import RadioCanadaIE from ..utils import ( - 
extract_attributes, int_or_none, merge_dicts, - urlencode_postdata, ) @@ -38,47 +36,24 @@ class TouTvIE(RadioCanadaIE): 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] + _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' def _real_initialize(self): email, password = self._get_login_info() if email is None: return - login_webpage = self._download_webpage( - 'https://services.radio-canada.ca/auth/oauth/v2/authorize', - None, 'Downloading login page', query={ - 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', - 'redirect_uri': 'https://ici.tou.tv/logincallback', - 'response_type': 'token', - 'scope': 'id.write media-validation.read', - 'state': '/', - }) - - def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): - form, form_elem = re.search( - r'(?s)((]+?%s[^>]*?>).+?)' % form_spec_re, wp).groups() - form_data = self._hidden_inputs(form) - form_url = extract_attributes(form_elem).get('action') or default_form_url - return form_url, form_data - - post_url, form_data = extract_form_url_and_data( - login_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', - r'(?:id|name)="Form-login"') - form_data.update({ - 'login-email': email, - 'login-password': password, - }) - consent_webpage = self._download_webpage( - post_url, None, 'Logging in', data=urlencode_postdata(form_data)) - post_url, form_data = extract_form_url_and_data( - consent_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') - _, urlh = self._download_webpage_handle( - post_url, None, 'Following Redirection', - data=urlencode_postdata(form_data)) - self._access_token = self._search_regex( - r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - urlh.geturl(), 'access token') + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': 
'34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 7b6e76087080eac54e14cdead4e3bc0225c654b5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 6 Mar 2019 09:28:14 +0100 Subject: [PATCH 49/83] [toutv] detect invalid login error --- youtube_dl/extractor/toutv.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 124ca064c..25e1fd46d 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -4,7 +4,9 @@ from __future__ import unicode_literals import json from .radiocanada import RadioCanadaIE +from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, int_or_none, merge_dicts, ) @@ -42,18 +44,24 @@ class TouTvIE(RadioCanadaIE): email, password = self._get_login_info() if email is None: return - self._access_token = self._download_json( - 'https://services.radio-canada.ca/toutv/profiling/accounts/login', - None, 'Logging in', data=json.dumps({ - 'ClientId': self._CLIENT_KEY, - 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', - 'Email': email, - 'Password': password, - 'Scope': 'id.write media-validation.read', - }).encode(), headers={ - 'Authorization': 'client-key ' + self._CLIENT_KEY, - 'Content-Type': 'application/json;charset=utf-8', - })['access_token'] + try: + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write 
media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['Message'] + raise ExtractorError(error, expected=True) + raise self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): From 9d74ea6d36696396392974e94a40dce1e5a881a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Mar 2019 23:26:59 +0700 Subject: [PATCH 50/83] [francetv:site] Relax video id regex and update test (closes #20268) --- youtube_dl/extractor/francetv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3c4ef08a8..6101fb6bd 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... 
- Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', @@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: From bba35695eb4ab9cc70624583375ba3d15b4e6cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:52:08 +0700 Subject: [PATCH 51/83] [ChangeLog] Actualize [ci skip] --- ChangeLog | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ChangeLog b/ChangeLog index 018a30641..272191a01 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +version + +Core +* [extractor/common] Use compat_etree_Element ++ [compat] Introduce compat_etree_Element +* [extractor/common] Fallback url to base URL for DASH formats +* [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode +* [extractor/common] Return MPD manifest as format's url meta field (#20242) +* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + +Extractors +* [francetv:site] Relax video id regular expression (#20268) +* [toutv] Detect invalid login error +* [toutv] Fix authentication (#20261) ++ [urplay] Extract timestamp (#20235) ++ [openload] Add support for oload.space (#20246) +* [facebook] Improve uploader extraction (#20250) +* [bbc] Use compat_etree_Element +* [crunchyroll] Use compat_etree_Element +* [npo] Improve ISM extraction +* [rai] Improve extraction (#20253) +* [paramountnetwork] Fix mgid extraction (#20241) +* [libsyn] Improve extraction (#20229) ++ [youtube] Add more invidious instances to URL regular expression (#20228) +* [spankbang] Fix extraction (#20023) +* [espn] Extend URL regular expression (#20013) +* [sixplay] Handle videos with empty assets (#20016) ++ [vimeo] Add support for 
Vimeo Pro portfolio protected videos (#20070) + + version 2019.03.01 Core From 10734553feef497e2810a23bbe62a0b3d630e78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 02:53:18 +0700 Subject: [PATCH 52/83] release 2019.03.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 71a500f04..5f97e2cbe 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.03.01 +[debug] youtube-dl version 2019.03.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 272191a01..eda94ad33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2019.03.09 Core * [extractor/common] Use compat_etree_Element diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 42ba37f15..f72fee57f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.01' +__version__ = '2019.03.09' From 0d08bcdb70008f0d500afbd19059b3c0971a4776 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 09:37:28 +0100 Subject: [PATCH 53/83] [fox] detect geo restriction and authentication errors(#20208) --- youtube_dl/extractor/fox.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 0ffceeb7c..f30d3cba8 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -6,10 +6,12 @@ import uuid from .adobepass import AdobePassIE from ..compat import ( + compat_HTTPError, compat_str, compat_urllib_parse_unquote, 
) from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, @@ -48,6 +50,7 @@ class FOXIE(AdobePassIE): 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/', 'only_matching': True, }] + _GEO_BYPASS = False _HOME_PAGE_URL = 'https://www.fox.com/' _API_KEY = 'abdcbed02c124d393b39e818a4312055' _access_token = None @@ -58,9 +61,22 @@ class FOXIE(AdobePassIE): } if self._access_token: headers['Authorization'] = 'Bearer ' + self._access_token - return self._download_json( - 'https://api2.fox.com/v2.0/' + path, - video_id, data=data, headers=headers) + try: + return self._download_json( + 'https://api2.fox.com/v2.0/' + path, + video_id, data=data, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + entitlement_issues = self._parse_json( + e.cause.read().decode(), video_id)['entitlementIssues'] + for e in entitlement_issues: + if e.get('errorCode') == 1005: + raise ExtractorError( + 'This video is only available via cable service provider ' + 'subscription. 
You may want to use --cookies.', expected=True) + messages = ', '.join([e['message'] for e in entitlement_issues]) + raise ExtractorError(messages, expected=True) + raise def _real_initialize(self): if not self._access_token: @@ -81,7 +97,15 @@ class FOXIE(AdobePassIE): title = video['name'] release_url = video['url'] - m3u8_url = self._download_json(release_url, video_id)['playURL'] + try: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.read().decode(), video_id) + if error.get('exception') == 'GeoLocationBlocked': + self.raise_geo_restricted(countries=['US']) + raise ExtractorError(error['description'], expected=True) + raise formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') From 276550371313dbfe7d94ceb294bd1284c1e7c404 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 10 Mar 2019 15:03:32 +0100 Subject: [PATCH 54/83] [vimeo:review] improve config url extraction and extract original format(closes #20305) --- youtube_dl/extractor/vimeo.py | 64 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 6f32ea6f1..e3ec550f0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -195,6 +195,32 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'subtitles': subtitles, } + def _extract_original_format(self, url, video_id): + download_data = self._download_json( + url, video_id, fatal=False, + query={'action': 'load_download_config'}, + headers={'X-Requested-With': 'XMLHttpRequest'}) + if download_data: + source_file = download_data.get('source_file') + if isinstance(source_file, dict): + download_url = source_file.get('download_url') + if download_url and not source_file.get('is_cold') and not 
source_file.get('is_defrosting'): + source_name = source_file.get('public_name', 'Original') + if self._is_valid_url(download_url, video_id, '%s video' % source_name): + ext = (try_get( + source_file, lambda x: x['extension'], + compat_str) or determine_ext( + download_url, None) or 'mp4').lower() + return { + 'url': download_url, + 'ext': ext, + 'width': int_or_none(source_file.get('width')), + 'height': int_or_none(source_file.get('height')), + 'filesize': parse_filesize(source_file.get('size')), + 'format_id': source_name, + 'preference': 1, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -659,29 +685,11 @@ class VimeoIE(VimeoBaseInfoExtractor): comment_count = None formats = [] - download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest'}) - download_data = self._download_json(download_request, video_id, fatal=False) - if download_data: - source_file = download_data.get('source_file') - if isinstance(source_file, dict): - download_url = source_file.get('download_url') - if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): - source_name = source_file.get('public_name', 'Original') - if self._is_valid_url(download_url, video_id, '%s video' % source_name): - ext = (try_get( - source_file, lambda x: x['extension'], - compat_str) or determine_ext( - download_url, None) or 'mp4').lower() - formats.append({ - 'url': download_url, - 'ext': ext, - 'width': int_or_none(source_file.get('width')), - 'height': int_or_none(source_file.get('height')), - 'filesize': parse_filesize(source_file.get('size')), - 'format_id': source_name, - 'preference': 1, - }) + + source_format = self._extract_original_format( + 'https://vimeo.com/' + video_id, video_id) + if source_format: + formats.append(source_format) info_dict_config = self._parse_config(config, video_id) formats.extend(info_dict_config['formats']) @@ 
-940,7 +948,7 @@ class VimeoGroupsIE(VimeoAlbumIE): class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P[^/]+)' + _VALID_URL = r'(?Phttps://vimeo\.com/[^/]+/review/(?P[^/]+)/[0-9a-f]{10})' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -992,7 +1000,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): data = self._parse_json(self._search_regex( r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config = data.get('vimeo_esi', {}).get('config', {}) + config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl']) if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( @@ -1000,10 +1009,13 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): return config_url def _real_extract(self, url): - video_id = self._match_id(url) + page_url, video_id = re.match(self._VALID_URL, url).groups() config_url = self._get_config_url(url, video_id) config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) + source_format = self._extract_original_format(page_url, video_id) + if source_format: + info_dict['formats'].append(source_format) self._vimeo_sort_formats(info_dict['formats']) info_dict['id'] = video_id return info_dict From 067aa17edf5a46a8cbc4d6b90864eddf051fa2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Mar 2019 19:14:41 +0700 Subject: [PATCH 55/83] Start moving to ytdl-org --- .github/ISSUE_TEMPLATE.md | 10 +++--- .github/ISSUE_TEMPLATE_tmpl.md | 10 +++--- .github/PULL_REQUEST_TEMPLATE.md | 4 +-- CONTRIBUTING.md | 20 +++++------ README.md | 42 +++++++++++------------ 
devscripts/buildserver.py | 2 +- devscripts/create-github-release.py | 4 +-- devscripts/gh-pages/update-feed.py | 4 +-- devscripts/release.sh | 2 +- devscripts/show-downloads-statistics.py | 2 +- setup.py | 2 +- test/test_InfoExtractor.py | 16 ++++----- test/test_YoutubeDL.py | 6 ++-- test/test_all_urls.py | 6 ++-- youtube-dl.plugin.zsh | 2 +- youtube_dl/YoutubeDL.py | 16 ++++----- youtube_dl/__init__.py | 2 +- youtube_dl/compat.py | 6 ++-- youtube_dl/downloader/external.py | 4 +-- youtube_dl/downloader/f4m.py | 6 ++-- youtube_dl/downloader/hls.py | 4 +-- youtube_dl/downloader/http.py | 2 +- youtube_dl/extractor/arkena.py | 2 +- youtube_dl/extractor/bambuser.py | 4 +-- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/brightcove.py | 6 ++-- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/common.py | 20 +++++------ youtube_dl/extractor/commonmistakes.py | 2 +- youtube_dl/extractor/crunchyroll.py | 4 +-- youtube_dl/extractor/dailymotion.py | 4 +-- youtube_dl/extractor/dreisat.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- youtube_dl/extractor/generic.py | 10 +++--- youtube_dl/extractor/googledrive.py | 2 +- youtube_dl/extractor/kuwo.py | 2 +- youtube_dl/extractor/liveleak.py | 6 ++-- youtube_dl/extractor/msn.py | 2 +- youtube_dl/extractor/nhl.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/once.py | 2 +- youtube_dl/extractor/pbs.py | 6 ++-- youtube_dl/extractor/pluralsight.py | 6 ++-- youtube_dl/extractor/pornhub.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 4 +-- youtube_dl/extractor/radiocanada.py | 2 +- youtube_dl/extractor/rtlnl.py | 2 +- youtube_dl/extractor/theplatform.py | 4 +-- youtube_dl/extractor/toutv.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 2 +- youtube_dl/extractor/veehd.py | 2 +- youtube_dl/extractor/vevo.py | 2 +- youtube_dl/extractor/vimeo.py | 4 +-- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vlive.py | 2 +- youtube_dl/extractor/yandexmusic.py | 4 +-- 
youtube_dl/extractor/youtube.py | 44 ++++++++++++------------- youtube_dl/options.py | 2 +- youtube_dl/update.py | 2 +- youtube_dl/utils.py | 18 +++++----- 61 files changed, 182 insertions(+), 182 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5f97e2cbe..911e912a4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your 
*issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 8edbd5a0f..8b7e73417 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -6,12 +6,12 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. 
If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** ### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones +- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones - [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser ### What is the purpose of your *issue*? @@ -51,11 +51,11 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. 
In order for site support request to be accepted all provided example URLs should not violate any copyrights. +Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. --- ### Description of your *issue*, suggested solution and other information -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. If work on your *issue* requires account credentials please provide them or explain how one can obtain them. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ba4ca7553..e69b907d8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,8 +7,8 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections -- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6c1739860..cd9ccbe96 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,11 +42,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". 
While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? @@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. To run youtube-dl as a developer, you don't need to build anything either. 
Simply execute @@ -98,7 +98,7 @@ If you want to add support for a new site, first of all **make sure** this site After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): -1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) +1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) 2. Check out the source code with: git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git @@ -150,9 +150,9 @@ After you have ensured this site is distributing its content legally, you can fo # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. +7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 youtube_dl/extractor/yourextractor.py @@ -177,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: - `id` (media identifier) - `title` (media title) @@ -185,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. 
Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. -[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. +[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. #### Example @@ -341,7 +341,7 @@ Incorrect: ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -349,7 +349,7 @@ Use `try_get` for safe metadata extraction from parsed JSON. 
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. +Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. #### More examples diff --git a/README.md b/README.md index c1572f771..e476045b2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl) +[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl) youtube-dl - download videos from youtube.com or other video platforms @@ -43,7 +43,7 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). +Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html). # DESCRIPTION **youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. 
It is released to the public domain, which means you can modify it, redistribute it or use it however you like. @@ -685,7 +685,7 @@ You can merge the video and audio of two formats into a single file using `-f . Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). +Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)). **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` @@ -1342,11 +1342,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open.
Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? 
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 1344b4d87..4a4295ba9 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -322,7 +322,7 @@ class GITBuilder(GITInfoBuilder): class YoutubeDLBuilder(object): - authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org'] def __init__(self, **kwargs): if self.repoName != 'youtube-dl': diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 30716ad8e..428111b3f 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -27,8 +27,8 @@ from youtube_dl.utils import ( class GitHubReleaser(object): - _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' - _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s' _NETRC_MACHINE = 'github.com' def __init__(self, debuglevel=0): diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py index e93eb60fb..506a62377 100755 --- a/devscripts/gh-pages/update-feed.py +++ b/devscripts/gh-pages/update-feed.py @@ -10,7 +10,7 @@ import textwrap atom_template = textwrap.dedent("""\ - + youtube-dl releases https://yt-dl.org/feed/youtube-dl-updates-feed @TIMESTAMP@ @@ -21,7 +21,7 @@ entry_template = textwrap.dedent(""" https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@ New version @VERSION@ - +
Downloads available at https://yt-dl.org/downloads/@VERSION@/ diff --git a/devscripts/release.sh b/devscripts/release.sh index 4db5def5d..4c413bf6d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -96,7 +96,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py index e25d28411..6c8d1cc2d 100644 --- a/devscripts/show-downloads-statistics.py +++ b/devscripts/show-downloads-statistics.py @@ -24,7 +24,7 @@ total_bytes = 0 for page in itertools.count(1): releases = json.loads(compat_urllib_request.urlopen( - 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page + 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page ).read().decode('utf-8')) if not releases: diff --git a/setup.py b/setup.py index dfb669ad2..af68b485e 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ setup( version=__version__, description=DESCRIPTION, long_description=LONG_DESCRIPTION, - url='https://github.com/rg3/youtube-dl', + url='https://github.com/ytdl-org/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', maintainer='Sergey M.', diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index f0aa8466b..da6cd39b6 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -201,7 +201,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_m3u8_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/11507 + # 
https://github.com/ytdl-org/youtube-dl/issues/11507 # http://pluzz.francetv.fr/videos/le_ministere.html 'pluzz_francetv_11507', 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', @@ -263,7 +263,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/11995 + # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'teamcoco_11995', 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', @@ -337,7 +337,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/12211 + # https://github.com/ytdl-org/youtube-dl/issues/12211 # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 'toggle_mobile_12211', 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', @@ -501,7 +501,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/18923 + # https://github.com/ytdl-org/youtube-dl/issues/18923 # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa 'ted_18923', 'http://hls.ted.com/talks/31241.m3u8', @@ -570,9 +570,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_mpd_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/13919 + # https://github.com/ytdl-org/youtube-dl/issues/13919 # Also tests duplicate representation ids, see - # https://github.com/rg3/youtube-dl/issues/15111 + # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', 
'http://unknown/manifest.mpd', [{ @@ -652,7 +652,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, }] ), ( - # https://github.com/rg3/youtube-dl/pull/14844 + # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', [{ @@ -748,7 +748,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_f4m_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/14660 + # https://github.com/ytdl-org/youtube-dl/issues/14660 'custom_base_url', 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', [{ diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1d7452744..ce9666171 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -411,7 +411,7 @@ class TestFormatSelection(unittest.TestCase): # For extractors with incomplete formats (all formats are audio-only or # video-only) best and worst should fallback to corresponding best/worst # video-only or audio-only formats (as per - # https://github.com/rg3/youtube-dl/pull/5556) + # https://github.com/ytdl-org/youtube-dl/pull/5556) formats = [ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, @@ -442,7 +442,7 @@ class TestFormatSelection(unittest.TestCase): self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_format_selection_issue_10083(self): - # See https://github.com/rg3/youtube-dl/issues/10083 + # See https://github.com/ytdl-org/youtube-dl/issues/10083 formats = [ {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, @@ -853,7 +853,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(result, [2, 3, 4]) def test_urlopen_no_file_protocol(self): - # see 
https://github.com/rg3/youtube-dl/issues/8227 + # see https://github.com/ytdl-org/youtube-dl/issues/8227 ydl = YDL() self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') diff --git a/test/test_all_urls.py b/test/test_all_urls.py index cd1cd4b24..465ce0050 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -110,7 +110,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) - # https://github.com/rg3/youtube-dl/issues/1930 + # https://github.com/ytdl-org/youtube-dl/issues/1930 def test_soundcloud_not_matching_sets(self): self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) @@ -119,12 +119,12 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) def test_pbs(self): - # https://github.com/rg3/youtube-dl/issues/2350 + # https://github.com/ytdl-org/youtube-dl/issues/2350 self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) def test_yahoo_https(self): - # https://github.com/rg3/youtube-dl/issues/2701 + # https://github.com/ytdl-org/youtube-dl/issues/2701 self.assertMatch( 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh index 4edab5214..17ab1341a 100644 --- a/youtube-dl.plugin.zsh +++ b/youtube-dl.plugin.zsh @@ -7,7 +7,7 @@ # https://github.com/zsh-users/antigen # Install youtube-dl: -# antigen bundle rg3/youtube-dl +# antigen bundle ytdl-org/youtube-dl # Bundles installed by antigen are available for use immediately. 
# Update youtube-dl (and all other antigen bundles): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bc9fc270c..3b92acd97 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -889,7 +889,7 @@ class YoutubeDL(object): # url_transparent. In such cases outer metadata (from ie_result) # should be propagated to inner one (info). For this to happen # _type of info should be overridden with url_transparent. This - # fixes issue from https://github.com/rg3/youtube-dl/pull/11163. + # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. if new_result.get('_type') == 'url': new_result['_type'] = 'url_transparent' @@ -1606,7 +1606,7 @@ class YoutubeDL(object): # by extractor are incomplete or not (i.e. whether extractor provides only # video-only or audio-only formats) for proper formats selection for # extractors with such incomplete formats (see - # https://github.com/rg3/youtube-dl/pull/5556). + # https://github.com/ytdl-org/youtube-dl/pull/5556). # Since formats may be filtered during format selection and may not match # the original formats the results may be incorrect. Thus original formats # or pre-calculated metrics should be passed to format selection routines @@ -1614,7 +1614,7 @@ class YoutubeDL(object): # We will pass a context object containing all necessary additional data # instead of just formats. # This fixes incorrect format selection issue (see - # https://github.com/rg3/youtube-dl/issues/10083). + # https://github.com/ytdl-org/youtube-dl/issues/10083). 
incomplete_formats = ( # All formats are video-only or all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or @@ -1810,7 +1810,7 @@ class YoutubeDL(object): if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 + # See https://github.com/ytdl-org/youtube-dl/issues/10268 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) except (OSError, IOError): @@ -2229,7 +2229,7 @@ class YoutubeDL(object): return if type('') is not compat_str: - # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) + # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326) self.report_warning( 'Your Python is broken! Update to a newer and supported version') @@ -2323,7 +2323,7 @@ class YoutubeDL(object): proxies = {'http': opts_proxy, 'https': opts_proxy} else: proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = PerRequestProxyHandler(proxies) @@ -2336,7 +2336,7 @@ class YoutubeDL(object): # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which # can be used for malicious purposes (see - # https://github.com/rg3/youtube-dl/issues/8227) + # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): @@ -2348,7 +2348,7 @@ class YoutubeDL(object): # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play - # (See 
https://github.com/rg3/youtube-dl/issues/1309 for details) + # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ba435ea42..94788d936 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -48,7 +48,7 @@ from .YoutubeDL import YoutubeDL def _real_main(argv=None): # Compatibility fixes for Windows if sys.platform == 'win32': - # https://github.com/rg3/youtube-dl/issues/820 + # https://github.com/ytdl-org/youtube-dl/issues/820 codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) workaround_optparse_bug9161() diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2fe62f12..7992a23ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2364,7 +2364,7 @@ except ImportError: # Python 2 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus # implementations from cpython 3.4.3's stdlib. Python 2's version - # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244) + # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244) def compat_urllib_parse_unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" @@ -2828,7 +2828,7 @@ else: compat_socket_create_connection = socket.create_connection -# Fix https://github.com/rg3/youtube-dl/issues/4223 +# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # See http://bugs.python.org/issue9161 for what is broken def workaround_optparse_bug9161(): op = optparse.OptionParser() @@ -2953,7 +2953,7 @@ if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, # PyPy2 prior to version 5.4.0 expects byte strings as Windows function # names, see the original PyPy issue [1] and the youtube-dl one [2]. # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name - # 2. 
https://github.com/rg3/youtube-dl/pull/4392 + # 2. https://github.com/ytdl-org/youtube-dl/pull/4392 def compat_ctypes_WINFUNCTYPE(*args, **kwargs): real = ctypes.WINFUNCTYPE(*args, **kwargs) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 22e6093b3..5f73f7f0f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -239,7 +239,7 @@ class FFmpegFD(ExternalFD): # setting -seekable prevents ffmpeg from guessing if the server # supports seeking(by adding the header `Range: bytes=0-`), which # can cause problems in some cases - # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] @@ -334,7 +334,7 @@ class FFmpegFD(ExternalFD): # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/rg3/youtube-dl/issues/8300). + # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if sys.platform != 'win32': proc.communicate(b'q') raise diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 15e71be9a..9b15a0e15 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -324,8 +324,8 @@ class F4mFD(FragmentFD): urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests - # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 - # and https://github.com/rg3/youtube-dl/issues/7823) + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) @@ -409,7 +409,7 @@ class F4mFD(FragmentFD): # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. If so, write the segment as is - # See https://github.com/rg3/youtube-dl/issues/9214 + # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 4def8e2d5..419e73576 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -152,8 +152,8 @@ class HlsFD(FragmentFD): except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. - # See https://github.com/rg3/youtube-dl/issues/10165, - # https://github.com/rg3/youtube-dl/issues/10448). + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: self.report_retry_fragment(err, frag_index, count, fragment_retries) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 5b1e96013..08670ee3c 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -111,7 +111,7 @@ class HttpFD(FileDownloader): # to match the value of requested Range HTTP header. 
This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see - # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) + # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') if content_range: diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index 4495ddbb0..854f58767 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor): f_url, video_id, mpd_id=kind, fatal=False)) elif kind == 'silverlight': # TODO: process when ism is supported (see - # https://github.com/rg3/youtube-dl/issues/8118) + # https://github.com/ytdl-org/youtube-dl/issues/8118) continue else: tbr = float_or_none(f.get('Bitrate'), 1000) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 34f1b3d83..4400ff9c1 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -23,7 +23,7 @@ class BambuserIE(InfoExtractor): _TEST = { 'url': 'http://bambuser.com/v/4050584', - # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 + # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', 'info_dict': { 'id': '4050584', @@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor): }, 'params': { # It doesn't respect the 'Range' header, it would download the whole video - # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 + # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 'skip_download': True, }, } diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index d479d2577..e76507951 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ 
-208,7 +208,7 @@ class BBCCoUkIE(InfoExtractor): }, 'skip': 'Now it\'s really geo-restricted', }, { - # compact player (https://github.com/rg3/youtube-dl/issues/8147) + # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 465ae396e..c0345e2c3 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -126,7 +126,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'playlist_mincount': 7, }, { - # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965) + # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', 'info_dict': { 'id': '1522758701001', @@ -155,10 +155,10 @@ class BrightcoveLegacyIE(InfoExtractor): {params} """ - # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 object_str = re.sub(r'(', lambda m: m.group(1) + '/>', object_str) - # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 object_str = object_str.replace('<--', '