From 66d04c74e097c03e4d644d7292546884cbee3d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Sep 2019 01:23:22 +0700 Subject: [PATCH 1/3] [platzi:course] Add support for authentication --- youtube_dl/extractor/platzi.py | 73 ++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py index 557b2b5ad..cd6b966c5 100644 --- a/youtube_dl/extractor/platzi.py +++ b/youtube_dl/extractor/platzi.py @@ -18,43 +18,10 @@ from ..utils import ( ) -class PlatziIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: - platzi\.com/clases| # es version - courses\.platzi\.com/classes # en version - )/[^/]+/(?P<id>\d+)-[^/?\#&]+ - ''' +class PlatziBaseIE(InfoExtractor): _LOGIN_URL = 'https://platzi.com/login/' _NETRC_MACHINE = 'platzi' - _TESTS = [{ - 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', - 'md5': '8f56448241005b561c10f11a595b37e3', - 'info_dict': { - 'id': '12074', - 'ext': 'mp4', - 'title': 'Creando nuestra primera página', - 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', - 'duration': 420, - }, - 'skip': 'Requires platzi account credentials', - }, { - 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', - 'info_dict': { - 'id': '13430', - 'ext': 'mp4', - 'title': 'Background', - 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', - 'duration': 360, - }, - 'skip': 'Requires platzi account credentials', - 'params': { - 'skip_download': True, - }, - }] - def _real_initialize(self): self._login() @@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor): 'Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + +class PlatziIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P<id>\d+)-[^/?\#&]+ + ''' + + _TESTS = [{ + 'url': 
'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + def _real_extract(self, url): lecture_id = self._match_id(url) @@ -146,7 +149,7 @@ class PlatziIE(InfoExtractor): } -class PlatziCourseIE(InfoExtractor): +class PlatziCourseIE(PlatziBaseIE): _VALID_URL = r'''(?x) https?:// (?: From 31dbd054c801ec14c1ea29a2167b70c980f1d782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 3 Sep 2019 01:24:20 +0700 Subject: [PATCH 2/3] [platzi] Improve client data extraction (closes #22290) --- youtube_dl/extractor/platzi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py index cd6b966c5..602207beb 100644 --- a/youtube_dl/extractor/platzi.py +++ b/youtube_dl/extractor/platzi.py @@ -107,7 +107,11 @@ class PlatziIE(PlatziBaseIE): data = self._parse_json( self._search_regex( - r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), + # client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), lecture_id) material = data['initialState']['material'] From bff90fc518d6ccadaafc26407a688dc1bbd32dff Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Tue, 3 Sep 2019 01:35:32 
+0700 Subject: [PATCH 3/3] [youtube] Add support for invidious tor instances (#22268) --- youtube_dl/extractor/youtube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 25d056b3c..abafd5157 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -391,6 +391,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| (?:www\.)?yt\.elukerio\.org/| + (?:www\.)?kgg2m7yk5aybusll\.onion/| + (?:www\.)?qklhadlycap4cnod\.onion/| + (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| + (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| + (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| + (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: