From 9524dca3accc1e60cddd141f06924ed7404db9bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 02:53:20 +0700 Subject: [PATCH 01/97] [README.md] Use revision bound link to YoutubeDL options (closes #14401) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7818e58df..2879aad24 100644 --- a/README.md +++ b/README.md @@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From c110944fa2f21af733b4f3168764e1b008e11514 Mon Sep 17 00:00:00 2001 From: "M.K" Date: Tue, 3 Oct 2017 22:50:27 +0200 Subject: [PATCH 02/97] [extractor/common] Fix typo in _parse_mpd_formats --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2bbbf8f4d..a878550c2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1920,7 +1920,7 @@ class InfoExtractor(object): # can't be used at the same time if '%(Number' in media_template and 's' not in representation_ms_info: segment_duration = None - if 'total_number' not in representation_ms_info and 'segment_duration': + if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['fragments'] = [{ From 6e736d86e7d03d14279c403202fe2a632e6e5023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Oct 2017 04:27:42 +0700 Subject: [PATCH 03/97] [beeg] Fix extraction (closes #14403) --- youtube_dl/extractor/beeg.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index bbeae4bac..bf22a41b7 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -60,9 +60,13 @@ class BeegIE(InfoExtractor): beeg_version = beeg_version or '2185' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' - video = self._download_json( - 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), - video_id) + for api_path in ('', 'api.'): + video = self._download_json( + 'https://%sbeeg.com/api/v6/%s/video/%s' + % (api_path, beeg_version, video_id), video_id, + fatal=api_path == 'api.') + if video: + break def split(o, e): def cut(s, x): From 6b46285e850f8bc8155a93adf13920752537e55d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 4 Oct 2017 07:45:13 +0200 Subject: [PATCH 04/97] [comedycentral] new shortcut :theopposition for "The Opposition" show --- youtube_dl/extractor/comedycentral.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 4cac29415..d08b909a6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor): class ComedyCentralShortnameIE(InfoExtractor): - _VALID_URL = r'^:(?Ptds|thedailyshow)$' + _VALID_URL = r'^:(?Ptds|thedailyshow|theopposition)$' _TESTS = [{ 'url': ':tds', 'only_matching': True, }, { 'url': ':thedailyshow', 'only_matching': True, + }, { + 'url': ':theopposition', + 'only_matching': True, }] def _real_extract(self, url): @@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor): shortcut_map = { 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes', } return self.url_result(shortcut_map[video_id]) From cf5f6ed5be8bab252f3ded345b6b1fdf31426661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Oct 2017 00:27:24 +0700 Subject: [PATCH 05/97] [xvideos] Add support for embed URLs and improve extraction (closes #14409) --- youtube_dl/extractor/xvideos.py | 42 ++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index eca603028..085c8d4f3 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -14,8 +14,16 @@ from ..utils import ( class XVideosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' - _TEST = { + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?xvideos\.com/video| + flashservice\.xvideos\.com/embedframe/| + static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= + ) + (?P[0-9]+) + ''' + _TESTS = [{ 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '14cea69fcb84db54293b1e971466c2e1', 'info_dict': { @@ -25,21 +33,33 @@ class XVideosIE(InfoExtractor): 'duration': 108, 'age_limit': 18, } - } + }, { + 'url': 'https://flashservice.xvideos.com/embedframe/4588838', + 'only_matching': True, + }, { + 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + webpage = self._download_webpage( + 'http://www.xvideos.com/video%s/' % video_id, video_id) mobj = re.search(r'

(.+?)

', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_title = self._html_search_regex( - r'(.*?)\s+-\s+XVID', webpage, 'title') - video_thumbnail = self._search_regex( + title = self._html_search_regex( + (r'<title>(?P<title>.+?)\s+-\s+XVID', + r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + + thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - video_duration = int_or_none(self._og_search_property( + duration = int_or_none(self._og_search_property( 'duration', webpage, default=None)) or parse_duration( self._search_regex( r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)', @@ -74,8 +94,8 @@ class XVideosIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': video_title, - 'duration': video_duration, - 'thumbnail': video_thumbnail, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, 'age_limit': 18, } From 6be08ce60205a65a6739667783eead56ccc34456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:13:53 +0700 Subject: [PATCH 06/97] [utils] Use in OnDemandPagedList by default Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m --- youtube_dl/extractor/mixcloud.py | 2 +- youtube_dl/extractor/nba.py | 2 +- youtube_dl/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index f331db890..7b2bb6e20 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): functools.partial( self._tracks_page_func, '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return self.playlist_result( entries, video_id, '%s (%s)' % (username, list_type), description) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 53561961c..be295a7a3 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE): playlist_title = self._og_search_title(webpage, fatal=False) entries = OnDemandPagedList( functools.partial(self._fetch_page, team, video_id), - self._PAGE_SIZE, use_cache=True) + self._PAGE_SIZE) return self.playlist_result(entries, team, playlist_title) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 92b22e639..59fb33435 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1933,7 +1933,7 @@ class PagedList(object): class OnDemandPagedList(PagedList): - def __init__(self, pagefunc, pagesize, use_cache=False): + def __init__(self, pagefunc, pagesize, use_cache=True): self._pagefunc = pagefunc self._pagesize = pagesize self._use_cache = use_cache From 7e85e8729f1e2ed9817466acef30fa6dc7e03e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:34:46 +0700 Subject: [PATCH 07/97] [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) --- youtube_dl/YoutubeDL.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 474d6c95f..9036f0f94 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -911,12 +911,22 @@ class YoutubeDL(object): playlistitems = iter_playlistitems(playlistitems_str) ie_entries = ie_result['entries'] + + def make_playlistitems_entries(list_ie_entries): + num_entries = len(list_ie_entries) + return [ + list_ie_entries[i - 1] for i in playlistitems + if -num_entries <= i - 1 < num_entries] + + def report_download(num_entries): + self.to_screen( + '[%s] playlist %s: Downloading %d videos' % + (ie_result['extractor'], playlist, num_entries)) + if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ - ie_entries[i - 1] for i in playlistitems - if -n_all_entries <= i - 1 < n_all_entries] + entries = make_playlistitems_entries(ie_entries) else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) @@ -934,20 +944,15 @@ class YoutubeDL(object): entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) else: # iterable if playlistitems: - entry_list = list(ie_entries) - entries = [entry_list[i - 1] for i in playlistitems] + entries = make_playlistitems_entries(list(ie_entries)) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) if self.params.get('playlistreverse', False): entries = entries[::-1] From 86a15ed64b410336b2145f4a6b8e8a22cbf24f51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:41:28 +0700 Subject: [PATCH 08/97] [test_YoutubeDL] Add test for #14425 --- test/test_YoutubeDL.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e70cbcd37..5ac34e663 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -770,6 +770,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '10'}) self.assertEqual(result, []) + result = get_ids({'playlist_items': '3-10'}) + self.assertEqual(result, [3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() From cd6fc19ed76af6687fb2cb5f87d9ed4c3071c203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:46:57 +0700 Subject: [PATCH 09/97] [YoutubeDL] Ignore duplicates in --playlist-items E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]' --- test/test_YoutubeDL.py | 3 +++ youtube_dl/YoutubeDL.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5ac34e663..db936bfb8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -773,6 +773,9 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '3-10'}) self.assertEqual(result, [3, 4]) + result = get_ids({'playlist_items': '2-4,3-4,3'}) + self.assertEqual(result, [2, 3, 4]) + def test_urlopen_no_file_protocol(self): # see https://github.com/rg3/youtube-dl/issues/8227 ydl = YDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9036f0f94..855d6b8e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( locked_file, make_HTTPS_handler, MaxDownloadsReached, + orderedSet, PagedList, parse_filesize, PerRequestProxyHandler, @@ -908,7 +909,7 @@ class YoutubeDL(object): yield int(item) else: yield int(string_segment) - playlistitems = iter_playlistitems(playlistitems_str) + playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] From ac93c09ab2c4d099a628c1ed59670bef008db89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Oct 2017 23:53:32 +0700 Subject: [PATCH 10/97] [xtube] Add support for embedded URLs (closes #14417) --- youtube_dl/extractor/xtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index bea9b87ad..c6c0b3291 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -18,7 +18,7 @@ class XTubeIE(InfoExtractor): _VALID_URL = r'''(?x) (?: xtube:| - https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-) + https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-) ) (?P<id>[^/?&#]+) ''' @@ -64,6 +64,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:kVTUy_G222_', 'only_matching': True, + }, { + 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big', + 'only_matching': True, }] def _real_extract(self, url): From 2e2a8e97d53d7759d663be2a1dc3f4108342ea40 Mon Sep 17 00:00:00 2001 From: Jalaz Kumar <jaykay12@users.noreply.github.com> Date: Fri, 6 Oct 2017 22:26:31 +0530 Subject: [PATCH 11/97] [pornflip] Extend _VALID_URL (closes #14405) --- youtube_dl/extractor/pornflip.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index a4a5d390e..ee04936e1 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -14,7 +14,7 @@ from ..utils import ( class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})' _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', @@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor): }, { 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s', + 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s', + 'only_matching': True, }] def _real_extract(self, url): From b1a7bf44b95dee2803e8638b13c4467a253b5b8f Mon Sep 17 00:00:00 2001 From: remis <r-pankevicius@users.noreply.github.com> Date: Fri, 6 Oct 2017 19:59:09 +0300 Subject: [PATCH 12/97] [lnkgo] Relax _VALID_URL --- youtube_dl/extractor/lnkgo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index 068378c9c..cfec0d3d0 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -11,7 +11,7 @@ from ..utils import ( class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)' _TESTS = [{ 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', 'info_dict': { @@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, + }, { + 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', + 'only_matching': True, }] _AGE_LIMITS = { 'N-7': 7, From e95284754192ea4b5b4e32f1a2274e5767d980b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 00:57:09 +0700 Subject: [PATCH 13/97] [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 46fa26f02..ba4ca7553 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,6 +9,7 @@ ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) From 665f42d8c14626404372b349624632b8d39f3c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 01:40:00 +0700 Subject: [PATCH 14/97] [reddit] Sort formats (closes #14430) --- youtube_dl/extractor/reddit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py index 01c85ee01..4d44b9d74 100644 --- a/youtube_dl/extractor/reddit.py +++ b/youtube_dl/extractor/reddit.py @@ -35,6 +35,8 @@ class RedditIE(InfoExtractor): 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + return { 'id': video_id, 'title': video_id, From 3fc8f5b7c239ddd366012d2f0da754fcfe247297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:01:38 +0700 Subject: [PATCH 15/97] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 80299d522..760d09047 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version <unreleased> + +Core +* [YoutubeDL] Ignore duplicates in --playlist-items +* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and + reduce code duplication (#14425) ++ [utils] Use cache in OnDemandPagedList by default +* [postprocessor/ffmpeg] Convert to opus using libopus (#14381) + +Extractors +* [reddit] Sort formats (#14430) +* [lnkgo] Relax URL regular expression (#14423) +* [pornflip] Extend URL regular expression (#14405, #14406) ++ [xtube] Add support for embed URLs (#14417) ++ [xvideos] Add support for embed URLs and improve extraction (#14409) +* [beeg] Fix extraction (#14403) +* [tvn24] Relax URL regular expression (#14395) +* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378, + #14392, #14414, #14419, #14431) ++ [ketnet] Add support for videos without direct sources (#14377) +* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een ++ [afreecatv] Add support for adult videos (#14376) + + version 2017.10.01 Core From 8e751a185c53a81cd35900010118894fd8201b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 05:02:53 +0700 Subject: [PATCH 16/97] release 2017.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 3be306203..9fc425ca8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.01 +[debug] youtube-dl version 2017.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 760d09047..2feb3dc2d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.10.07 Core * [YoutubeDL] Ignore duplicates in --playlist-items diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d36a07cf6..be9df7e31 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -130,7 +130,8 @@ - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - - **Canvas**: canvas.be and een.be + - **Canvas** + - **CanvasEen**: canvas.be and een.be - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 49fa02d43..705fcade6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.01' +__version__ = '2017.10.07' From 8b561bfc9d15bf487be0e0bb5bb1d7248fd321d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Oct 2017 21:59:04 +0700 Subject: [PATCH 17/97] [youtube] Add support for hooktube.com (closes #14437) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ad2e933ee..edd8713b7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -332,6 +332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| (?:www\.)?deturl\.com/www\.youtube\.com/| (?:www\.)?pwnyoutube\.com/| + (?:www\.)?hooktube\.com/| (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains From a22ccac1f02b5957f1f083a54e1f83d22a7e6f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 01:34:17 +0700 Subject: [PATCH 18/97] [fox] Delegate to uplynk:preplay (#14147) --- youtube_dl/extractor/fox.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index facc665f6..5f98d017b 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE +from .uplynk import UplynkPreplayIE +from ..compat import compat_str from ..utils import ( + HEADRequest, int_or_none, parse_age_limit, parse_duration, @@ -53,14 +56,7 @@ class FOXIE(AdobePassIE): }) title = video['name'] - - m3u8_url = self._download_json( - video['videoRelease']['url'], video_id)['playURL'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) + release_url = video['videoRelease']['url'] description = video.get('description') duration = int_or_none(video.get('durationInSeconds')) or int_or_none( @@ -84,7 +80,7 @@ class FOXIE(AdobePassIE): # TODO: AP pass - return { + info = { 'id': video_id, 'title': title, 'description': description, @@ -97,5 +93,22 @@ class FOXIE(AdobePassIE): 'episode': episode, 'episode_number': episode_number, 'release_year': release_year, - 'formats': formats, } + + urlh = self._request_webpage(HEADRequest(release_url), video_id) + video_url = compat_str(urlh.geturl()) + + if UplynkPreplayIE.suitable(video_url): + info.update({ + '_type': 'url_transparent', + 'url': video_url, + 'ie_key': UplynkPreplayIE.ie_key(), + }) + else: + m3u8_url = self._download_json(release_url, video_id)['playURL'] + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + info['formats'] = formats + return info From b0dde6686c7110c9c2515a808d803239a81e6505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksandar=20Topuzovi=C4=87?= <aleksandar.topuzovic@gmail.com> Date: Sat, 7 Oct 2017 23:40:08 +0100 Subject: [PATCH 19/97] [hrti] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 656ce6d05..4f0369433 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE): (?: hrti:(?P<short_id>[0-9]+)| https?:// - hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? + hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)? ) ''' _TESTS = [{ @@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE): }, { 'url': 'hrti:2181385', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14', + 'only_matching': True, }] def _real_extract(self, url): From 89923316210f8e17bb1a085278940e1c56fcff48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Oct 2017 21:36:50 +0700 Subject: [PATCH 20/97] [wdr] Relax media link regex (closes #14447) --- youtube_dl/extractor/wdr.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 8bb7362bb..621de1e1e 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor): # for wdrmaus, in a tag with the class "videoButton" (previously a link # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', - webpage, 'media link', default=None, flags=re.MULTILINE) + r'''(?sx)class= + (?: + (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+| + (["\'])videoLink\b.*?\2[\s]*>\n[^\n]* + )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3 + ''', + webpage, 'media link', default=None, group='data') if not json_metadata: return From 197224b7a4e37a6581bf1a0da18d0f67ea61a476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Oct 2017 23:50:53 +0700 Subject: [PATCH 21/97] Fix some regexes --- youtube_dl/extractor/aenetworks.py | 2 +- youtube_dl/extractor/appletrailers.py | 4 ++-- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/deezer.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/hrti.py | 2 +- youtube_dl/extractor/ign.py | 2 +- youtube_dl/extractor/infoq.py | 6 +++--- youtube_dl/extractor/jeuxvideo.py | 2 +- youtube_dl/extractor/livestream.py | 2 +- youtube_dl/extractor/makertv.py | 2 +- youtube_dl/extractor/mangomolo.py | 2 +- youtube_dl/extractor/meipai.py | 2 +- youtube_dl/extractor/mtv.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/npo.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/stanfordoc.py | 4 ++-- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/thisav.py | 4 ++-- youtube_dl/extractor/twitter.py | 2 +- youtube_dl/extractor/vice.py | 2 +- youtube_dl/extractor/videopremium.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 30 files changed, 35 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 2dcdba9d2..da1b566c2 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE): r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'], webpage, 'video url', group='url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) + r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b45b431e1..a9ef733e0 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor): continue formats.append({ 'format_id': '%s-%s' % (version, size), - 'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), + 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), 'language': version[:2], @@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor): formats = [] for format in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) formats.append({ 'url': format_url, 'format': format['type'], diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 3f248b147..915f8862e 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', - r'<meta name="dcterms.title" content="(.*?)"/>', + r'<meta name="dcterms\.title" content="(.*?)"/>', r'<h4 class="headline">(.*?)</h4>'], webpage, 'title') description = self._html_search_meta( diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8b20c03d6..5525f7c9b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): m3u8_id=format_id, fatal=False)) if re.search(self._USP_RE, href): usp_formats = self._extract_m3u8_formats( - re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), + re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for f in usp_formats: diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index e9d0dd19c..21a2d0239 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)', + r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index ec87b94db..a38b2683d 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): 'id': '176747451', 'title': 'Best!', 'uploader': 'Anonymous', - 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, 'skip': 'Only available in .de', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 0a70ca763..7fa271b51 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') info = json.loads(info_json) return { diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1721a3dbd..68b633839 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2206,7 +2206,7 @@ class GenericIE(InfoExtractor): # And then there are the jokers who advertise that they use RTA, # but actually don't. AGE_LIMIT_MARKERS = [ - r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>', + r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', ] if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): age_limit = 18 diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 427499b11..6b927bb44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor): 'width': int(width), 'height': int(height), } for width, height, video_url in re.findall( - r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)] + r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 4f0369433..7cef5f6ce 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?' _TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c1367cf51..a96ea8010 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -203,7 +203,7 @@ class PCMagIE(IGNIE): _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)' IE_NAME = 'pcmag' - _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]' + _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]' _TESTS = [{ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index fe425e786..57c9b0cc4 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_cookies(self, webpage): - policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') - signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') - key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') + signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') + key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( policy, signature, key_pair_id) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 1a4227f6b..e9f4ed738 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor): webpage = self._download_webpage(url, title) title = self._html_search_meta('name', webpage) or self._og_search_title(webpage) config_url = self._html_search_regex( - r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"', + r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"', webpage, 'config URL') config_url = 'http://www.jeuxvideo.com' + config_url diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 7f946c6ed..317ebbc4e 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor): info = { 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), + 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), } video_data = self._download_json(stream_url, content_id) is_live = video_data.get('isLive') diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py index 3c34d4604..8eda69cfc 100644 --- a/youtube_dl/extractor/makertv.py +++ b/youtube_dl/extractor/makertv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' + _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})' _TEST = { 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 1885ac7df..dbd761a67 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor): format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r'<a[^>]+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index c8eacb4f4..2445b8b39 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -11,7 +11,7 @@ from ..utils import ( class MeipaiIE(InfoExtractor): IE_DESC = '美拍' - _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)' _TESTS = [{ # regular uploaded video 'url': 'http://www.meipai.com/media/531697625', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 25af5ddfd..1154a3536 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], + [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], webpage, 'mgid', default=None) if not mgid: diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 6bb64eb63..367e811db 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): else: video_playpath = '' - video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') + video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj') video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index b91d86528..9e8d28f48 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): release_url = self._search_regex( r'video_auth_playlist_url\s*=\s*"([^"]+)"', webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path') + theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') video_id = theplatform_path.split('/')[-1] query = { 'mbr': 'true', diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index e8131333f..2047d4402 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -43,7 +43,7 @@ class NaverIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: error = self._html_search_regex( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fa4ef20c5..b8fe24407 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): IE_NAME = 'hetklokhuis' - _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 2b830cf47..3c8053a26 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor): video_url = self._html_search_regex( r'<param name="src" value="([^"]+)"', webpage, 'video url') title = self._html_search_regex( - r'<title>([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', + r'([^<]+)   RUHD\.ru - Видео Высокого качества №1 в России!', webpage, 'title') description = self._html_search_regex( r'(?s)
(.+?)', diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index cce65fb10..ae3dd1380 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): r'(?s)([^<]+)', coursepage, 'description', fatal=False) - links = orderedSet(re.findall(r'', coursepage)) + links = orderedSet(re.findall(r'', coursepage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): rootpage = self._download_webpage(rootURL, info['id'], errnote='Unable to download course info page') - links = orderedSet(re.findall(r'', rootpage)) + links = orderedSet(re.findall(r'', rootpage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index de236bbba..b1a985ff6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): def hex_to_bytes(hex): return binascii.a2b_hex(hex.encode('ascii')) - relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) + relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 33683b139..dc3dd03c8 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor): info_dict = self._extract_jwplayer_data( webpage, video_id, require_title=False) uploader = self._html_search_regex( - r': ([^<]+)', + r': ([^<]+)', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( - r': (?:[^<]+)', + r': (?:[^<]+)', webpage, 'uploader id', fatal=False) info_dict.update({ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 0df3ad7c7..1b0b96371 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE): webpage = self._download_webpage(url, video_id) iframe_url = self._html_search_regex( - r']+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', + r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', webpage, 'video iframe', default=None) if iframe_url: return self.url_result(iframe_url) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b8b8bf979..bcc28693a 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor): class ViceArticleIE(InfoExtractor): IE_NAME = 'vice:article' - _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py index 5de8273c3..cf690d7b0 100644 --- a/youtube_dl/extractor/videopremium.py +++ b/youtube_dl/extractor/videopremium.py @@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor): webpage_url = 'http://videopremium.tv/' + video_id webpage = self._download_webpage(webpage_url, video_id) - if re.match(r'^]*>window.location\s*=', webpage): + if re.match(r'^]*>window\.location\s*=', webpage): # Download again, we need a cookie webpage = self._download_webpage( webpage_url, video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index edd8713b7..54f5d7279 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1683,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_uploader_id = None video_uploader_url = None mobj = re.search( - r'', + r'', video_webpage) if mobj is not None: video_uploader_id = mobj.group('uploader_id') From ae5af89079558e0e128dc4d7f033459e68ad81e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Oct 2017 23:52:39 +0700 Subject: [PATCH 22/97] [hrti:playlist] Relax _VALID_URL --- youtube_dl/extractor/hrti.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 7cef5f6ce..6424d34ac 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P[0-9]+)/(?P[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P[0-9]+)/(?P[^/]+)?' _TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { @@ -185,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE): }, { 'url': 'https://hrti.hrt.hr/#/video/list/category/212/', 'only_matching': True, + }, { + 'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena', + 'only_matching': True, }] def _real_extract(self, url): From 9e71f88105e546573b8615d49f9d0c064496d6e6 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Mon, 9 Oct 2017 22:48:26 +0200 Subject: [PATCH 23/97] [vvvvid] Fix typo --- youtube_dl/extractor/vvvvid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index d44ec85fd..656a4b9e5 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -133,7 +133,7 @@ class VVVVIDIE(InfoExtractor): 'season_id': season_id, 'season_number': video_data.get('season_number'), 'episode_id': str_or_none(video_data.get('id')), - 'epidode_number': int_or_none(video_data.get('number')), + 'episode_number': int_or_none(video_data.get('number')), 'episode_title': video_data['title'], 'view_count': int_or_none(video_data.get('views')), 'like_count': int_or_none(video_data.get('video_likes')), From 01c742ecd09be734ca4f3db08aba73424683ac1b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 10 Oct 2017 23:20:38 +0800 Subject: [PATCH 24/97] [facebook] Support thumbnails (closes #14416) --- ChangeLog | 6 ++++++ youtube_dl/extractor/facebook.py | 18 ++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2feb3dc2d..2684cbcdd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [facebook] Support thumbnails (#14416) + + version 2017.10.07 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4b3f6cc86..220ada3a6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor): 'uploader': 'Tennis on Facebook', 'upload_date': '20140908', 'timestamp': 1410199200, - } + }, + 'skip': 'Requires logging in', }, { - 'note': 'Video without discernible title', 'url': 'https://www.facebook.com/video.php?v=274175099429670', 'info_dict': { 'id': '274175099429670', @@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, + 'thumbnail': r're:^https?://.*', }, 'expected_warnings': [ 'title' @@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor): 'upload_date': '20160110', 'timestamp': 1452431627, }, + 'skip': 'Requires logging in', }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', @@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10153664894881749', 'ext': 'mp4', - 'title': 'Facebook video #10153664894881749', + 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', + 'thumbnail': r're:^https?://.*', + 'timestamp': 1456259628, + 'upload_date': '20160223', + 'uploader': 'Barack Obama', }, }, { # have 1080P, but only up to 720p in swf params @@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', - 'title': 'Holocaust survivor becomes US citizen', + 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...', 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', + 'thumbnail': r're:^https?://.*', }, }, { # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall @@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', + 'thumbnail': r're:^https?://.*', }, 'params': { 'skip_download': True, @@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor): timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) + thumbnail = self._og_search_thumbnail(webpage) info_dict = { 'id': video_id, @@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'uploader': uploader, 'timestamp': timestamp, + 'thumbnail': thumbnail, } return webpage, info_dict From d0f2d6411406a35c9593bbb85375c2d7f8300c77 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 10 Oct 2017 18:45:10 +0200 Subject: [PATCH 25/97] [slideslive] Add extractor (closes #2680) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/slideslive.py | 34 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/slideslive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24e9acda6..d0f71aecd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -943,6 +943,7 @@ from .skynewsarabia import ( ) from .skysports import SkySportsIE from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .smotri import ( SmotriIE, diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py new file mode 100644 index 000000000..104576033 --- /dev/null +++ b/youtube_dl/extractor/slideslive.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SlidesLiveIE(InfoExtractor): + _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', + 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', + 'info_dict': { + 'id': 'LMtgR8ba0b0', + 'ext': 'mp4', + 'title': '38902413: external video', + 'description': '3890241320170925-9-1yd6ech.mp4', + 'uploader': 'SlidesLive Administrator', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20170925', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + url, video_id, headers={'Accept': 'application/json'}) + service_name = video_data['video_service_name'] + if service_name == 'YOUTUBE': + yt_video_id = video_data['video_service_id'] + return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + else: + raise ExtractorError( + 'Unsupported service name: {0}'.format(service_name), expected=True) From 04af3aca049588b6b3d4d4b57ee47224fdeee90f Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Thu, 5 Oct 2017 21:37:18 +0700 Subject: [PATCH 26/97] Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/youtube.py | 33 ------------------------------ 2 files changed, 34 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0f71aecd..d96eafbc3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1346,7 +1346,6 @@ from .youtube import ( YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, - YoutubeSharedVideoIE, YoutubeShowIE, YoutubeSubscriptionsIE, YoutubeTruncatedIDIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 54f5d7279..6e2d57d6a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2040,39 +2040,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P[0-9A-Za-z_-]{11})' - IE_NAME = 'youtube:shared' - - _TEST = { - 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', - 'info_dict': { - 'id': 'uPDB5I9wfp8', - 'ext': 'webm', - 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', - 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', - 'upload_date': '20160219', - 'uploader': 'Pocoyo - Português (BR)', - 'uploader_id': 'PocoyoBrazil', - }, - 'add_ie': ['Youtube'], - 'params': { - # There are already too many Youtube downloads - 'skip_download': True, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - real_video_id = self._html_search_meta( - 'videoId', webpage, 'YouTube video id', fatal=True) - - return self.url_result(real_video_id, YoutubeIE.ie_key()) - - class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: From dfc80bdd2e4ef3d30f161a93f99f3050537944ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 11 Oct 2017 02:03:00 +0800 Subject: [PATCH 27/97] [ChangeLog] Update after #14420 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 2684cbcdd..c541c4f62 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +- [youtube:shared] Removed extractor (#14420) + [facebook] Support thumbnails (#14416) From cdab1df91242fb617b09a31c023822ef31ea37b8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 10:04:46 +0000 Subject: [PATCH 28/97] [afreecatv] remove AfreecaTVGlobalIE the website now show this message > Global AfreecaTV will be merged and integrated on July 20th, 2017. Every user around the world are now able to interact with one another on www.afreecatv.com! --- youtube_dl/extractor/afreecatv.py | 104 ----------------------------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 1 insertion(+), 108 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 2c58f4617..e6513c7a4 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -271,107 +271,3 @@ class AfreecaTVIE(InfoExtractor): }) return info - - -class AfreecaTVGlobalIE(AfreecaTVIE): - IE_NAME = 'afreecatv:global' - _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P\d+)(?:/v/(?P\d+))?' - _TESTS = [{ - 'url': 'http://afreeca.tv/36853014/v/58301', - 'info_dict': { - 'id': '58301', - 'title': 'tryhard top100', - 'uploader_id': '36853014', - 'uploader': 'makgi Hearthstone Live!', - }, - 'playlist_count': 3, - }] - - def _real_extract(self, url): - channel_id, video_id = re.match(self._VALID_URL, url).groups() - video_type = 'video' if video_id else 'live' - query = { - 'pt': 'view', - 'bid': channel_id, - } - if video_id: - query['vno'] = video_id - video_data = self._download_json( - 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), - video_id or channel_id, query=query)['channel'] - - if video_data.get('result') != 1: - raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) - - title = video_data['title'] - - info = { - 'thumbnail': video_data.get('thumb'), - 'view_count': int_or_none(video_data.get('vcnt')), - 'age_limit': int_or_none(video_data.get('grade')), - 'uploader_id': channel_id, - 'uploader': video_data.get('cname'), - } - - if video_id: - entries = [] - for i, f in enumerate(video_data.get('flist', [])): - video_key = self.parse_video_key(f.get('key', '')) - f_url = f.get('file') - if not video_key or not f_url: - continue - entries.append({ - 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), - 'title': title, - 'upload_date': video_key.get('upload_date'), - 'duration': int_or_none(f.get('length')), - 'url': f_url, - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - - info.update({ - 'id': video_id, - 'title': title, - 'duration': int_or_none(video_data.get('length')), - }) - if len(entries) > 1: - info['_type'] = 'multi_video' - info['entries'] = entries - elif len(entries) == 1: - i = entries[0].copy() - i.update(info) - info = i - else: - formats = [] - for s in video_data.get('strm', []): - s_url = s.get('purl') - if not s_url: - continue - stype = s.get('stype') - if stype == 'HLS': - formats.extend(self._extract_m3u8_formats( - s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False)) - elif stype == 'RTMP': - format_id = [stype] - label = s.get('label') - if label: - format_id.append(label) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': s_url, - 'tbr': int_or_none(s.get('bps')), - 'height': int_or_none(s.get('brt')), - 'ext': 'flv', - 'rtmp_live': True, - }) - self._sort_formats(formats) - - info.update({ - 'id': channel_id, - 'title': self._live_title(title), - 'is_live': True, - 'formats': formats, - }) - - return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d96eafbc3..a363d95bf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,10 +31,7 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import ( - AfreecaTVIE, - AfreecaTVGlobalIE, -) +from .afreecatv import AfreecaTVIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From 4fe4bda287f4b506850f0baa6ff86445423751e7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 11:36:05 +0000 Subject: [PATCH 29/97] [tubitv] add support for new url format(fixes #14460) --- youtube_dl/extractor/tubitv.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index c44018aec..36f6c1673 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -13,11 +13,11 @@ from ..utils import ( class TubiTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P[0-9]+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' _GEO_COUNTRIES = ['US'] - _TEST = { + _TESTS = [{ 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', 'info_dict': { @@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor): 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, - } + }, { + 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories', + 'only_matching': True, + }, { + 'url': 'http://tubitv.com/movies/383676/tracker', + 'only_matching': True, + }] def _login(self): (username, password) = self._get_login_info() From 5fe75f976f6c8da76bfea68e073e5f35c4300442 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 14:14:51 +0000 Subject: [PATCH 30/97] [tva] fix extraction(fixes #14328) --- youtube_dl/extractor/tva.py | 48 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index 3ced098f9..b57abeaa4 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -3,52 +3,50 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - int_or_none, - parse_iso8601, + float_or_none, smuggle_url, ) class TVAIE(InfoExtractor): - _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P\d+)' _TEST = { - 'url': 'http://videos.tva.ca/episode/85538', + 'url': 'https://videos.tva.ca/details/_5596811470001', 'info_dict': { - 'id': '85538', + 'id': '5596811470001', 'ext': 'mp4', - 'title': 'Épisode du 25 janvier 2017', - 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98', - 'upload_date': '20170126', - 'timestamp': 1485442329, + 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !', + 'uploader_id': '5481942443001', + 'upload_date': '20171003', + 'timestamp': 1507064617, }, 'params': { # m3u8 download 'skip_download': True, } } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s' def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, - video_id, query={ - '$expand': 'Metadata,CustomId', - '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', - '$format': 'json', + 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ + 'Accept': 'application/json', }) - metadata = video_data.get('Metadata', {}) + + def get_attribute(key): + for attribute in video_data.get('attributes', []): + if attribute.get('key') == key: + return attribute.get('value') + return None return { '_type': 'url_transparent', 'id': video_id, - 'title': video_data['Title'], - 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), - 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), - 'series': video_data.get('ShowName'), - 'episode': metadata.get('EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('EpisodeNumber')), - 'categories': video_data.get('Categories'), - 'average_rating': video_data.get('AverageUserRating'), - 'timestamp': parse_iso8601(video_data.get('CreatedDate')), - 'ie_key': 'Ooyala', + 'title': get_attribute('title'), + 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), + 'description': get_attribute('description'), + 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'), + 'duration': float_or_none(get_attribute('video-duration'), 1000), + 'ie_key': 'BrightcoveNew', } From 26bae2d96509b5c5ec80c27c1ea754b53c53818c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 21:59:30 +0700 Subject: [PATCH 31/97] [generic] Add support for channel9 embeds (closes #14469) --- youtube_dl/extractor/channel9.py | 6 ++++++ youtube_dl/extractor/generic.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index e92894246..81108e704 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor): _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', + webpage) + def _extract_list(self, video_id, rss_url=None): if not rss_url: rss_url = self._RSS_URL % video_id diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 68b633839..6dab4c7f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -101,6 +101,7 @@ from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE +from .channel9 import Channel9IE class GenericIE(InfoExtractor): @@ -2871,6 +2872,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + channel9_urls = Channel9IE._extract_urls(webpage) + if channel9_urls: + return self.playlist_from_matches( + channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in dict1.items(): From 782195a9d417aab21cff4abe35ad9d705f4d8d83 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:48:32 +0000 Subject: [PATCH 32/97] [once] add support for vmap urls --- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/once.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 00d311158..02804d297 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -105,7 +105,7 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 1bf96ea56..a637c8ecf 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class OnceIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' + _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' From 9e38dbb19ca6874c7350647ea2883d5bbb3b50a1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 11 Oct 2017 15:50:00 +0000 Subject: [PATCH 33/97] [voxmedia] add support for recode.net(fixes #14173) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/voxmedia.py | 66 ++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a363d95bf..5629c7623 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1244,7 +1244,10 @@ from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voot import VootIE -from .voxmedia import VoxMediaIE +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index f8e331493..c7a0a88fe 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -2,11 +2,44 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .once import OnceIE from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError + + +class VoxMediaVolumeIE(OnceIE): + _VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P[0-9a-f]{9})' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._search_regex( + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id) + for provider_video_type in ('ooyala', 'youtube', 'brightcove'): + provider_video_id = video_data.get('%s_id' % provider_video_type) + if not provider_video_id: + continue + info = { + 'id': video_id, + 'title': video_data.get('title_short'), + 'description': video_data.get('description_long') or video_data.get('description_short'), + 'thumbnail': video_data.get('brightcove_thumbnail') + } + if provider_video_type == 'brightcove': + info['formats'] = self._extract_once_formats(provider_video_id) + self._sort_formats(info['formats']) + else: + info.update({ + '_type': 'url_transparent', + 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'ie_key': provider_video_type.capitalize(), + }) + return info + raise ExtractorError('Unable to find provider video id') class VoxMediaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', 'info_dict': { @@ -31,6 +64,7 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', }, 'add_ie': ['Ooyala'], + 'skip': 'Video Not Found', }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -84,6 +118,17 @@ class VoxMediaIE(InfoExtractor): 'description': 'md5:e02d56b026d51aa32c010676765a690d', }, }], + }, { + # volume embed, Brightcove Once + 'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya', + 'md5': '01571a896281f77dc06e084138987ea2', + 'info_dict': { + 'id': '1231c973d', + 'ext': 'mp4', + 'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella', + 'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.', + }, + 'add_ie': ['VoxMediaVolume'], }] def _real_extract(self, url): @@ -91,9 +136,14 @@ class VoxMediaIE(InfoExtractor): webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) def create_entry(provider_video_id, provider_video_type, title=None, description=None): + video_url = { + 'youtube': '%s', + 'ooyala': 'ooyala:%s', + 'volume': 'http://volume.vox-cdn.com/embed/%s', + }[provider_video_type] % provider_video_id return { '_type': 'url_transparent', - 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'url': video_url, 'title': title or self._og_search_title(webpage), 'description': description or self._og_search_description(webpage), } @@ -124,17 +174,7 @@ class VoxMediaIE(InfoExtractor): volume_uuid = self._search_regex( r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) if volume_uuid: - volume_webpage = self._download_webpage( - 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) - for provider_video_type in ('ooyala', 'youtube'): - provider_video_id = video_data.get('%s_id' % provider_video_type) - if provider_video_id: - description = video_data.get('description_long') or video_data.get('description_short') - entries.append(create_entry( - provider_video_id, provider_video_type, video_data.get('title_short'), description)) - break + entries.append(create_entry(volume_uuid, 'volume')) if len(entries) == 1: return entries[0] From af0f74288dc1b46147bc8f6b5692d2a21c6e178b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Oct 2017 23:45:03 +0700 Subject: [PATCH 34/97] [YoutubeDL] Improve _default_format_spec (closes #14461) --- test/test_YoutubeDL.py | 10 ++++++++-- youtube_dl/YoutubeDL.py | 31 ++++++++++++++++++------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index db936bfb8..4af92fbd4 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True, 'is_live': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + ydl = YDL({'outtmpl': '-'}) - self.assertEqual(ydl._default_format_spec({}), 'best') + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') - self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') class TestYoutubeDL(unittest.TestCase): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 855d6b8e5..342d6b47c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1078,22 +1078,27 @@ class YoutubeDL(object): return _filter def _default_format_spec(self, info_dict, download=True): - req_format_list = [] - def can_have_partial_formats(): - if self.params.get('simulate', False): - return True - if not download: - return True - if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': - return False - if info_dict.get('is_live'): - return False + def can_merge(): merger = FFmpegMergerPP(self) return merger.available and merger.can_merge() - if can_have_partial_formats(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') + + def prefer_best(): + if self.params.get('simulate', False): + return False + if not download: + return False + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return True + if info_dict.get('is_live'): + return True + if not can_merge(): + return True + return False + + req_format_list = ['bestvideo+bestaudio', 'best'] + if prefer_best(): + req_format_list.reverse() return '/'.join(req_format_list) def build_format_selector(self, format_spec): From 694b61545cc3fa37c31d5f62d26101fb2620a01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:41:20 +0700 Subject: [PATCH 35/97] [nexx] Add support for shortcuts and relax domain id extraction --- youtube_dl/extractor/nexx.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index d0235fdfe..071879ba4 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -18,7 +18,13 @@ from ..utils import ( class NexxIE(InfoExtractor): - _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/(?P\d+)' + _VALID_URL = r'''(?x) + (?: + https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P\d+)/videos/byid/| + nexx:(?P\d+): + ) + (?P\d+) + ''' _TESTS = [{ # movie 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907', @@ -62,8 +68,18 @@ class NexxIE(InfoExtractor): }, { 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', 'only_matching': True, + }, { + 'url': 'nexx:748:128907', + 'only_matching': True, }] + @staticmethod + def _extract_domain_id(webpage): + mobj = re.search( + r']+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', + webpage) + return mobj.group('id') if mobj else None + @staticmethod def _extract_urls(webpage): # Reference: @@ -72,11 +88,8 @@ class NexxIE(InfoExtractor): entries = [] # JavaScript Integration - mobj = re.search( - r']+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P\d+)', - webpage) - if mobj: - domain_id = mobj.group('id') + domain_id = NexxIE._extract_domain_id(webpage) + if domain_id: for video_id in re.findall( r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)', webpage): @@ -112,7 +125,8 @@ class NexxIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - domain_id, video_id = mobj.group('domain_id', 'id') + domain_id = mobj.group('domain_id') or mobj.group('domain_id_s') + video_id = mobj.group('id') # Reverse engineered from JS code (see getDeviceID function) device_id = '%d:%d:%d%d' % ( From ff3f1a62f087332fa6409b5cbc39871d49e74f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Oct 2017 00:44:13 +0700 Subject: [PATCH 36/97] [funk] Add extractor (closes #14464) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/funk.py | 43 ++++++++++++++++++++++++++++++ youtube_dl/extractor/generic.py | 16 ----------- 3 files changed, 44 insertions(+), 16 deletions(-) create mode 100644 youtube_dl/extractor/funk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5629c7623..ecb33bc9e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -381,6 +381,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freshlive import FreshLiveIE from .funimation import FunimationIE +from .funk import FunkIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py new file mode 100644 index 000000000..ce5c67fbb --- /dev/null +++ b/youtube_dl/extractor/funk.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE +from ..utils import extract_attributes + + +class FunkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P[^?/#]+)' + _TESTS = [{ + 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/', + 'md5': '4d40974481fa3475f8bccfd20c5361f8', + 'info_dict': { + 'id': '716599', + 'ext': 'mp4', + 'title': 'Neue Rechte Welle', + 'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69', + 'timestamp': 1501337639, + 'upload_date': '20170729', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + domain_id = NexxIE._extract_domain_id(webpage) or '741' + nexx_id = extract_attributes(self._search_regex( + r'(]id=["\']mediaplayer-funk[^>]+>)', + webpage, 'media player'))['data-id'] + + return self.url_result( + 'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(), + video_id=nexx_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6dab4c7f4..39630b6f6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1613,22 +1613,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['BrightcoveLegacy'], }, - # Nexx embed - { - 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', - 'info_dict': { - 'id': '247746', - 'ext': 'mp4', - 'title': "Yesterday's Jam (OV)", - 'description': 'md5:09bc0984723fed34e2581624a84e05f0', - 'timestamp': 1492594816, - 'upload_date': '20170419', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, # Facebook