From 23495d6a39d357989bf507c0bbb7c022e7a9e2c7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 15 Jul 2016 19:53:10 +0800 Subject: [PATCH 01/26] Revert "[ffmpeg] Fix embedding subtitles (#9063)" This reverts commit ccff2c404d7ea9f5b21ede8ae57bb79feec7eb94. Fixes #10081. The new approach breaks embedding subtitles into video-only or audio-only files. FFMpeg provides a trick: add '?' after the argument of '-map' so that a missing stream is ignored. For example: opts = [ '-map', '0:v?', '-c:v', 'copy', '-map', '0:a?', '-c:a', 'copy', # other options... ] Unfortunately, such a format is not implemented in avconv, either. I guess adding '-ignore_unknown' if self.basename == 'ffmpeg' is the best solution. However, the example mentioned in #9063 no longer serves problematic files, so I can't test it. I'll reopen #9063 and wait for another example so that I can test '-ignore_unknown'. --- youtube_dl/postprocessor/ffmpeg.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index c1e9eb159..fa99b0c2a 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -363,10 +363,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): input_files = [filename] + sub_filenames opts = [ - '-map', '0:v', - '-c:v', 'copy', - '-map', '0:a', - '-c:a', 'copy', + '-map', '0', + '-c', 'copy', # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', From 317f7ab634174666e458807fa309a2e7ba459267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Jul 2016 00:55:43 +0700 Subject: [PATCH 02/26] [YoutubeDL] Fix format selection with filters (Closes #10083) --- test/test_YoutubeDL.py | 34 ++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 63 ++++++++++++++++++++++++++++++----------- 2 files changed, 81 insertions(+), 16 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 
ca25025e2..0dfe25c00 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) + def test_audio_only_extractor_format_selection(self): + # For extractors with incomplete formats (all formats are audio-only or + # video-only) best and worst should fallback to corresponding best/worst + # video-only or audio-only formats (as per + # https://github.com/rg3/youtube-dl/pull/5556) + formats = [ + {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'high') + + ydl = YDL({'format': 'worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'low') + + def test_format_not_available(self): + formats = [ + {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # This must fail since complete video-audio format does not match filter + # and extractor does not provide incomplete only formats (i.e. only + # video-only or audio-only). 
+ ydl = YDL({'format': 'best[height>360]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + def test_invalid_format_specs(self): def assert_syntax_error(format_spec): ydl = YDL({'format': format_spec}) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ba72ec6f3..cf9cd8297 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals import collections import contextlib +import copy import datetime import errno import fileinput @@ -1051,9 +1052,9 @@ class YoutubeDL(object): if isinstance(selector, list): fs = [_build_selector_function(s) for s in selector] - def selector_function(formats): + def selector_function(ctx): for f in fs: - for format in f(formats): + for format in f(ctx): yield format return selector_function elif selector.type == GROUP: @@ -1061,17 +1062,17 @@ class YoutubeDL(object): elif selector.type == PICKFIRST: fs = [_build_selector_function(s) for s in selector.selector] - def selector_function(formats): + def selector_function(ctx): for f in fs: - picked_formats = list(f(formats)) + picked_formats = list(f(ctx)) if picked_formats: return picked_formats return [] elif selector.type == SINGLE: format_spec = selector.selector - def selector_function(formats): - formats = list(formats) + def selector_function(ctx): + formats = list(ctx['formats']) if not formats: return if format_spec == 'all': @@ -1084,9 +1085,10 @@ class YoutubeDL(object): if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: yield audiovideo_formats[format_idx] - # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format - elif (all(f.get('acodec') != 'none' for f in formats) or - all(f.get('vcodec') != 'none' for f in formats)): + # for extractors with incomplete formats (audio only (soundcloud) + # or video only (imgur)) we will fallback to best/worst + # {video,audio}-only format + elif 
ctx['incomplete_formats']: yield formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ @@ -1160,17 +1162,18 @@ class YoutubeDL(object): } video_selector, audio_selector = map(_build_selector_function, selector.selector) - def selector_function(formats): - formats = list(formats) - for pair in itertools.product(video_selector(formats), audio_selector(formats)): + def selector_function(ctx): + for pair in itertools.product( + video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): yield _merge(pair) filters = [self._build_format_filter(f) for f in selector.filters] - def final_selector(formats): + def final_selector(ctx): + ctx_copy = copy.deepcopy(ctx) for _filter in filters: - formats = list(filter(_filter, formats)) - return selector_function(formats) + ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) + return selector_function(ctx_copy) return final_selector stream = io.BytesIO(format_spec.encode('utf-8')) @@ -1377,7 +1380,35 @@ class YoutubeDL(object): req_format_list.append('best') req_format = '/'.join(req_format_list) format_selector = self.build_format_selector(req_format) - formats_to_download = list(format_selector(formats)) + + # While in format selection we may need to have an access to the original + # format set in order to calculate some metrics or do some processing. + # For now we need to be able to guess whether original formats provided + # by extractor are incomplete or not (i.e. whether extractor provides only + # video-only or audio-only formats) for proper formats selection for + # extractors with such incomplete formats (see + # https://github.com/rg3/youtube-dl/pull/5556). + # Since formats may be filtered during format selection and may not match + # the original formats the results may be incorrect. Thus original formats + # or pre-calculated metrics should be passed to format selection routines + # as well. 
+ # We will pass a context object containing all necessary additional data + # instead of just formats. + # This fixes incorrect format selection issue (see + # https://github.com/rg3/youtube-dl/issues/10083). + incomplete_formats = all( + # All formats are video-only or + f.get('vcodec') != 'none' and f.get('acodec') == 'none' or + # all formats are audio-only + f.get('vcodec') == 'none' and f.get('acodec') != 'none' + for f in formats) + + ctx = { + 'formats': formats, + 'incomplete_formats': incomplete_formats, + } + + formats_to_download = list(format_selector(ctx)) if not formats_to_download: raise ExtractorError('requested format not available', expected=True) From 2e221ca3a85ec7a0c441dfcf301bf1c98614b9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Jul 2016 01:18:05 +0700 Subject: [PATCH 03/26] [YoutubeDL] Fix incomplete formats check --- youtube_dl/YoutubeDL.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index cf9cd8297..6551f086f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1396,12 +1396,11 @@ class YoutubeDL(object): # instead of just formats. # This fixes incorrect format selection issue (see # https://github.com/rg3/youtube-dl/issues/10083). 
- incomplete_formats = all( + incomplete_formats = ( # All formats are video-only or - f.get('vcodec') != 'none' and f.get('acodec') == 'none' or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or # all formats are audio-only - f.get('vcodec') == 'none' and f.get('acodec') != 'none' - for f in formats) + all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) ctx = { 'formats': formats, From 691fbe7f98dd648f3971f259b323e62c73d56e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Jul 2016 02:20:00 +0700 Subject: [PATCH 04/26] release 2016.07.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c5898701f..b1b8def3c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.13 +[debug] youtube-dl version 2016.07.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 282bd0e6b..cf194340a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -14,6 +14,7 @@ - **8tracks** - **91porn** - **9gag** + - **9now.com.au** - **abc.net.au** - **Abc7News** - **abcnews** @@ -567,6 +568,7 @@ - **rtve.es:alacarta**: RTVE a la carta - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams + - **rtve.es:television** - **RTVNH** - **Rudo** - **RUHD** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 56f9f5986..55d07d420 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.13' +__version__ = '2016.07.16' From 21ba7d0981e04237ae4ca8690a6ead331b3a82d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 16 Jul 2016 00:02:26 +0800 Subject: [PATCH 05/26] [cbc] Skip geo-restricted test case --- youtube_dl/extractor/cbc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index ff663d079..06772d492 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -25,6 
+25,7 @@ class CBCIE(InfoExtractor): 'upload_date': '20160203', 'uploader': 'CBCC-NEW', }, + 'skip': 'Geo-restricted to Canada', }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', @@ -64,6 +65,7 @@ class CBCIE(InfoExtractor): 'uploader': 'CBCC-NEW', }, }], + 'skip': 'Geo-restricted to Canada', }] @classmethod @@ -104,6 +106,7 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20160210', 'uploader': 'CBCC-NEW', }, + 'skip': 'Geo-restricted to Canada', }, { # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 'url': 'http://www.cbc.ca/player/play/2657631896', From ae7b8462032ef86d803162067f0dbbbd03fa49e4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 16 Jul 2016 00:03:12 +0800 Subject: [PATCH 06/26] [cbsnews] Update _TESTS of CBSNewsLiveVideoIE --- youtube_dl/extractor/cbsnews.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 387537e76..9328e3e20 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -26,6 +26,7 @@ class CBSNewsIE(CBSBaseIE): # rtmp download 'skip_download': True, }, + 'skip': 'Subscribers only', }, { 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', @@ -69,7 +70,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): IE_DESC = 'CBS News Live Videos' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[\da-z_-]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', @@ -77,7 +78,15 @@ class CBSNewsLiveVideoIE(InfoExtractor): 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, - } + 'skip': 'Video gone, redirected to http://www.cbsnews.com/live/', + }, { + 'url': 
'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/', + 'info_dict': { + 'id': 'video-shows-intense-paragliding-accident', + 'ext': 'flv', + 'title': 'Video Shows Intense Paragliding Accident', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From aadd3ce21fe4acacc5bf26c3703cc09d05e7b3f4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 16 Jul 2016 00:04:30 +0800 Subject: [PATCH 07/26] [cliphunter] Update _TESTS --- youtube_dl/extractor/cliphunter.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 19f8b397e..252c2e846 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor): (?P[0-9]+)/ (?P.+?)(?:$|[#\?]) ''' - _TEST = { + _TESTS = [{ 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', 'info_dict': { @@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor): 'title': 'Fun Jynx Maze solo', 'thumbnail': 're:^https?://.*\.jpg$', 'age_limit': 18, - } - } + }, + 'skip': 'Video gone', + }, { + 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz', + 'md5': '55a723c67bfc6da6b0cfa00d55da8a27', + 'info_dict': { + 'id': '2019449', + 'ext': 'mp4', + 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', + 'thumbnail': 're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From 998895dffac2170b7d49b0478561db05cc0730ca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 16 Jul 2016 01:21:20 +0800 Subject: [PATCH 08/26] [cloudy] Drop videoraj.to videoraj.ch is now a shoe-selling website, and videoraj.to domain name is gone. 
--- youtube_dl/extractor/cloudy.py | 48 ++++++++++++---------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 9a28ef354..ae5ba0015 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse_urlencode, compat_HTTPError, ) from ..utils import ( @@ -17,37 +16,26 @@ from ..utils import ( class CloudyIE(InfoExtractor): - _IE_DESC = 'cloudy.ec and videoraj.ch' + _IE_DESC = 'cloudy.ec' _VALID_URL = r'''(?x) - https?://(?:www\.)?(?Pcloudy\.ec|videoraj\.(?:ch|to))/ + https?://(?:www\.)?cloudy\.ec/ (?:v/|embed\.php\?id=) (?P[A-Za-z0-9]+) ''' - _EMBED_URL = 'http://www.%s/embed.php?id=%s' - _API_URL = 'http://www.%s/api/player.api.php?%s' + _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s' + _API_URL = 'http://www.cloudy.ec/api/player.api.php' _MAX_TRIES = 2 - _TESTS = [ - { - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'flv', - 'title': 'Funny Cats and Animals Compilation june 2013', - } - }, - { - 'url': 'http://www.videoraj.to/v/47f399fd8bb60', - 'md5': '7d0f8799d91efd4eda26587421c3c3b0', - 'info_dict': { - 'id': '47f399fd8bb60', - 'ext': 'flv', - 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', - } + _TEST = { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', } - ] + } - def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): + def _extract_video(self, video_id, file_key, error_url=None, try_num=0): if try_num > self._MAX_TRIES - 1: raise ExtractorError('Unable to extract video URL', expected=True) @@ -64,9 +52,8 @@ class 
CloudyIE(InfoExtractor): 'errorUrl': error_url, }) - data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form)) player_data = self._download_webpage( - data_url, video_id, 'Downloading player data') + self._API_URL, video_id, 'Downloading player data', query=form) data = compat_parse_qs(player_data) try_num += 1 @@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: self.report_warning('Invalid video URL, requesting another', video_id) - return self._extract_video(video_host, video_id, file_key, video_url, try_num) + return self._extract_video(video_id, file_key, video_url, try_num) return { 'id': video_id, @@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_host = mobj.group('host') video_id = mobj.group('id') - url = self._EMBED_URL % (video_host, video_id) + url = self._EMBED_URL % video_id webpage = self._download_webpage(url, video_id) file_key = self._search_regex( [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], webpage, 'file_key') - return self._extract_video(video_host, video_id, file_key) + return self._extract_video(video_id, file_key) From 371ddb14fe651d4a1e5a8310d6d7c0e395cd92b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 Jul 2016 15:59:43 +0700 Subject: [PATCH 09/26] [extractor/generic] Change twitter:player embeds priority to lowest (Closes #10090) --- youtube_dl/extractor/generic.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cddd1a817..6d346cb1c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1249,6 +1249,20 @@ class GenericIE(InfoExtractor): 'uploader': 'www.hudl.com', }, }, + # twitter:player:stream embed + { + 'url': 
'http://www.rtl.be/info/video/589263.aspx?CategoryID=288', + 'info_dict': { + 'id': 'master', + 'ext': 'mp4', + 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine', + 'uploader': 'www.rtl.be', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + }, # twitter:player embed { 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/', @@ -2184,11 +2198,6 @@ class GenericIE(InfoExtractor): 'uploader': video_uploader, } - # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser - embed_url = self._html_search_meta('twitter:player', webpage, default=None) - if embed_url: - return self.url_result(embed_url) - # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default=None, expected_type='VideoObject') @@ -2245,6 +2254,9 @@ class GenericIE(InfoExtractor): r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) if not found: # Try to find twitter cards info + # twitter:player:stream should be checked before twitter:player since + # it is expected to contain a raw stream (see + # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) found = filter_video(re.findall( r' Date: Sun, 3 Jul 2016 00:39:35 +0200 Subject: [PATCH 10/26] [nintendo] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nintendo.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/nintendo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 45817d7df..2761f7095 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -538,6 +538,7 @@ from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .ninenow import NineNowIE +from .nintendo import NintendoIE from .noco import NocoIE from .normalboots import NormalbootsIE 
from .nosvideo import NosVideoIE diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py new file mode 100644 index 000000000..57333ada0 --- /dev/null +++ b/youtube_dl/extractor/nintendo.py @@ -0,0 +1,47 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + +import re + + +class NintendoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P[\w-]+)' + _TESTS = [{ + 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', + 'info_dict': { + 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW', + 'ext': 'flv', + 'title': 'Duck Hunt Wii U VC NES - Trailer', + 'duration': 60.326, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + }, { + 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u', + 'info_dict': { + 'id': 'tokyo-mirage-sessions-fe-wii-u', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + 'playlist_count': 4, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + ooyala_codes = re.findall( + r'data-video-code=(["\'])(?P.+?)\1', + webpage) + + entries = [] + for ooyala_code in ooyala_codes: + entries.append(OoyalaIE._build_url_result(ooyala_code[1])) + + return self.playlist_result(entries, video_id, self._og_search_title(webpage)) From 49bc16b95ed7d418b353fda46ba845ac0eca648b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 00:01:25 +0700 Subject: [PATCH 11/26] [nintendo] Improve playlist extraction (Closes #9986) --- youtube_dl/extractor/nintendo.py | 35 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py index 57333ada0..4b4e66b05 100644 --- a/youtube_dl/extractor/nintendo.py +++ b/youtube_dl/extractor/nintendo.py @@ -1,13 +1,15 @@ +# coding: utf-8 
from __future__ import unicode_literals -from .common import InfoExtractor -from .ooyala import OoyalaIE - import re +from .common import InfoExtractor +from .ooyala import OoyalaIE +from ..utils import unescapeHTML + class NintendoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', 'info_dict': { @@ -24,24 +26,21 @@ class NintendoIE(InfoExtractor): 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u', 'info_dict': { 'id': 'tokyo-mirage-sessions-fe-wii-u', + 'title': 'Tokyo Mirage Sessions ♯FE', }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - 'playlist_count': 4, + 'playlist_count': 3, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + page_id = self._match_id(url) - ooyala_codes = re.findall( - r'data-video-code=(["\'])(?P.+?)\1', - webpage) + webpage = self._download_webpage(url, page_id) - entries = [] - for ooyala_code in ooyala_codes: - entries.append(OoyalaIE._build_url_result(ooyala_code[1])) + entries = [ + OoyalaIE._build_url_result(m.group('code')) + for m in re.finditer( + r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P(?:(?!\2).)+)\2', + webpage)] - return self.playlist_result(entries, video_id, self._og_search_title(webpage)) + return self.playlist_result( + entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False))) From 59cc5bd8bfb5ecce82e26daa1b8f830edd3eb8b7 Mon Sep 17 00:00:00 2001 From: Zach Bruggeman Date: Fri, 8 Apr 2016 13:50:09 -0700 Subject: [PATCH 12/26] [streamable] Add extractor --- youtube_dl/extractor/streamable.py | 76 ++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/streamable.py diff --git a/youtube_dl/extractor/streamable.py 
b/youtube_dl/extractor/streamable.py new file mode 100644 index 000000000..5aa5f1ba5 --- /dev/null +++ b/youtube_dl/extractor/streamable.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none +) + + +class StreamableIE(InfoExtractor): + _VALID_URL = r'https?://streamable\.com/(?P[\w]+)' + _TESTS = [ + { + 'url': 'https://streamable.com/dnd1', + 'md5': '3e3bc5ca088b48c2d436529b64397fef', + 'info_dict': { + 'id': 'dnd1', + 'ext': 'mp4', + 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol', + 'thumbnail': 'http://cdn.streamable.com/image/dnd1.jpg', + } + }, + # older video without bitrate, width/height, etc. info + { + 'url': 'https://streamable.com/moo', + 'md5': '2cf6923639b87fba3279ad0df3a64e73', + 'info_dict': { + 'id': 'moo', + 'ext': 'mp4', + 'title': '"Please don\'t eat me!"', + 'thumbnail': 'http://cdn.streamable.com/image/f6441ae0c84311e4af010bc47400a0a4.jpg', + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + # Note: Using the ajax API, as the public Streamable API doesn't seem + # to return video info like the title properly sometimes, and doesn't + # include info like the video duration + video = self._download_json( + 'https://streamable.com/ajax/videos/%s' % video_id, video_id) + + # Format IDs: + # 0 The video is being uploaded + # 1 The video is being processed + # 2 The video has at least one file ready + # 3 The video is unavailable due to an error + status = video.get('status') + if status != 2: + raise ExtractorError( + 'This video is currently unavailable. 
It may still be uploading or processing.', + expected=True) + + formats = [] + for key, info in video.get('files').items(): + formats.append({ + 'format_id': key, + 'url': info['url'], + 'width': info.get('width'), + 'height': info.get('height'), + 'filesize': info.get('size'), + 'fps': info.get('framerate'), + 'vbr': float_or_none(info.get('bitrate'), 1000) + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video.get('result_title'), + 'thumbnail': video.get('thumbnail_url'), + 'duration': video.get('duration'), + 'formats': formats + } From 1a8f0773b6b2550c2763f4522481df7695ad4b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 02:01:00 +0700 Subject: [PATCH 13/26] [streamable] Fix title extraction and improve (Closes #9122) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/streamable.py | 48 ++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2761f7095..10b2390bf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -783,6 +783,7 @@ from .srmediathek import SRMediathekIE from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE +from .streamable import StreamableIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 5aa5f1ba5..1c61437a4 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -4,12 +4,13 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, - float_or_none + float_or_none, + int_or_none, ) class StreamableIE(InfoExtractor): - _VALID_URL = r'https?://streamable\.com/(?P[\w]+)' + _VALID_URL = 
r'https?://streamable\.com/(?:e/)?(?P\w+)' _TESTS = [ { 'url': 'https://streamable.com/dnd1', @@ -18,7 +19,12 @@ class StreamableIE(InfoExtractor): 'id': 'dnd1', 'ext': 'mp4', 'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol', - 'thumbnail': 'http://cdn.streamable.com/image/dnd1.jpg', + 'thumbnail': 're:https?://.*\.jpg$', + 'uploader': 'teabaker', + 'timestamp': 1454964157.35115, + 'upload_date': '20160208', + 'duration': 61.516, + 'view_count': int, } }, # older video without bitrate, width/height, etc. info @@ -29,8 +35,16 @@ class StreamableIE(InfoExtractor): 'id': 'moo', 'ext': 'mp4', 'title': '"Please don\'t eat me!"', - 'thumbnail': 'http://cdn.streamable.com/image/f6441ae0c84311e4af010bc47400a0a4.jpg', + 'thumbnail': 're:https?://.*\.jpg$', + 'timestamp': 1426115495, + 'upload_date': '20150311', + 'duration': 12, + 'view_count': int, } + }, + { + 'url': 'https://streamable.com/e/dnd1', + 'only_matching': True, } ] @@ -54,23 +68,31 @@ class StreamableIE(InfoExtractor): 'This video is currently unavailable. 
It may still be uploading or processing.', expected=True) + title = video.get('reddit_title') or video['title'] + formats = [] - for key, info in video.get('files').items(): + for key, info in video['files'].items(): + if not info.get('url'): + continue formats.append({ 'format_id': key, - 'url': info['url'], - 'width': info.get('width'), - 'height': info.get('height'), - 'filesize': info.get('size'), - 'fps': info.get('framerate'), + 'url': self._proto_relative_url(info['url']), + 'width': int_or_none(info.get('width')), + 'height': int_or_none(info.get('height')), + 'filesize': int_or_none(info.get('size')), + 'fps': int_or_none(info.get('framerate')), 'vbr': float_or_none(info.get('bitrate'), 1000) }) self._sort_formats(formats) return { 'id': video_id, - 'title': video.get('result_title'), - 'thumbnail': video.get('thumbnail_url'), - 'duration': video.get('duration'), + 'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('thumbnail_url')), + 'uploader': video.get('owner', {}).get('user_name'), + 'timestamp': float_or_none(video.get('date_added')), + 'duration': float_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('plays')), 'formats': formats } From af21f56f980e22086ac734cf266141c7a9ff21ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 03:40:58 +0700 Subject: [PATCH 14/26] [ard] Add support for rbb-online (Closes #10095) --- youtube_dl/extractor/ard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 13a06396d..91c78887a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -20,7 +20,7 @@ from ..compat import compat_etree_fromstring class ARDMediathekIE(InfoExtractor): IE_NAME = 'ARD:mediathek' - _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' 
+ _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114', From 7cdfc4c90fd4f913a96e4493a49af50189b26480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 16:56:39 +0700 Subject: [PATCH 15/26] [mtvservices] Strip description --- youtube_dl/extractor/mtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index dd0639589..f3ec2ebbc 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -15,6 +15,7 @@ from ..utils import ( float_or_none, HEADRequest, sanitized_Request, + strip_or_none, unescapeHTML, url_basename, RegexNotFoundError, @@ -133,7 +134,7 @@ class MTVServicesInfoExtractor(InfoExtractor): message += item.text raise ExtractorError(message, expected=True) - description = xpath_text(itemdoc, 'description') + description = strip_or_none(xpath_text(itemdoc, 'description')) title_el = None if title_el is None: From 45550d10395f1e7fe59c4c3ff8ff8d909074a8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 16:58:58 +0700 Subject: [PATCH 16/26] [comedycentraltv] Add extractor (Closes #10101) --- youtube_dl/extractor/comedycentral.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 2b6aaa3aa..63f68f765 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -273,3 +273,36 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): 'title': show_name + ' ' + title, 'description': description, } + + +class ComedyCentralTVIE(MTVServicesInfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4', + 'info_dict': { + 'id': 'local_playlist-f99b626bdfe13568579a', + 'ext': 'flv', + 'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://www.comedycentral.tv/shows/1074-workaholics', + 'only_matching': True, + }, { + 'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mrss_url = self._search_regex( + r'data-mrss=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'mrss url', group='url') + + return self._get_videos_info_from_url(mrss_url, video_id) From b1ea6802703f6886b3be2a9975f598fa49fc6d3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 17:29:36 +0700 Subject: [PATCH 17/26] Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests" This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a. 
--- youtube_dl/extractor/bbc.py | 39 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 23c6e505b..4b3cd8c65 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -55,11 +55,12 @@ class BBCCoUkIE(InfoExtractor): 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'info_dict': { 'id': 'b039d07m', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { + # rtmp download 'skip_download': True, } }, @@ -91,7 +92,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', @@ -106,7 +107,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', }, { 'url': 'http://www.bbc.co.uk/programmes/b04v20dw', 'info_dict': { @@ -126,12 +127,13 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Audio', 'info_dict': { 'id': 'p022h44j', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", 'duration': 227, }, 'params': { + # rtmp download 'skip_download': True, } }, { @@ -139,12 +141,13 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Video', 'info_dict': { 'id': 'p025c103', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', 'description': 'Rae Morris performs Closer for BBC Three at 
Reading 2014', 'duration': 226, }, 'params': { + # rtmp download 'skip_download': True, } }, { @@ -160,7 +163,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'geolocation', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', 'info_dict': { @@ -174,7 +177,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available', + 'skip': 'geolocation', }, { # iptv-all mediaset fails with geolocation however there is no geo restriction # for this programme at all @@ -189,17 +192,18 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'this episode is not currently available on BBC iPlayer Radio', + 'skip': 'Now it\'s really geo-restricted', }, { # compact player (https://github.com/rg3/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', }, 'params': { + # rtmp download 'skip_download': True, }, }, { @@ -245,7 +249,7 @@ class BBCCoUkIE(InfoExtractor): pass elif transfer_format == 'hls': formats.extend(self._extract_m3u8_formats( - href, programme_id, 'mp4', 'm3u8_native', + href, programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=supplier, fatal=False)) # Direct link else: @@ -301,14 +305,13 @@ class BBCCoUkIE(InfoExtractor): for connection in self._extract_connections(media): conn_formats = self._extract_connection(connection, programme_id) for format in conn_formats: - if format.get('protocol') != 'm3u8_native': - format.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - 'vcodec': vcodec, - 'filesize': file_size, - }) + format.update({ + 'width': width, 
+ 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) if service: format['format_id'] = '%s_%s' % (service, format['format_id']) formats.extend(conn_formats) From 246080d37878c17e481ec112fd12df52859f3c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 18:10:16 +0700 Subject: [PATCH 18/26] [viki] Override m3u8 formats acodec --- youtube_dl/extractor/viki.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index efa15e0b6..fe2bb9df4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -281,9 +281,16 @@ class VikiIE(VikiBaseIE): r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): if format_id == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', 'm3u8_native', - m3u8_id='m3u8-%s' % protocol, fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + format_dict['url'], video_id, 'mp4', + entry_protocol='m3u8_native', + m3u8_id='m3u8-%s' % protocol, fatal=False) + # Despite CODECS metadata in m3u8 all video-only formats + # are actually video+audio + for f in m3u8_formats: + if f.get('acodec') == 'none' and f.get('vcodec') != 'none': + f['acodec'] = None + formats.extend(m3u8_formats) else: formats.append({ 'url': format_dict['url'], From 890e6d3309ccddfc0445c4e3b88b16aa7a6c6c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 18:12:03 +0700 Subject: [PATCH 19/26] [viki] Lower m3u8 preference http URLs always provide the same or better quality --- youtube_dl/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index fe2bb9df4..0d6340689 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -283,7 +283,7 @@ class VikiIE(VikiBaseIE): if
format_id == 'm3u8': m3u8_formats = self._extract_m3u8_formats( format_dict['url'], video_id, 'mp4', - entry_protocol='m3u8_native', + entry_protocol='m3u8_native', preference=-1, m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats # are actually video+audio From 10568217992722c87ee4770ce70752936521e232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 18:13:54 +0700 Subject: [PATCH 20/26] [viki] Fix tests (Closes #10098) --- youtube_dl/extractor/viki.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0d6340689..4351ac457 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -130,7 +130,7 @@ class VikiIE(VikiBaseIE): }, { # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', - 'md5': 'feea2b1d7b3957f70886e6dfd8b8be84', + 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', 'info_dict': { 'id': '1067139v', 'ext': 'mp4', @@ -156,15 +156,11 @@ class VikiIE(VikiBaseIE): 'like_count': int, 'age_limit': 13, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, 'skip': 'Blocked in the US', }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', - 'md5': '1f54697dabc8f13f31bf06bb2e4de6db', + 'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'info_dict': { 'id': '44699v', 'ext': 'mp4', @@ -200,7 +196,7 @@ class VikiIE(VikiBaseIE): }, { # non-English description 'url': 'http://www.viki.com/videos/158036v-love-in-magic', - 'md5': '013dc282714e22acf9447cad14ff1208', + 'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'info_dict': { 'id': '158036v', 'ext': 'mp4', From e8882e7043ad39295349f6c91e379adf7b021356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 18:34:25 +0700 Subject: [PATCH 21/26] [spike] Relax _VALID_URL and improve extraction (Closes #10106) --- 
youtube_dl/extractor/spike.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 182f286df..63ea7718b 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -4,11 +4,8 @@ from .mtv import MTVServicesInfoExtractor class SpikeIE(MTVServicesInfoExtractor): - _VALID_URL = r'''(?x)https?:// - (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+| - m\.spike\.com/videos/video\.rbml\?id=(?P[^&]+)) - ''' - _TEST = { + _VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' + _TESTS = [{ 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 'md5': '1a9265f32b0c375793d6c4ce45255256', 'info_dict': { @@ -17,13 +14,19 @@ class SpikeIE(MTVServicesInfoExtractor): 'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?', 'description': 'md5:fbed7e82ed5fad493615b3094a9499cb', }, - } + }, { + 'url': 'http://www.spike.com/video-clips/lhtu8m/', + 'only_matching': True, + }, { + 'url': 'http://www.spike.com/video-clips/lhtu8m', + 'only_matching': True, + }, { + 'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', + 'only_matching': True, + }, { + 'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', + 'only_matching': True, + }] _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' - - def _real_extract(self, url): - mobile_id = self._match_id(url) - if mobile_id: - url = 'http://www.spike.com/video-clips/%s' % mobile_id - return super(SpikeIE, self)._real_extract(url) From d993a1354def6c81f1f267cb2bfe02c478336ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 18:58:47 +0700 Subject: [PATCH 22/26] [README.md] Make download URLs consistent --- README.md | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44332ea9a..a9f3001a6 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: - sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl + sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: From 8188b923db3f1e491ec128fe65a0ed380f1e6a04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 Jul 2016 19:04:29 +0700 Subject: [PATCH 23/26] release 2016.07.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b1b8def3c..8b68f371b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.16 +[debug] youtube-dl version 2016.07.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cf194340a..eaa165347 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -449,6 +449,7 @@ - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **NineCNineMedia** + - **Nintendo** - **njoy**: N-JOY - **njoy:embed** - **Noco** @@ -645,6 +646,7 @@ - **stanfordoc**: Stanford Open ClassRoom - **Steam** - **Stitcher** + - **Streamable** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 55d07d420..34b62480b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.16' +__version__ = '2016.07.17' From a66a73ee905cb1ef9cc63c86d68f5b87cbfe2582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 Jul 2016 02:25:31 +0700 Subject: [PATCH 24/26] [ard] Add test for rbb-online --- youtube_dl/extractor/ard.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 91c78887a..c15cf1575 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py 
@@ -62,6 +62,17 @@ class ARDMediathekIE(InfoExtractor): }, { 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', 'only_matching': True, + }, { + # audio + 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', + 'md5': '4e8f00631aac0395fee17368ac0e9867', + 'info_dict': { + 'id': '30796318', + 'ext': 'mp3', + 'title': 'Vor dem Fest', + 'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb', + 'duration': 3287, + }, }] def _extract_media_info(self, media_info_url, webpage, video_id): From 05087d1b4c4d7171b6121f700468149c175e2058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 19 Jul 2016 22:49:38 +0700 Subject: [PATCH 25/26] [bbc] Improve extraction from sxml playlists --- youtube_dl/extractor/bbc.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 4b3cd8c65..e3b14c854 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -589,7 +589,8 @@ class BBCIE(BBCCoUkIE): 'info_dict': { 'id': '150615_telabyad_kentin_cogu', 'ext': 'mp4', - 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", + 'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi", + 'description': 'md5:33a4805a855c9baf7115fcbde57e7025', 'timestamp': 1434397334, 'upload_date': '20150615', }, @@ -603,6 +604,7 @@ class BBCIE(BBCCoUkIE): 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw', 'ext': 'mp4', 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción', + 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8', 'timestamp': 1434713142, 'upload_date': '20150619', }, @@ -818,8 +820,20 @@ class BBCIE(BBCCoUkIE): # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) playlist = data_playable.get('otherSettings', {}).get('playlist', {}) if 
playlist: - entries.append(self._extract_from_playlist_sxml( - playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) + for key in ('progressiveDownload', 'streaming'): + playlist_url = playlist.get('%sUrl' % key) + if not playlist_url: + continue + try: + entries.append(self._extract_from_playlist_sxml( + playlist_url, playlist_id, timestamp)) + except Exception as e: + # Some playlist URL may fail with 500, at the same time + # the other one may work fine (e.g. + # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + continue + raise if entries: return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 4e51ec5f579465d5b64ecfef3614ac5c726fca1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 19 Jul 2016 22:50:37 +0700 Subject: [PATCH 26/26] [extractors] Add import for comedycentral.tv --- youtube_dl/extractor/extractors.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 10b2390bf..7314be747 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -156,7 +156,11 @@ from .cnn import ( ) from .coub import CoubIE from .collegerama import CollegeRamaIE -from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE +from .comedycentral import ( + ComedyCentralIE, + ComedyCentralShowsIE, + ComedyCentralTVIE, +) from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonprotocols import RtmpIE