From 16e2c8f7710bffb462921dbc93adfa6274bd9334 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 16 Jan 2017 00:06:52 +0800 Subject: [PATCH 01/25] [brightcove] Recognize another player ID Closes #11688 --- ChangeLog | 1 + youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 029d13426..2e0ddd4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) version 2017.01.14 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2e56d1df9..5c6e99da1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ac29ec600..a3ac7d26b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -422,6 +422,26 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From 906420cae37ee3c2f48d23c3a4fa0543a66947d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 21:54:47 +0700 Subject: [PATCH 02/25] [limelight] Improve and make more robust (closes #11737) + Add support for direct http for videos hosted on video.llnw.net * Check handmade http URLs --- youtube_dl/extractor/limelight.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 905a0e85f..e635f3c4d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] - urls.append(http_url) - http_fmt = fmt.copy() - http_fmt.update({ - 'url': http_url, - 'format_id': format_id.replace('rtmp', 'http'), - }) - formats.append(http_fmt) + http_format_id = format_id.replace('rtmp', 'http') + + CDN_HOSTS = ( + ('delvenetworks.com', 'cpl.delvenetworks.com'), + ('video.llnw.net', 's2.content.video.llnw.net'), + ) + for cdn_host, http_host in CDN_HOSTS: + if cdn_host not in rtmp.group('host').lower(): + continue + http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + urls.append(http_url) + if self._is_valid_url(http_url, video_id, http_format_id): + http_fmt = fmt.copy() + http_fmt.update({ + 'url': http_url, + 'format_id': http_format_id, + }) + formats.append(http_fmt) + break + fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), From 0ce8c66fb05fefbe51ac1eca8d3ddbd561b38a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 22:07:12 +0700 Subject: [PATCH 03/25] [options] Include custom conf in final argv (closes #11741) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0eb4924b6..0b8c1671d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None): if '--ignore-config' not in system_conf: user_conf = _readUserConf() - argv = system_conf + user_conf + command_line_conf + argv = system_conf + user_conf + custom_conf + command_line_conf opts, args = parser.parse_args(argv) if opts.verbose: for conf_label, conf in ( From 79fc8496c6ab423d591f9ed1a41358d038242bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:31:50 +0700 Subject: [PATCH 04/25] [xiami] Improve extraction (closes #11699) * Relax _VALID_URLs * Improve track metadata extraction --- youtube_dl/extractor/xiami.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 86abef257..d017e03de 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor): return webpage def _extract_track(self, track, track_id=None): - title = track['title'] + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name track_url = self._decrypt(track['location']) subtitles = {} @@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor): 'thumbnail': track.get('pic') or track.get('album_pic'), 'duration': int_or_none(track.get('length')), 'creator': track.get('artist', '').split(';')[0], - 'track': title, - 'album': track.get('album_name'), - 'artist': track.get('artist'), + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, 'subtitles': subtitles, } @@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor): class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.xiami.com/song/1775610518', 'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'info_dict': { 'id': '1775610518', 'ext': 'mp3', - 'title': 'Woman', + 'title': 'HONNE - Woman', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 265, 'creator': 'HONNE', @@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE): 'info_dict': { 'id': '1775256504', 'ext': 'mp3', - 'title': '悟空', + 'title': '戴荃 - 悟空', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 200, 'creator': '戴荃', @@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE): }, }, 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' _TYPE = '1' _TESTS = [{ 'url': 'http://www.xiami.com/album/2100300444', @@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, }] class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' _TYPE = '2' - _TEST = { + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, 'skip': 'Georestricted', - } + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', From ddd53c392e0b3d3d2c62ba28117a9b07702c5bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:42:04 +0700 Subject: [PATCH 05/25] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e0ddd4f6..ee59e120c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,22 @@ version +Core +* [options] Apply custom config to final composite configuration (#11741) +* [YoutubeDL] Improve protocol auto determining (#11720) + Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-make direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) + [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + version 2017.01.14 From c1c2fe2045911c310fd5d2eda7bbb53ad581d250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:44:04 +0700 Subject: [PATCH 06/25] release 2017.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a7bf2b90c..c04f6246a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.14 +[debug] youtube-dl version 2017.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee59e120c..f6d73f982 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.16 Core * [options] Apply custom config to final composite configuration (#11741) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0f6c4ec0c..a3c76d5db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,6 +86,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **Beam:live** - **Beatport** - **Beeg** - **BehindKink** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17c6f9eb2..c20718dd6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.14' +__version__ = '2017.01.16' From c0bd51c090d617811f5e405294dce06f5871d717 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Mon, 16 Jan 2017 22:19:52 +0300 Subject: [PATCH 07/25] [naver] Support tv.naver.com links --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 055070ff5..aba0a9a70 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -12,7 +12,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', From 8a5f0a6357746d293f7330e40a3cf5823b1b626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 21:19:57 +0700 Subject: [PATCH 08/25] [naver] Update tests for #11743 --- youtube_dl/extractor/naver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index aba0a9a70..e8131333f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -15,7 +15,7 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ - 'url': 'http://tvcast.naver.com/v/81652', + 'url': 'http://tv.naver.com/v/81652', 'info_dict': { 'id': '81652', 'ext': 'mp4', @@ -24,7 +24,7 @@ class NaverIE(InfoExtractor): 'upload_date': '20130903', }, }, { - 'url': 'http://tvcast.naver.com/v/395837', + 'url': 'http://tv.naver.com/v/395837', 'md5': '638ed4c12012c458fefcddfd01f173cd', 'info_dict': { 'id': '395837', @@ -34,6 +34,9 @@ class NaverIE(InfoExtractor): 'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, }] def _real_extract(self, url): From 136078966b2047b21e9784060cebdc893c643ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 23:14:07 +0700 Subject: [PATCH 09/25] [imdb] Extend _VALID_URL (closes #11744) --- youtube_dl/extractor/imdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f0fc8d49a..f95c00c73 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -13,7 +13,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor): }, { 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/videoplayer/vi1562949145', + 'only_matching': True, }] def _real_extract(self, url): From 4e44598547b02d42aa628506245c40c3d633814e Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 9 Jan 2017 21:19:55 +0100 Subject: [PATCH 10/25] [20min] Fix extraction --- youtube_dl/extractor/twentymin.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index b721ecb0a..68d5a0cb5 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -13,10 +13,10 @@ class TwentyMinutenIE(InfoExtractor): _TESTS = [{ # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', - 'ext': 'flv', + 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' @@ -34,17 +34,29 @@ class TwentyMinutenIE(InfoExtractor): 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' }, 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'md5': '372917ba85ed969e176d287ae54b2f94', + 'info_dict': { + 'id': '523629', + 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', + 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', + } }, { # YouTube embed 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'md5': 'e7e237fd98da2a3cc1422ce683df234d', 'info_dict': { 'id': 'ivM7A7SpDOs', 'ext': 'mp4', 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'upload_date': '20160424', - 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader': 'CMM Castilla-La Mancha Media', 'uploader_id': 'RTVCM', }, 'add_ie': ['Youtube'], @@ -77,18 +89,31 @@ class TwentyMinutenIE(InfoExtractor): r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') if not video_id: + params = self._html_search_regex( + r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', + webpage, '20min embed URL') video_id = self._search_regex( - r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + r'.*videoId@(\d+)', + params, 'Video Id') description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) + formats = [] + format_preferences = [('sd', ''), ('hd', 'h')] + for format_id, url_extension in format_preferences: + format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) + return { 'id': video_id, 'display_id': display_id, - 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats, } From 538b17a09c6546d58babc5eb4a3abc08dcff2d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:05:11 +0700 Subject: [PATCH 11/25] [20min] Improve --- youtube_dl/extractor/twentymin.py | 122 ++++++++++++------------------ 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + int_or_none, + try_get, +) class TwentyMinutenIE(InfoExtractor): IE_NAME = '20min' - _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P\d+)|(?:[^/]+/)*(?P[^/#?]+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?20min\.ch/ + (?: + videotv/*\?.*?\bvid=| + videoplayer/videoplayer\.html\?.*?\bvideoId@ + ) + (?P\d+) + ''' _TESTS = [{ - # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '469408', - 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + 'thumbnail': r're:https?://.*\.jpg$', }, - 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', - 'md5': '372917ba85ed969e176d287ae54b2f94', + 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629', 'info_dict': { 'id': '523629', - 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', 'ext': 'mp4', 'title': 'So kommen Sie bei Eis und Schnee sicher an', - 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', - 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', - } - }, { - # YouTube embed - 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'e7e237fd98da2a3cc1422ce683df234d', - 'info_dict': { - 'id': 'ivM7A7SpDOs', - 'ext': 'mp4', - 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', - 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', - 'upload_date': '20160424', - 'uploader': 'CMM Castilla-La Mancha Media', - 'uploader_id': 'RTVCM', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + 'thumbnail': r're:https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, }, - 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, - }, { - 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', - 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r']+src=(["\'])(?P(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + webpage)] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video = self._download_json( + 'http://api.20min.ch/video/%s/show' % video_id, + video_id)['content'] - youtube_url = self._html_search_regex( - r']+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube embed URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, 'Youtube') + title = video['title'] - title = self._html_search_regex( - r'

.*?(.+?)

', - webpage, 'title', default=None) - if not title: - title = remove_end(re.sub( - r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + formats = [{ + 'format_id': format_id, + 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'quality': quality, + } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] + self._sort_formats(formats) - if not video_id: - params = self._html_search_regex( - r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', - webpage, '20min embed URL') - video_id = self._search_regex( - r'.*videoId@(\d+)', - params, 'Video Id') + description = video.get('lead') + thumbnail = video.get('thumbnail') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + def extract_count(kind): + return try_get( + video, + lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) - formats = [] - format_preferences = [('sd', ''), ('hd', 'h')] - for format_id, url_extension in format_preferences: - format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) - formats.append({ - 'format_id': format_id, - 'url': format_url, - }) + like_count = extract_count('up') + dislike_count = extract_count('down') return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'like_count': like_count, + 'dislike_count': dislike_count, 'formats': formats, } From b687c85eab942553e925256ad10de693227ba553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:08:31 +0700 Subject: [PATCH 12/25] [extractor/generic] Add support for 20 minuten embeds (closes #11683, closes #11751) --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3ac7d26b..154545df7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE +from .twentymin import TwentyMinutenIE class GenericIE(InfoExtractor): @@ -1468,6 +1469,20 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # 20 minuten embed + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'info_dict': { + 'id': '523629', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TwentyMinutenIE.ie_key()], + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2421,6 +2436,12 @@ class GenericIE(InfoExtractor): if videa_urls: return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Look for 20 minuten embeds + twentymin_urls = TwentyMinutenIE._extract_urls(webpage) + if twentymin_urls: + return _playlist_from_matches( + twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From aaf2b7c57a3d2dc9ba12f1aa401cba088e114916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:20:11 +0700 Subject: [PATCH 13/25] [canalplus] Add fallback for video id (closes #11764) --- youtube_dl/extractor/canalplus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 10cf165bc..b3f76a7b1 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor): [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)', r'data-video=["\'](?P\d+)'], - webpage, 'video id', group='id') + webpage, 'video id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From baa3e1845b26d9756642325bbb0d58e22025b2ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 18 Jan 2017 17:00:15 +0100 Subject: [PATCH 14/25] [bilibili] fix extraction(closes #11077) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5051934ef..85ea5e6ee 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor): }, } - _APP_KEY = '6f90a59ac58a4123' - _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' def _real_extract(self, url): video_id = self._match_id(url) From 460f61fac42592eb273b7d58efc314cc83687b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:06:46 +0700 Subject: [PATCH 15/25] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6d73f982..994895edc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + version 2017.01.16 Core From 1560baacc677c43c1007acfc89b8190f81a59684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:10:00 +0700 Subject: [PATCH 16/25] release 2017.01.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c04f6246a..38cb13a33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.16 +[debug] youtube-dl version 2017.01.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 994895edc..5aa4e3c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.18 Extractors * [bilibili] Fix extraction (#11077) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c20718dd6..669f60f65 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.16' +__version__ = '2017.01.18' From f1e70fc2ff6f1536873ed73ffc9bff63653fd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:34:11 +0700 Subject: [PATCH 17/25] [mtv] Relax triforce feed regex (closes #11766) --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 00a980c7d..e48ea2481 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( - r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage, 'triforce feed', default='{}'), video_id, fatal=False) data_zone = self._search_regex( From eb3f008c9e686f38c50511004d5c9a51b2e8cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jan 2017 04:49:31 +0700 Subject: [PATCH 18/25] [uol] Fix extraction (closes #11770) --- youtube_dl/extractor/uol.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index c27c64387..e67083004 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -84,12 +84,27 @@ class UOLIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) - video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + media_id = None + + if video_id.isdigit(): + media_id = video_id + + if not media_id: + embed_page = self._download_webpage( + 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, + video_id, 'Downloading embed page', fatal=False) + if embed_page: + media_id = self._search_regex( + (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), + embed_page, 'media id', default=None) + + if not media_id: + webpage = self._download_webpage(url, video_id) + media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') + video_data = self._download_json( - 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, - video_id)['item'] + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, + media_id)['item'] title = video_data['title'] query = { @@ -118,7 +133,7 @@ class UOLIE(InfoExtractor): tags.append(tag_description) return { - 'id': video_id, + 'id': media_id, 'title': title, 'description': clean_html(video_data.get('desMedia')), 'thumbnail': video_data.get('thumbnail'), From cccd70a2752ad079ed560e42ff085adcabebaac2 Mon Sep 17 00:00:00 2001 From: james mike dupont Date: Thu, 19 Jan 2017 04:18:13 -0500 Subject: [PATCH 19/25] untie --- youtube_dl/extractor/flipagram.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 1902a2393..b7be40f1b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor): 'filesize': int_or_none(cover.get('size')), } for cover in flipagram.get('covers', []) if cover.get('url')] - # Note that this only retrieves comments that are initally loaded. + # Note that this only retrieves comments that are initially loaded. # For videos with large amounts of comments, most won't be retrieved. comments = [] for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2e98b0e6f..add753635 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { From 1fe84be0f3b36822af804db6cf7c06a1ac5ac688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Jan 2017 00:47:04 +0700 Subject: [PATCH 20/25] [1tv] Add support for hls (closes #11786) --- youtube_dl/extractor/firsttv.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c6fb67057..081c71842 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor): title = item['title'] quality = qualities(QUALITIES) formats = [] + path = None for f in item.get('mbr', []): src = f.get('src') if not src or not isinstance(src, compat_str): continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + if not path: + path = self._search_regex( + r'//[^/]+/(.+?)_\d+\.mp4', src, + 'm3u8 path', default=None) formats.append({ 'url': src, 'format_id': f.get('name'), 'tbr': tbr, - 'quality': quality(f.get('name')), + 'source_preference': quality(f.get('name')), }) + # m3u8 URL format is reverse engineered from [1] (search for + # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) + # is taken from [2]. + # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted + # 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 + if not path and len(formats) == 1: + path = self._search_regex( + r'//[^/]+/(.+?$)', formats[0]['url'], + 'm3u8 path', default=None) + if path: + if len(formats) == 1: + m3u8_path = ',' + else: + tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + formats.extend(self._extract_m3u8_formats( + 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' + % (path, m3u8_path), + display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) From d77ac737900eede5e1508b9822e71c8595fe0879 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 21:59:24 +0800 Subject: [PATCH 21/25] [ustream] Add UstreamIE._extract_url() Ref: #11547 --- youtube_dl/extractor/generic.py | 8 ++++---- youtube_dl/extractor/ustream.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 154545df7..a7c104845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -79,6 +79,7 @@ from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE class GenericIE(InfoExtractor): @@ -2112,10 +2113,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'TED') # Look for embedded Ustream videos - mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Ustream') + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) # Look for embedded arte.tv player mobj = re.search( diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 0c06bf36b..5737d4d16 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor): }, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] From 4447fb23320b9214ab3188717794d00b18887617 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:11:43 +0800 Subject: [PATCH 22/25] [cspan] Support Ustream embedded videos Closes #11547 --- ChangeLog | 6 ++++++ youtube_dl/extractor/cspan.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5aa4e3c6b..217971ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [cspan] Support Ustream embedded videos (#11547) + + version 2017.01.18 Extractors diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7e5d4f227..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): @@ -57,12 +58,30 @@ class CSpanIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From 972efe60c3fdaff83f9b8e7a637ee81f4c27bb64 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:13:54 +0800 Subject: [PATCH 23/25] [generic] Remove a dead test The web page does not contain a video anymore Ref: #2694, #2696 --- youtube_dl/extractor/generic.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a7c104845..40201f311 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -589,17 +589,6 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embedded Ustream video - { - 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', - 'md5': '27b99cdb639c9b12a79bca876a073417', - 'info_dict': { - 'id': '45734260', - 'ext': 'flv', - 'uploader': 'AU SPA: The NSA and Privacy', - 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' - } - }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', From f3c21cb7a7e2d8685f466368e3142739077498cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:25:20 +0800 Subject: [PATCH 24/25] [cspan] Fix _TESTS --- youtube_dl/extractor/cspan.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 92a827a4b..d4576160b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -23,14 +23,13 @@ class CSpanIE(InfoExtractor): 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'info_dict': { 'id': '315139', - 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, + 'playlist_mincount': 2, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', + # md5 is unstable 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -39,14 +38,11 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', - 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', - 'duration': 14848, - 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, + 'playlist_mincount': 6, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', From f4ec8dce481564589419e4dffc45437211daa13f Mon Sep 17 00:00:00 2001 From: Iulian Onofrei Date: Fri, 20 Jan 2017 18:25:04 +0200 Subject: [PATCH 25/25] Update README.md (#11787) Add audio format argument dependency warning --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0b8c1671d..0d2ce8d15 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None): help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5',