From 4a7a5e41f7c653cf2b2f8b9bf10987291a19c074 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Jul 2016 14:51:44 +0100 Subject: [PATCH 01/25] [tvplay] improve extraction --- youtube_dl/extractor/tvplay.py | 123 +++++++++++++++++---------------- 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index df70a6b23..918f8f8bc 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -8,43 +8,36 @@ from ..compat import compat_str from ..utils import ( parse_iso8601, qualities, + determine_ext, + update_url_query, + int_or_none, ) class TVPlayIE(InfoExtractor): IE_DESC = 'TV3Play and related services' _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:tvplay\.lv/parraides| - tv3play\.lt/programos| - play\.tv3\.lt/programos| - tv3play\.ee/sisu| - tv3play\.se/program| - tv6play\.se/program| - tv8play\.se/program| - tv10play\.se/program| - tv3play\.no/programmer| - viasat4play\.no/programmer| - tv6play\.no/programmer| - tv3play\.dk/programmer| + (?:tvplay(?:\.skaties)?\.lv/parraides| + (?:tv3play|play\.tv3)\.lt/programos| + tv3play(?:\.tv3)?\.ee/sisu| + tv(?:3|6|8|10)play\.se/program| + (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer| play\.novatv\.bg/programi )/[^/]+/(?P\d+) ''' _TESTS = [ { 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', + 'md5': 'a1612fe0849455423ad8718fe049be21', 'info_dict': { 'id': '418113', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Kādi ir īri? - Viņas melo labāk', 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.', 'duration': 25, 'timestamp': 1406097056, 'upload_date': '20140723', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true', @@ -82,7 +75,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', 'info_dict': { 'id': '395385', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Husräddarna S02E07', 'description': 'md5:f210c6c89f42d4fc39faa551be813777', 'duration': 2574, @@ -90,7 +83,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20140520', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -98,7 +90,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', 'info_dict': { 'id': '266636', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Den sista dokusåpan S01E08', 'description': 'md5:295be39c872520221b933830f660b110', 'duration': 1492, @@ -107,7 +99,6 @@ class TVPlayIE(InfoExtractor): 'age_limit': 18, }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -115,7 +106,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', 'info_dict': { 'id': '282756', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Antikjakten S01E10', 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', 'duration': 2646, @@ -123,7 +114,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20120925', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -131,7 +121,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', 'info_dict': { 'id': '230898', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Anna Anka søker assistent - Ep. 8', 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', 'duration': 2656, @@ -139,7 +129,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20100628', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -147,7 +136,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', 'info_dict': { 'id': '21873', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Budbringerne program 10', 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', 'duration': 1297, @@ -155,7 +144,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20090929', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -163,7 +151,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', 'info_dict': { 'id': '361883', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Hotelinspektør Alex Polizzi - Ep. 10', 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', 'duration': 2594, @@ -171,7 +159,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20140224', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -191,6 +178,14 @@ class TVPlayIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', + 'only_matching': True, + }, + { + 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', + 'only_matching': True, + } ] def _real_extract(self, url): @@ -199,7 +194,9 @@ class TVPlayIE(InfoExtractor): video = self._download_json( 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') - if video['is_geo_blocked']: + title = video['title'] + + if video.get('is_geo_blocked'): self.report_warning( 'This content might not be available in your country due to copyright reasons') @@ -208,42 +205,50 @@ class TVPlayIE(InfoExtractor): quality = qualities(['hls', 'medium', 'high']) formats = [] - for format_id, video_url in streams['streams'].items(): + for format_id, video_url in streams.get('streams', {}).items(): if not video_url or not isinstance(video_url, compat_str): continue - fmt = { - 'format_id': format_id, - 'preference': quality(format_id), - } - if video_url.startswith('rtmp'): - m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) - if not m: - continue - fmt.update({ - 'ext': 'flv', - 'url': m.group('url'), - 'app': m.group('app'), - 'play_path': m.group('playpath'), - }) - elif video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( - video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id)) - continue + update_url_query(video_url, { + 'hdcore': '3.5.0', + 'plugin': 'aasp-3.5.0.151.81' + }), video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) else: - fmt.update({ - 'url': video_url, - }) - formats.append(fmt) - + fmt = { + 'format_id': format_id, + 'quality': quality(format_id), + 'ext': ext, + } + if video_url.startswith('rtmp'): + m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) + if not m: + continue + fmt.update({ + 'ext': 'flv', + 'url': m.group('url'), + 'app': m.group('app'), + 'play_path': m.group('playpath'), + }) + else: + fmt.update({ + 'url': video_url, + }) + formats.append(fmt) self._sort_formats(formats) return { 'id': video_id, - 'title': video['title'], - 'description': video['description'], - 'duration': video['duration'], - 'timestamp': parse_iso8601(video['created_at']), - 'view_count': video['views']['total'], - 'age_limit': video.get('age_limit', 0), + 'title': title, + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('created_at')), + 'view_count': int_or_none(video.get('views', {}).get('total')), + 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, } From 8a8590a61784b9f4a8ff380058a1571d5400ba3d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Jul 2016 16:30:24 +0100 Subject: [PATCH 02/25] [dbtv] delegate extraction to BrightcoveNewIE --- youtube_dl/extractor/dbtv.py | 69 ++++++++++-------------------------- 1 file changed, 19 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 133cdc50b..caff8842e 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,78 +4,47 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - int_or_none, - clean_html, -) class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P[0-9]+)(?:#(?P.+))?' + _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P[0-9]+)(?:#(?P.+))?' _TESTS = [{ 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', + 'md5': '2e24f67936517b143a234b4cadf792ec', 'info_dict': { - 'id': '33100', + 'id': '3649835190001', 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', - 'thumbnail': 're:https?://.*\.jpg$', - 'timestamp': 1404039863.438, + 'thumbnail': 're:https?://.*\.jpg', + 'timestamp': 1404039863, 'upload_date': '20140629', 'duration': 69.544, - 'view_count': int, - 'categories': list, - } + 'uploader_id': '1027729757001', + }, + 'add_ie': ['BrightcoveNew'] }, { 'url': 'http://dbtv.no/3649835190001', 'only_matching': True, }, { 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', 'only_matching': True, + }, { + 'url': 'http://dbtv.no/vice/5000634109001', + 'only_matching': True, + }, { + 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - data = self._download_json( - 'http://api.dbtv.no/discovery/%s' % video_id, display_id) - - video = data['playlist'][0] - - formats = [{ - 'url': f['URL'], - 'vcodec': f.get('container'), - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'vbr': float_or_none(f.get('rate'), 1000), - 'filesize': int_or_none(f.get('size')), - } for f in video['renditions'] if 'URL' in f] - - if not formats: - for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: - if url_key in video: - formats.append({ - 'url': video[url_key], - 'format_id': format_id, - }) - - self._sort_formats(formats) + video_id, display_id = re.match(self._VALID_URL, url).groups() return { - 'id': compat_str(video['id']), + '_type': 'url_transparent', + 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, + 'id': video_id, 'display_id': display_id, - 'title': video['title'], - 'description': clean_html(video['desc']), - 'thumbnail': video.get('splash') or video.get('thumb'), - 'timestamp': float_or_none(video.get('publishedAt'), 1000), - 'duration': float_or_none(video.get('length'), 1000), - 'view_count': int_or_none(video.get('views')), - 'categories': video.get('tags'), - 'formats': formats, + 'ie_key': 'BrightcoveNew', } From f1b4af7d79e5ee5d74fddf5fe46ce4c6f661ad8d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Jul 2016 19:06:50 +0100 Subject: [PATCH 03/25] [beightcove:new] remove html tags from description --- youtube_dl/extractor/brightcove.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 57ce0c174..c172bad2d 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -26,6 +26,7 @@ from ..utils import ( unescapeHTML, unsmuggle_url, update_url_query, + clean_html, ) @@ -620,7 +621,7 @@ class BrightcoveNewIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': json_data.get('description'), + 'description': clean_html(json_data.get('description')), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'duration': float_or_none(json_data.get('duration'), 1000), 'timestamp': parse_iso8601(json_data.get('published_at')), From 7d1219f3e053cc38a577ff2370781c1019e10a1a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 11 Jul 2016 19:08:22 +0100 Subject: [PATCH 04/25] [tmz] delegate extraction to KalturaIE --- youtube_dl/extractor/tmz.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index 7dbe68b5c..979856e9a 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -5,31 +5,27 @@ from .common import InfoExtractor class TMZIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P[^/]+)/?' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P[^/?#]+)' + _TESTS = [{ 'url': 'http://www.tmz.com/videos/0_okj015ty/', - 'md5': '791204e3bf790b1426cb2db0706184c0', + 'md5': '4d22a51ef205b6c06395d8394f72d560', 'info_dict': { 'id': '0_okj015ty', - 'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4', 'ext': 'mp4', 'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', 'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', - 'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*', + 'timestamp': 1394747163, + 'uploader_id': 'batchUser', + 'upload_date': '20140313', } - } + }, { + 'url': 'http://www.tmz.com/videos/0-cegprt2p/', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'url': self._html_search_meta('VideoURL', webpage, fatal=True), - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._html_search_meta('ThumbURL', webpage), - } + video_id = self._match_id(url).replace('-', '_') + return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id) class TMZArticleIE(InfoExtractor): From 7f29cf545a45db2bd5c2681a2fac782ca460b153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Jul 2016 02:10:35 +0700 Subject: [PATCH 05/25] [youtube] Add YouTube Red paid video reference test (#10059) --- youtube_dl/extractor/youtube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1687d5ef9..49c264c3a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -858,6 +858,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): { 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 'only_matching': True, + }, + { + # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) + 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', + 'only_matching': True, } ] From 381ff44756ecf188de476a7a4cc9d4becf6633d1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 12 Jul 2016 09:09:54 +0200 Subject: [PATCH 06/25] [devscripts/generate-download] Remove MD5 and SHA1 --- devscripts/gh-pages/generate-download.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index 392e3ba21..fcd7e1dff 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read() with open('download.html.in', 'r', encoding='utf-8') as tmplf: template = tmplf.read() -md5sum = hashlib.md5(data).hexdigest() -sha1sum = hashlib.sha1(data).hexdigest() sha256sum = hashlib.sha256(data).hexdigest() template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_URL@', URL) -template = template.replace('@PROGRAM_MD5SUM@', md5sum) -template = template.replace('@PROGRAM_SHA1SUM@', sha1sum) template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) From 41aa44259d3a0791b1a023a18c9a933f71e04c50 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 12 Jul 2016 23:15:38 +0100 Subject: [PATCH 07/25] [shahid] try to bypass geo restriction and extract more metadata(closes #10062) --- youtube_dl/extractor/shahid.py | 75 ++++++++++++---------------------- 1 file changed, 26 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index d95ea06be..ca286abb1 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + str_or_none, ) @@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor): 'only_matching': True }] - def _handle_error(self, response): - if not isinstance(response, dict): - return - error = response.get('error') + def _call_api(self, path, video_id, note): + data = self._download_json( + 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }).get('data', {}) + + error = data.get('error') if error: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), expected=True) - def _download_json(self, url, video_id, note='Downloading JSON metadata'): - response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] - self._handle_error(response) - return response + return data def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - api_vars = { - 'id': video_id, - 'type': 'player', - 'url': 'http://api.shahid.net/api/v1_1', - 'playerType': 'episode', - } - - flashvars = self._search_regex( - r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) - if flashvars: - for key in api_vars.keys(): - value = self._search_regex( - r'\b%s\s*:\s*(?P["\'])(?P.+?)(?P=q)' % key, - flashvars, 'type', default=None, group='value') - if value: - api_vars[key] = value - - player = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' - % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + player = self._call_api( + 'Content/Episode/%s' % video_id, + video_id, 'Downloading player JSON') if player.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._download_json( - '%s/%s/%s?%s' % ( - api_vars['url'], api_vars['playerType'], api_vars['id'], - compat_urllib_parse_urlencode({ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - })), - video_id, 'Downloading video JSON') - - video = video[api_vars['playerType']] + video = self._call_api( + 'episode/%s' % video_id, video_id, + 'Downloading video JSON')['episode'] title = video['title'] - description = video.get('description') - thumbnail = video.get('thumbnailUrl') - duration = int_or_none(video.get('duration')) - timestamp = parse_iso8601(video.get('referenceDate')) categories = [ category['name'] for category in video.get('genres', []) if 'name' in category] @@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnailUrl'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('referenceDate')), 'categories': categories, + 'series': video.get('showTitle') or video.get('showName'), + 'season': video.get('seasonTitle'), + 'season_number': int_or_none(video.get('seasonNumber')), + 'season_id': str_or_none(video.get('seasonId')), + 'episode_number': int_or_none(video.get('number')), + 'episode_id': video_id, 'formats': formats, } From 9d865a1af6a509fc8546a049d996b7a7d28313cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 13 Jul 2016 14:27:14 +0800 Subject: [PATCH 08/25] [travis] Skip downloading srelay SOCKS tests never run on Travis CI due to unknown reasons, and downloading them broke some tests (e.g. https://travis-ci.org/rg3/youtube-dl/builds/144306425) --- .travis.yml | 3 --- devscripts/install_srelay.sh | 8 -------- 2 files changed, 11 deletions(-) delete mode 100755 devscripts/install_srelay.sh diff --git a/.travis.yml b/.travis.yml index 136c339f0..c74c9cc12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,6 @@ python: - "3.4" - "3.5" sudo: false -install: - - bash ./devscripts/install_srelay.sh - - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh deleted file mode 100755 index 33ce8a3f7..000000000 --- a/devscripts/install_srelay.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -mkdir -p tmp && cd tmp -wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz -tar zxvf srelay-0.4.8b6.tar.gz -cd srelay-0.4.8b6 -./configure -make From 2d19fb50725489d362d13e7347f90e0d2de10939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jul 2016 21:51:44 +0700 Subject: [PATCH 09/25] [vk:wallpost] Add extractor --- youtube_dl/extractor/vk.py | 223 +++++++++++++++++++++++++++++-------- 1 file changed, 175 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 758d9c86b..bcb7df83d 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -6,11 +6,18 @@ import json import sys from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( + clean_html, ExtractorError, + get_element_by_class, int_or_none, orderedSet, + parse_duration, + remove_start, str_to_int, unescapeHTML, unified_strdate, @@ -20,7 +27,54 @@ from .vimeo import VimeoIE from .pladform import PladformIE -class VKIE(InfoExtractor): +class VKBaseIE(InfoExtractor): + _NETRC_MACHINE = 'vk' + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_page, url_handle = self._download_webpage_handle( + 'https://vk.com', None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username.encode('cp1251'), + 'pass': password.encode('cp1251'), + }) + + # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header + # and expects the first one to be set rather than second (see + # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). + # As of RFC6265 the newer one cookie should be set into cookie store + # what actually happens. + # We will workaround this VK issue by resetting the remixlhk cookie to + # the first one manually. + cookies = url_handle.headers.get('Set-Cookie') + if sys.version_info[0] >= 3: + cookies = cookies.encode('iso-8859-1') + cookies = cookies.decode('utf-8') + remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) + if remixlhk: + value, domain = remixlhk.groups() + self._set_cookie(domain, 'remixlhk', value) + + login_page = self._download_webpage( + 'https://login.vk.com/?act=login', None, + note='Logging in as %s' % username, + data=urlencode_postdata(login_form)) + + if re.search(r'onLoginFailed', login_page): + raise ExtractorError( + 'Unable to login, incorrect username and/or password', expected=True) + + def _real_initialize(self): + self._login() + + +class VKIE(VKBaseIE): IE_NAME = 'vk' IE_DESC = 'VK' _VALID_URL = r'''(?x) @@ -38,8 +92,6 @@ class VKIE(InfoExtractor): (?P-?\d+_\d+)(?:.*\blist=(?P[\da-f]+))? ) ''' - _NETRC_MACHINE = 'vk' - _TESTS = [ { 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', @@ -189,49 +241,6 @@ class VKIE(InfoExtractor): } ] - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - login_page, url_handle = self._download_webpage_handle( - 'https://vk.com', None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'email': username.encode('cp1251'), - 'pass': password.encode('cp1251'), - }) - - # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header - # and expects the first one to be set rather than second (see - # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). - # As of RFC6265 the newer one cookie should be set into cookie store - # what actually happens. - # We will workaround this VK issue by resetting the remixlhk cookie to - # the first one manually. - cookies = url_handle.headers.get('Set-Cookie') - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') - remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) - if remixlhk: - value, domain = remixlhk.groups() - self._set_cookie(domain, 'remixlhk', value) - - login_page = self._download_webpage( - 'https://login.vk.com/?act=login', None, - note='Logging in as %s' % username, - data=urlencode_postdata(login_form)) - - if re.search(r'onLoginFailed', login_page): - raise ExtractorError( - 'Unable to login, incorrect username and/or password', expected=True) - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') @@ -355,7 +364,7 @@ class VKIE(InfoExtractor): } -class VKUserVideosIE(InfoExtractor): +class VKUserVideosIE(VKBaseIE): IE_NAME = 'vk:uservideos' IE_DESC = "VK - User's Videos" _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' @@ -396,3 +405,121 @@ class VKUserVideosIE(InfoExtractor): webpage, 'title', default=page_id)) return self.playlist_result(entries, page_id, title) + + +class VKWallPostIE(VKBaseIE): + IE_NAME = 'vk:wallpost' + _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P-?\d+_\d+)))' + _TESTS = [{ + # public page URL, audio playlist + 'url': 'https://vk.com/bs.official?w=wall-23538238_35', + 'info_dict': { + 'id': '23538238_35', + 'title': 'Black Shadow - Wall post 23538238_35', + 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', + }, + 'playlist': [{ + 'md5': '5ba93864ec5b85f7ce19a9af4af080f6', + 'info_dict': { + 'id': '135220665_111806521', + 'ext': 'mp3', + 'title': 'Black Shadow - Слепое Верование', + 'duration': 370, + 'uploader': 'Black Shadow', + 'artist': 'Black Shadow', + 'track': 'Слепое Верование', + }, + }, { + 'md5': '4cc7e804579122b17ea95af7834c9233', + 'info_dict': { + 'id': '135220665_111802303', + 'ext': 'mp3', + 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', + 'duration': 423, + 'uploader': 'Black Shadow', + 'artist': 'Black Shadow', + 'track': 'Война - Негасимое Бездны Пламя!', + }, + 'params': { + 'skip_download': True, + }, + }], + 'skip': 'Requires vk account credentials', + }, { + # single YouTube embed, no leading - + 'url': 'https://vk.com/wall85155021_6319', + 'info_dict': { + 'id': '85155021_6319', + 'title': 'Sergey Gorbunov - Wall post 85155021_6319', + }, + 'playlist_count': 1, + 'skip': 'Requires vk account credentials', + }, { + # wall page URL + 'url': 'https://vk.com/wall-23538238_35', + 'only_matching': True, + }, { + # mobile wall page URL + 'url': 'https://m.vk.com/wall-23538238_35', + 'only_matching': True, + }] + + def _real_extract(self, url): + post_id = self._match_id(url) + + wall_url = 'https://vk.com/wall%s' % post_id + + post_id = remove_start(post_id, '-') + + webpage = self._download_webpage(wall_url, post_id) + + error = self._html_search_regex( + r'>Error\s*]+class=["\']body["\'][^>]*>([^<]+)', + webpage, 'error', default=None) + if error: + raise ExtractorError('VK said: %s' % error, expected=True) + + description = clean_html(get_element_by_class('wall_post_text', webpage)) + uploader = clean_html(get_element_by_class( + 'fw_post_author', webpage)) or self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + entries = [] + + for audio in re.finditer(r'''(?sx) + ]+ + id=(?P["\'])audio_info(?P\d+_\d+).*?(?P=q1)[^>]+ + value=(?P["\'])(?Phttp.+?)(?P=q2) + .+? + ''', webpage): + audio_html = audio.group(0) + audio_id = audio.group('id') + duration = parse_duration(get_element_by_class('duration', audio_html)) + track = self._html_search_regex( + r']+id=["\']title%s[^>]*>([^<]+)' % audio_id, + audio_html, 'title', default=None) + artist = self._html_search_regex( + r'>([^<]+)\s*&ndash', audio_html, + 'artist', default=None) + entries.append({ + 'id': audio_id, + 'url': audio.group('url'), + 'title': '%s - %s' % (artist, track) if artist and track else audio_id, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'artist': artist, + 'track': track, + }) + + for video in re.finditer( + r']+href=(["\'])(?P/video(?:-?[\d_]+).*?)\1', webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key())) + + title = 'Wall post %s' % post_id + + return self.playlist_result( + orderedSet(entries), post_id, + '%s - %s' % (uploader, title) if uploader else title, + description) From 5f5a9d615835110380075343786e58f78f5b08a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jul 2016 21:52:52 +0700 Subject: [PATCH 10/25] [vk] Improve login --- youtube_dl/extractor/vk.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index bcb7df83d..3ee66e23e 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -53,13 +53,14 @@ class VKBaseIE(InfoExtractor): # We will workaround this VK issue by resetting the remixlhk cookie to # the first one manually. cookies = url_handle.headers.get('Set-Cookie') - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') - remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) - if remixlhk: - value, domain = remixlhk.groups() - self._set_cookie(domain, 'remixlhk', value) + if cookies: + if sys.version_info[0] >= 3: + cookies = cookies.encode('iso-8859-1') + cookies = cookies.decode('utf-8') + remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) + if remixlhk: + value, domain = remixlhk.groups() + self._set_cookie(domain, 'remixlhk', value) login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, From 51c2cd0b83777f87fbc45b18f59a6f467717ba57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jul 2016 21:53:23 +0700 Subject: [PATCH 11/25] [extractors] Add vk:wallpost extractor import --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b08df41b4..916c8bb3e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -989,6 +989,7 @@ from .viki import ( from .vk import ( VKIE, VKUserVideosIE, + VKWallPostIE, ) from .vlive import VLiveIE from .vodlocker import VodlockerIE From 00f4764cb7fdb39e3fd238440c373a2d7712d8d2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Jul 2016 15:54:43 +0100 Subject: [PATCH 12/25] [common] extract vbr, abr and fps for Unified Streaming Platform m3u8 manifests --- youtube_dl/extractor/common.py | 26 ++++++++++---------------- youtube_dl/extractor/wat.py | 12 +----------- 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index df546da27..29544c1a8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1207,6 +1207,7 @@ class InfoExtractor(object): 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, + 'fps': float_or_none(last_info.get('FRAME-RATE')), 'protocol': entry_protocol, 'preference': preference, } @@ -1215,24 +1216,17 @@ class InfoExtractor(object): width_str, height_str = resolution.split('x') f['width'] = int(width_str) f['height'] = int(height_str) - codecs = last_info.get('CODECS') - if codecs: - vcodec, acodec = [None] * 2 - va_codecs = codecs.split(',') - if len(va_codecs) == 1: - # Audio only entries usually come with single codec and - # no resolution. For more robustness we also check it to - # be mp4 audio. - if not resolution and va_codecs[0].startswith('mp4a'): - vcodec, acodec = 'none', va_codecs[0] - else: - vcodec = va_codecs[0] - else: - vcodec, acodec = va_codecs[:2] + # Unified Streaming Platform + mobj = re.search( + r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) + if mobj: + abr, vbr = mobj.groups() + abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) f.update({ - 'acodec': acodec, - 'vcodec': vcodec, + 'vbr': vbr, + 'abr': abr, }) + f.update(parse_codecs(last_info.get('CODECS'))) if last_media is not None: f['m3u8_media'] = last_media last_media = None diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index de7d6b559..48fc438ed 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -9,7 +9,6 @@ from ..utils import ( ExtractorError, unified_strdate, HEADRequest, - float_or_none, ) @@ -95,16 +94,7 @@ class WatIE(InfoExtractor): m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), video_id, f4m_id='hds', fatal=False)) for m3u8_format in m3u8_formats: - mobj = re.search( - r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url']) - if not mobj: - continue - abr, vbr = mobj.groups() - abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) - m3u8_format.update({ - 'vbr': vbr, - 'abr': abr, - }) + vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') if not vbr or not abr: continue f = m3u8_format.copy() From 0385aa6199206e4ba7745efec73be26c5826286a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Jul 2016 15:57:50 +0100 Subject: [PATCH 13/25] [bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests --- youtube_dl/extractor/bbc.py | 49 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 4b3cd8c65..50c1da185 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -44,6 +44,8 @@ class BBCCoUkIE(InfoExtractor): _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection' _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist' + # Unified Streaming Platform + _USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' _NAMESPACES = ( _MEDIASELECTION_NS, @@ -55,12 +57,11 @@ class BBCCoUkIE(InfoExtractor): 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'info_dict': { 'id': 'b039d07m', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { - # rtmp download 'skip_download': True, } }, @@ -92,7 +93,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', @@ -107,7 +108,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/programmes/b04v20dw', 'info_dict': { @@ -127,13 +128,12 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Audio', 'info_dict': { 'id': 'p022h44j', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", 'duration': 227, }, 'params': { - # rtmp download 'skip_download': True, } }, { @@ -141,13 +141,12 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Video', 'info_dict': { 'id': 'p025c103', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', 'duration': 226, }, 'params': { - # rtmp download 'skip_download': True, } }, { @@ -163,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'geolocation', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', 'info_dict': { @@ -177,7 +176,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'geolocation', + 'skip': 'this episode is not currently available', }, { # iptv-all mediaset fails with geolocation however there is no geo restriction # for this programme at all @@ -192,18 +191,17 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Now it\'s really geo-restricted', + 'skip': 'this episode is not currently available on BBC iPlayer Radio', }, { # compact player (https://github.com/rg3/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', }, 'params': { - # rtmp download 'skip_download': True, }, }, { @@ -248,9 +246,15 @@ class BBCCoUkIE(InfoExtractor): elif transfer_format == 'dash': pass elif transfer_format == 'hls': - formats.extend(self._extract_m3u8_formats( + is_unified_streaming = re.search(self._USP_RE, href) + if is_unified_streaming: + href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href) + m3u8_formats = self._extract_m3u8_formats( href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=supplier, fatal=False)) + m3u8_id=supplier, fatal=False) + if is_unified_streaming: + self._check_formats(m3u8_formats, programme_id) + formats.extend(m3u8_formats) # Direct link else: formats.append({ @@ -305,13 +309,14 @@ class BBCCoUkIE(InfoExtractor): for connection in self._extract_connections(media): conn_formats = self._extract_connection(connection, programme_id) for format in conn_formats: - format.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - 'vcodec': vcodec, - 'filesize': file_size, - }) + if format.get('protocol') != 'm3u8_native': + format.update({ + 'width': width, + 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) if service: format['format_id'] = '%s_%s' % (service, format['format_id']) formats.extend(conn_formats) From a0560d8ab83a565d502e160ccd993317a0ed69ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jul 2016 22:41:46 +0700 Subject: [PATCH 14/25] [ellentv] Improve extraction (Closes #10067) --- youtube_dl/extractor/ellentv.py | 50 ++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 4c8190d68..74bbc5c51 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -6,12 +6,13 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, + NO_DEFAULT, ) class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P[a-z0-9_-]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', 'md5': '4294cf98bc165f218aaa0b89e0fd8042', 'info_dict': { @@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor): 'timestamp': 1428035648, 'upload_date': '20150403', 'uploader_id': 'batchUser', - } - } + }, + }, { + # not available via http://widgets.ellentube.com/ + 'url': 'http://www.ellentv.com/videos/1-szkgu2m2/', + 'info_dict': { + 'id': '1_szkgu2m2', + 'ext': 'flv', + 'title': "Ellen's Amazingly Talented Audience", + 'description': 'md5:86ff1e376ff0d717d7171590e273f0a5', + 'timestamp': 1255140900, + 'upload_date': '20091010', + 'uploader_id': 'ellenkaltura@gmail.com', + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://widgets.ellentube.com/videos/%s' % video_id, - video_id) + URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url) - partner_id = self._search_regex( - r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') + for num, url_ in enumerate(URLS, 1): + webpage = self._download_webpage( + url_, video_id, fatal=num == len(URLS)) - kaltura_id = self._search_regex( - [r'id="kaltura_player_([^"]+)"', - r"_wb_entry_id\s*:\s*'([^']+)", - r'data-kaltura-entry-id="([^"]+)'], - webpage, 'kaltura id') + default = NO_DEFAULT if num == len(URLS) else None + + partner_id = self._search_regex( + r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id', + default=default) + + kaltura_id = self._search_regex( + [r'id="kaltura_player_([^"]+)"', + r"_wb_entry_id\s*:\s*'([^']+)", + r'data-kaltura-entry-id="([^"]+)'], + webpage, 'kaltura id', default=default) + + if partner_id and kaltura_id: + break return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') From c485959034485f529985c0bea933951af3f4a63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jul 2016 23:58:01 +0700 Subject: [PATCH 15/25] release 2016.07.13 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 704a8b911..c5898701f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.11 +[debug] youtube-dl version 2016.07.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bcd6de1c..282bd0e6b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -568,6 +568,7 @@ - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **RTVNH** + - **Rudo** - **RUHD** - **RulePorn** - **rutube**: Rutube videos @@ -794,6 +795,7 @@ - **vine:user** - **vk**: VK - **vk:uservideos**: VK - User's Videos + - **vk:wallpost** - **vlive** - **Vodlocker** - **VoiceRepublic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d60480223..56f9f5986 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.11' +__version__ = '2016.07.13' From 35ec86689c95fdded47ea65352392516951315c3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 13 Jul 2016 17:39:56 +0100 Subject: [PATCH 16/25] [bbc] extract only the original Unified Streaming Platform m3u8 manifests https://github.com/rg3/youtube-dl/commit/0385aa6199206e4ba7745efec73be26c5826286a#commitcomment-18233275 manifests with higher birate require more time to check formats --- youtube_dl/extractor/bbc.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 50c1da185..23c6e505b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -44,8 +44,6 @@ class BBCCoUkIE(InfoExtractor): _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection' _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist' - # Unified Streaming Platform - _USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' _NAMESPACES = ( _MEDIASELECTION_NS, @@ -246,15 +244,9 @@ class BBCCoUkIE(InfoExtractor): elif transfer_format == 'dash': pass elif transfer_format == 'hls': - is_unified_streaming = re.search(self._USP_RE, href) - if is_unified_streaming: - href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href) - m3u8_formats = self._extract_m3u8_formats( - href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=supplier, fatal=False) - if is_unified_streaming: - self._check_formats(m3u8_formats, programme_id) - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + href, programme_id, 'mp4', 'm3u8_native', + m3u8_id=supplier, fatal=False)) # Direct link else: formats.append({ From c39b2ed990105e640456f126321ef3d771884405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 13 Jul 2016 20:21:00 +0200 Subject: [PATCH 17/25] [rtve] Fix extraction (#10076) For http://www.rtve.es/alacarta/videos/documentos-tv/documentos-tv-revolucion-del-movil/3069778/ using 'auth/resources' fails, and other URLs seem to work fine. --- youtube_dl/extractor/rtve.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f11e3588b..de35e07d9 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -113,9 +113,7 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - video_url = video_url.replace( - 'resources/', 'auth/resources/' - ).replace('.net.rtve', '.multimedia.cdn.rtve') + video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') subtitles = None if info.get('sbtFile') is not None: From 233b58dec736205d9bf8c652063b5dba6073631f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 13 Jul 2016 21:02:34 +0200 Subject: [PATCH 18/25] Add extractor for rtve.es/television (fixes #10076) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/rtve.py | 31 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 916c8bb3e..71baee90d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -689,7 +689,7 @@ from .rtlnl import RtlNlIE from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE +from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rudo import RudoIE from .ruhd import RUHDIE diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index de35e07d9..d33b05f5d 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -220,3 +220,34 @@ class RTVELiveIE(InfoExtractor): 'formats': formats, 'is_live': True, } + + +class RTVETelevisionIE(InfoExtractor): + IE_NAME = 'rtve.es:television' + _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P\d+).shtml' + + _TEST = { + 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', + 'info_dict': { + 'id': '3069778', + 'ext': 'mp4', + 'title': 'Documentos TV - La revolución del móvil', + 'duration': 3496.948, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + alacarta_url = self._search_regex( + r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', + webpage, 'alacarta url', default=None) + if alacarta_url is None: + raise ExtractorError( + 'The webpage doesn\'t contain any video', expected=True) + + return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) From e910fe2fe439df9aa9cf8ca1253fe6db5839024a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 14 Jul 2016 14:13:57 +0100 Subject: [PATCH 19/25] [brightcove] skip ism manifests --- youtube_dl/extractor/brightcove.py | 11 +++++++---- youtube_dl/utils.py | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c172bad2d..aeb22be16 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -27,6 +27,7 @@ from ..utils import ( unsmuggle_url, update_url_query, clean_html, + mimetype2ext, ) @@ -545,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor): formats = [] for source in json_data.get('sources', []): container = source.get('container') - source_type = source.get('type') + ext = mimetype2ext(source.get('type')) src = source.get('src') - if source_type == 'application/x-mpegURL' or container == 'M2TS': + if ext == 'ism': + continue + elif ext == 'm3u8' or container == 'M2TS': if not src: continue formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif source_type == 'application/dash+xml': + elif ext == 'mpd': if not src: continue formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) @@ -568,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor): 'tbr': tbr, 'filesize': int_or_none(source.get('size')), 'container': container, - 'ext': container.lower(), + 'ext': ext or container.lower(), } if width == 0 and height == 0: f.update({ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4c1d0d526..e6e0155b4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2123,6 +2123,7 @@ def mimetype2ext(mt): 'dash+xml': 'mpd', 'f4m': 'f4m', 'f4m+xml': 'f4m', + 'vnd.ms-sstr+xml': 'ism', }.get(res, res) From 38e0f16a94b1790ed515fcd12c8bbcb58f2e4b53 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 14 Jul 2016 14:16:11 +0100 Subject: [PATCH 20/25] [ninenow] Add new extractor(closes #5181) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ninenow.py | 72 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/ninenow.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 71baee90d..45817d7df 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -537,6 +537,7 @@ from .nick import ( from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE +from .ninenow import NineNowIE from .noco import NocoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py new file mode 100644 index 000000000..f54e74de3 --- /dev/null +++ b/youtube_dl/extractor/ninenow.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + float_or_none, + ExtractorError, +) + + +class NineNowIE(InfoExtractor): + IE_NAME = '9now.com.au' + _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P[^/?#]+)' + _TESTS = [{ + # clip + 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', + 'md5': '17cf47d63ec9323e562c9957a968b565', + 'info_dict': { + 'id': '16801', + 'ext': 'mp4', + 'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike', + 'description': 'Is a boycott of the NAB Cup "on the table"?', + 'uploader_id': '4460760524001', + 'upload_date': '20160713', + 'timestamp': 1468421266, + }, + 'skip': 'Only available in Australia', + }, { + # episode + 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19', + 'only_matching': True, + }, { + # DRM protected + 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + page_data = self._parse_json(self._search_regex( + r'window\.__data\s*=\s*({.*?});', webpage, + 'page data'), display_id) + common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + video_data = common_data['video'] + + if video_data.get('drm'): + raise ExtractorError('This video is DRM protected.', expected=True) + + brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId'] + video_id = compat_str(video_data.get('id') or brightcove_id) + title = common_data['name'] + + thumbnails = [{ + 'id': thumbnail_id, + 'url': thumbnail_url, + 'width': int_or_none(thumbnail_id[1:]) + } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()] + + return { + '_type': 'url_transparent', + 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'id': video_id, + 'title': title, + 'description': common_data.get('description'), + 'duration': float_or_none(video_data.get('duration'), 1000), + 'thumbnails': thumbnails, + 'ie_key': 'BrightcoveNew', + } From 342f0c3682885ad4c7d709686b2a38a466dd05a0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 14 Jul 2016 14:19:18 +0100 Subject: [PATCH 21/25] [ninenow] correct test url --- youtube_dl/extractor/ninenow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index f54e74de3..faa577237 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -33,7 +33,7 @@ class NineNowIE(InfoExtractor): 'only_matching': True, }, { # DRM protected - 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19', + 'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1', 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' From 7b0d333a7e7f13858cc0a24a89dc32b07a2a84f3 Mon Sep 17 00:00:00 2001 From: Nehal Patel Date: Thu, 7 Jul 2016 16:39:39 -0500 Subject: [PATCH 22/25] Fix unit tests for m3u8 and RTSP extractors that require ffmpeg or mplayer --- youtube_dl/extractor/cspan.py | 5 ++++- youtube_dl/extractor/dreisat.py | 6 +++++- youtube_dl/extractor/gamespot.py | 7 +++++-- youtube_dl/extractor/orf.py | 21 +++++++++++---------- youtube_dl/extractor/threeqsdn.py | 8 ++++++-- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 84b36f44c..7e5d4f227 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor): 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', 'info_dict': { 'id': 'judiciary031715', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', + }, + 'params': { + 'skip_download': True, # m3u8 downloads } }] diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 0040e70d4..908c9e514 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE): 'ext': 'mp4', 'title': 'Waidmannsheil', 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'uploader': '3sat', + 'uploader': 'SCHWEIZWEIT', + 'uploader_id': '100000210', 'upload_date': '20140913' + }, + 'params': { + 'skip_download': True, # m3u8 downloads } }, { diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 621257c9f..4e859e09a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -28,10 +28,13 @@ class GameSpotIE(OnceIE): 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', 'info_dict': { 'id': 'gs-2300-6424837', - 'ext': 'flv', - 'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing', + 'ext': 'mp4', + 'title': 'Now Playing - The Witcher 3: Wild Hunt', 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 4e3864f0d..42d4332e8 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -40,16 +40,17 @@ class ORFTVthekIE(InfoExtractor): 'skip': 'Blocked outside of Austria / Germany', }, { 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'playlist': [{ - 'md5': '68f543909aea49d621dfc7703a11cfaf', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - } - }], + 'md5': '68f543909aea49d621dfc7703a11cfaf', + 'info_dict': { + 'id': '7982259', + 'ext': 'mp4', + 'title': 'Best of Ingrid Thurnher', + 'upload_date': '20140527', + 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', + }, + 'params': { + 'skip_download': True, # rtsp downloads + }, '_skip': 'Blocked outside of Austria / Germany', }] diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index a0bc12c81..33588424d 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -31,9 +31,13 @@ class ThreeQSDNIE(InfoExtractor): 'info_dict': { 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', 'ext': 'mp4', - 'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', - 'is_live': False, + 'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, + 'expected_warnings': ['Failed to download MPD manifest'], }, { # live audio stream 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', From 84571be64592e11ec25a4b073ac74153c261441d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Jul 2016 03:17:29 +0700 Subject: [PATCH 23/25] [orf:tvthek] Remove test md5 --- youtube_dl/extractor/orf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 42d4332e8..ccb23e069 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -40,7 +40,6 @@ class ORFTVthekIE(InfoExtractor): 'skip': 'Blocked outside of Austria / Germany', }, { 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'md5': '68f543909aea49d621dfc7703a11cfaf', 'info_dict': { 'id': '7982259', 'ext': 'mp4', From ad27649be307a55b04fa497d244ccca33b4260b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Jul 2016 03:36:50 +0700 Subject: [PATCH 24/25] [3qsdn] Restrict src JS regex --- youtube_dl/extractor/threeqsdn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index 33588424d..f26937da1 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -24,7 +24,7 @@ class ThreeQSDNIE(InfoExtractor): 'title': '0280d6b9-1215-11e6-b427-0cc47a188158', 'is_live': False, }, - 'expected_warnings': ['Failed to download MPD manifest'], + 'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'], }, { # live video stream 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', @@ -118,7 +118,7 @@ class ThreeQSDNIE(InfoExtractor): 'vcodec': 'none' if stream_type == 'audio' else None, }) - for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): + for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js): f = self._parse_json( item_js, video_id, transform_source=js_to_json, fatal=False) if not f: From 224db034abf7dff24898ae51cbe17fb1cc0d9e00 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 14 Jul 2016 23:59:12 +0100 Subject: [PATCH 25/25] [syfy] fix extraction(closes #9087)(closes #3820)(closes #2388) --- youtube_dl/extractor/syfy.py | 80 ++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 5ca079f88..53723b66e 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -1,46 +1,56 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor +from .theplatform import ThePlatformIE +from ..utils import ( + update_url_query, + smuggle_url, +) -class SyfyIE(InfoExtractor): - _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P[0-9]+)|(?!videos)(?P[^/]+)(?:$|[?#]))' - +class SyfyIE(ThePlatformIE): + _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)' _TESTS = [{ - 'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', + 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 'info_dict': { - 'id': 'NmqMrGnXvmO1', - 'ext': 'flv', - 'title': 'George Lucas has Advice for his Daughter', - 'description': 'Listen to what insights George Lucas give his daughter Amanda.', + 'id': '2968097', + 'ext': 'mp4', + 'title': 'The Internet Ruined My Life: Season 1 Trailer', + 'description': 'One tweet, one post, one click, can destroy everything.', + 'uploader': 'NBCU-MPAT', + 'upload_date': '20170113', + 'timestamp': 1484345640, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.syfy.com/wilwheaton', - 'md5': '94dfa54ee3ccb63295b276da08c415f6', - 'info_dict': { - 'id': '4yoffOOXC767', - 'ext': 'flv', - 'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.', - 'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.', - }, - 'add_ie': ['ThePlatform'], - 'skip': 'Blocked outside the US', }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_name = mobj.group('video_name') - if video_name: - generic_webpage = self._download_webpage(url, video_name) - video_id = self._search_regex( - r'', - generic_webpage, 'video ID') - url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( - video_name, video_name, video_id) - else: - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage)) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + syfy_mpx = list(self._parse_json(self._search_regex( + r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + display_id)['syfy']['syfy_mpx'].values())[0] + video_id = syfy_mpx['mpxGUID'] + title = syfy_mpx['episodeTitle'] + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + if syfy_mpx.get('entitlement') == 'auth': + resource = 'syfy<![CDATA[%s]]>%s%s' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'syfy', resource) + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url(update_url_query( + self._proto_relative_url(syfy_mpx['releaseURL']), query), + {'force_smil_url': True}), + 'title': title, + 'id': video_id, + 'display_id': display_id, + }