From 7441915b1e53e2a26f4c78796c4755a36b9e1b8d Mon Sep 17 00:00:00 2001 From: Serkora Date: Thu, 8 Dec 2016 00:46:42 +0800 Subject: [PATCH 01/81] [pandoratv] Fix extraction (closes #11023) --- youtube_dl/extractor/pandoratv.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 2b07958bb..3e37ae01d 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -5,12 +5,14 @@ from .common import InfoExtractor from ..compat import ( compat_str, compat_urlparse, + compat_urllib_request, ) from ..utils import ( ExtractorError, float_or_none, parse_duration, str_to_int, + urlencode_postdata, ) @@ -56,6 +58,18 @@ class PandoraTVIE(InfoExtractor): r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) if not height: continue + + post_data = {'prgid': video_id, 'runtime': info.get('runtime'), 'vod_url': format_url} + play_url = self._download_json('http://m.pandora.tv/?c=api&m=play_url', video_id, + data=urlencode_postdata(post_data), + headers={ + 'Origin': url, + 'Content-Type': 'application/x-www-form-urlencoded' + }) + format_url = play_url.get('url') + if not format_url: + continue + formats.append({ 'format_id': '%sp' % height, 'url': format_url, From f43795e56bc55b99e89c8fafee5613921cf1fffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 7 Dec 2016 23:50:10 +0700 Subject: [PATCH 02/81] [pandoratv] PEP 8 and simplify --- youtube_dl/extractor/pandoratv.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 3e37ae01d..cbb1968d3 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..compat import ( compat_str, compat_urlparse, - compat_urllib_request, ) from ..utils import ( ExtractorError, @@ -59,13 +58,17 @@ class 
PandoraTVIE(InfoExtractor): if not height: continue - post_data = {'prgid': video_id, 'runtime': info.get('runtime'), 'vod_url': format_url} - play_url = self._download_json('http://m.pandora.tv/?c=api&m=play_url', video_id, - data=urlencode_postdata(post_data), + play_url = self._download_json( + 'http://m.pandora.tv/?c=api&m=play_url', video_id, + data=urlencode_postdata({ + 'prgid': video_id, + 'runtime': info.get('runtime'), + 'vod_url': format_url, + }), headers={ 'Origin': url, - 'Content-Type': 'application/x-www-form-urlencoded' - }) + 'Content-Type': 'application/x-www-form-urlencoded', + }) format_url = play_url.get('url') if not format_url: continue From 6c20a0bb99e626db6870747b6329ad9c9064c123 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 9 Dec 2016 02:15:16 +0800 Subject: [PATCH 03/81] [openload] Fix extraction (closes #10408) --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 90 +++++--------------------------- 2 files changed, 14 insertions(+), 77 deletions(-) diff --git a/ChangeLog b/ChangeLog index bf5f26943..9d7de1f95 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [openload] Fix extraction (#10408) + [thisoldhouse] Recognize /tv-episode/ URLs (#11271) version 2016.12.01 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 7f19b1ba5..84aa12585 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,21 +1,12 @@ # coding: utf-8 -from __future__ import unicode_literals, division - -import re +from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_ord, -) +from ..compat import compat_chr from ..utils import ( determine_ext, ExtractorError, ) -from ..jsinterp import ( - JSInterpreter, - _NAME_RE -) class OpenloadIE(InfoExtractor): @@ -62,44 +53,6 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - def openload_decode(self, txt): - symbol_dict = { - '(゚Д゚) [゚Θ゚]': 
'_', - '(゚Д゚) [゚ω゚ノ]': 'a', - '(゚Д゚) [゚Θ゚ノ]': 'b', - '(゚Д゚) [\'c\']': 'c', - '(゚Д゚) [゚ー゚ノ]': 'd', - '(゚Д゚) [゚Д゚ノ]': 'e', - '(゚Д゚) [1]': 'f', - '(゚Д゚) [\'o\']': 'o', - '(o゚ー゚o)': 'u', - '(゚Д゚) [\'c\']': 'c', - '((゚ー゚) + (o^_^o))': '7', - '((o^_^o) +(o^_^o) +(c^_^o))': '6', - '((゚ー゚) + (゚Θ゚))': '5', - '(-~3)': '4', - '(-~-~1)': '3', - '(-~1)': '2', - '(-~0)': '1', - '((c^_^o)-(c^_^o))': '0', - } - delim = '(゚Д゚)[゚ε゚]+' - end_token = '(゚Д゚)[゚o゚]' - symbols = '|'.join(map(re.escape, symbol_dict.keys())) - txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt) - ret = '' - for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt): - for aachar in aacode.split(delim): - if aachar.isdigit(): - ret += compat_chr(int(aachar, 8)) - else: - m = re.match(r'^u([\da-f]{4})$', aachar) - if m: - ret += compat_chr(int(m.group(1), 16)) - else: - self.report_warning("Cannot decode: %s" % aachar) - return ret - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) @@ -107,36 +60,20 @@ class OpenloadIE(InfoExtractor): if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True) - # The following decryption algorithm is written by @yokrysty and - # declared to be freely used in youtube-dl - # See https://github.com/rg3/youtube-dl/issues/10408 - enc_data = self._html_search_regex( - r']*>([^<]+)\s*]*>[^<]+\s*]+id="streamurl"', - webpage, 'encrypted data') + ol_id = self._search_regex( + ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + webpage, 'openload ID') - enc_code = self._html_search_regex(r']+>(゚ω゚[^<]+)', - webpage, 'encrypted code') + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = '' + num = 2 - js_code = self.openload_decode(enc_code) - jsi = JSInterpreter(js_code) + while num < len(ol_id): + urlcode += compat_chr(int(float(ol_id[num:][:3])) - + first_two_chars * 
int(float(ol_id[num + 3:][:2]))) + num += 5 - m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') - m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function') - - offset = jsi.call_function(m_offset_fun) - diff = jsi.call_function(m_diff_fun) - - video_url_chars = [] - - for idx, c in enumerate(enc_data): - j = compat_ord(c) - if j >= 33 and j <= 126: - j = ((j + 14) % 94) + 33 - if idx == len(enc_data) - offset: - j += diff - video_url_chars += compat_chr(j) - - video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) + video_url = 'https://openload.co/stream/' + urlcode title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, @@ -155,5 +92,4 @@ class OpenloadIE(InfoExtractor): 'ext': determine_ext(title), 'subtitles': subtitles, } - return info_dict From 9ed3495eaeefdbeec5b72bd0a6575c56bc6c01c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Dec 2016 02:41:49 +0700 Subject: [PATCH 04/81] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9d7de1f95..f1d76dcd4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,18 @@ version +Core +* [socks] Fix error reporting (#11355) + Extractors * [openload] Fix extraction (#10408) +* [pandoratv] Fix extraction (#11023) ++ [telebruxelles] Add support for emission URLs +* [telebruxelles] Extract all formats ++ [bloomberg] Add another video id regular expression (#11371) +* [fusion] Update ooyala id regular expression (#11364) ++ [1tv] Add support for playlists (#11335) +* [1tv] Improve extraction (#11335) ++ [aenetworks] Extract more formats (#11321) + [thisoldhouse] Recognize /tv-episode/ URLs (#11271) version 2016.12.01 From 18ece70c4df2a4de5c7582905aa007d1237008a7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Dec 2016 02:46:18 +0700 Subject: [PATCH 05/81] release 2016.12.09 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 36559dd7b..49ae3afb6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.09** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.01 +[debug] youtube-dl version 2016.12.09 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index f1d76dcd4..f906cad2b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.09 Core * [socks] Fix error reporting (#11355) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1acb630af..a8e299802 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.01' +__version__ = '2016.12.09' From 68601ef3acde6e0f78c3b4014a11f36383db8770 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Dec 2016 10:47:19 +0100 Subject: [PATCH 06/81] [rts,srgssr] improve extraction for geo restricted videos(fixes #11089)(closes #4989) --- youtube_dl/extractor/rts.py | 153 +++++++++++++++++---------------- youtube_dl/extractor/srgssr.py | 33 ++++--- 2 files changed, 100 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index 3cc32847b..ae012ab98 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -4,27 +4,24 @@ from __future__ import unicode_literals import re from .srgssr import SRGSSRIE -from ..compat import ( - compat_str, - 
compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, - xpath_text, + determine_ext, ) class RTSIE(SRGSSRIE): IE_DESC = 'RTS.ch' - _VALID_URL = r'rts:(?P\d+)|https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html' + _VALID_URL = r'rts:(?P\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html' _TESTS = [ { 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', - 'md5': 'f254c4b26fb1d3c183793d52bc40d3e7', + 'md5': 'ff7f8450a90cf58dacb64e29707b4a8e', 'info_dict': { 'id': '3449373', 'display_id': 'les-enfants-terribles', @@ -38,35 +35,17 @@ class RTSIE(SRGSSRIE): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - } }, { 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', - 'md5': 'f1077ac5af686c76528dc8d7c5df29ba', 'info_dict': { - 'id': '5742494', - 'display_id': '5742494', - 'ext': 'mp4', - 'duration': 3720, - 'title': 'Les yeux dans les cieux - Mon homard au Canada', - 'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7', - 'uploader': 'Passe-moi les jumelles', - 'upload_date': '20140404', - 'timestamp': 1396635300, - 'thumbnail': 're:^https?://.*\.image', - 'view_count': int, + 'id': '5624065', + 'title': 'Passe-moi les jumelles', }, - 'params': { - # m3u8 download - 'skip_download': True, - } + 'playlist_mincount': 4, }, { 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', - 'md5': 'b4326fecd3eb64a458ba73c73e91299d', 'info_dict': { 'id': '5745975', 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski', @@ -80,11 +59,15 @@ class RTSIE(SRGSSRIE): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'skip': 'Blocked outside Switzerland', }, { 'url': 
'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', - 'md5': '9f713382f15322181bb366cc8c3a4ff0', + 'md5': '1bae984fe7b1f78e94abc74e802ed99f', 'info_dict': { 'id': '5745356', 'display_id': 'londres-cachee-par-un-epais-smog', @@ -92,16 +75,12 @@ class RTSIE(SRGSSRIE): 'duration': 33, 'title': 'Londres cachée par un épais smog', 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', - 'uploader': 'Le Journal en continu', + 'uploader': 'L\'actu en vidéo', 'upload_date': '20140403', 'timestamp': 1396537322, 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - } }, { 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', @@ -125,6 +104,10 @@ class RTSIE(SRGSSRIE): 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', }, 'playlist_mincount': 5, + }, + { + 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', + 'only_matching': True, } ] @@ -142,19 +125,32 @@ class RTSIE(SRGSSRIE): # media_id extracted out of URL is not always a real id if 'video' not in all_info and 'audio' not in all_info: - page = self._download_webpage(url, display_id) + entries = [] - # article with videos on rhs - videos = re.findall( - r']+class="content-item"[^>]*>\s*]+data-video-urn="urn:([^"]+)"', - page) - if not videos: + for item in all_info.get('items', []): + item_url = item.get('url') + if not item_url: + continue + entries.append(self.url_result(item_url, 'RTS')) + + if not entries: + page, urlh = self._download_webpage_handle(url, display_id) + if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id: + return self.url_result(urlh.geturl(), 'RTS') + + # article with videos on rhs videos = re.findall( - 
r'(?s)]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', + r']+class="content-item"[^>]*>\s*]+data-video-urn="urn:([^"]+)"', page) - if videos: - entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] - return self.playlist_result(entries, media_id, self._og_search_title(page)) + if not videos: + videos = re.findall( + r'(?s)]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', + page) + if videos: + entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] + + if entries: + return self.playlist_result(entries, media_id, all_info.get('title')) internal_id = self._html_search_regex( r'<(?:video|audio) data-id="([0-9]+)"', page, @@ -168,36 +164,29 @@ class RTSIE(SRGSSRIE): info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] - upload_timestamp = parse_iso8601(info.get('broadcast_date')) - duration = info.get('duration') or info.get('cutout') or info.get('cutduration') - if isinstance(duration, compat_str): - duration = parse_duration(duration) - view_count = info.get('plays') - thumbnail = unescapeHTML(info.get('preview_image_url')) + title = info['title'] def extract_bitrate(url): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) formats = [] - for format_id, format_url in info['streams'].items(): - if format_id == 'hds_sd' and 'hds' in info['streams']: + streams = info.get('streams', {}) + for format_id, format_url in streams.items(): + if format_id == 'hds_sd' and 'hds' in streams: continue - if format_id == 'hls_sd' and 'hls' in info['streams']: + if format_id == 'hls_sd' and 'hls' in streams: continue - if format_url.endswith('.f4m'): - token = self._download_xml( - 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path, - media_id, 'Downloading %s token' % format_id) - auth_params = xpath_text(token, './/authparams', 'auth params') - if not auth_params: - continue - 
formats.extend(self._extract_f4m_formats( - '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), - media_id, f4m_id=format_id, fatal=False)) - elif format_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats( - format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) + ext = determine_ext(format_url) + if ext in ('m3u8', 'f4m'): + format_url = self._get_tokenized_src(format_url, media_id, format_id) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0', + media_id, f4m_id=format_id, fatal=False)) + else: + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, @@ -205,25 +194,37 @@ class RTSIE(SRGSSRIE): 'tbr': extract_bitrate(format_url), }) - if 'media' in info: - formats.extend([{ - 'format_id': '%s-%sk' % (media['ext'], media['rate']), - 'url': 'http://download-video.rts.ch/%s' % media['url'], - 'tbr': media['rate'] or extract_bitrate(media['url']), - } for media in info['media'] if media.get('rate')]) + for media in info.get('media', []): + media_url = media.get('url') + if not media_url or re.match(r'https?://', media_url): + continue + rate = media.get('rate') + ext = media.get('ext') or determine_ext(media_url, 'mp4') + format_id = ext + if rate: + format_id += '-%dk' % rate + formats.append({ + 'format_id': format_id, + 'url': 'http://download-video.rts.ch/' + media_url, + 'tbr': rate or extract_bitrate(media_url), + }) self._check_formats(formats, media_id) self._sort_formats(formats) + duration = info.get('duration') or info.get('cutout') or info.get('cutduration') + if isinstance(duration, compat_str): + duration = parse_duration(duration) + return { 'id': media_id, 'display_id': display_id, 'formats': formats, - 'title': info['title'], + 'title': title, 'description': info.get('intro'), 'duration': 
duration, - 'view_count': view_count, + 'view_count': int_or_none(info.get('plays')), 'uploader': info.get('programName'), - 'timestamp': upload_timestamp, - 'thumbnail': thumbnail, + 'timestamp': parse_iso8601(info.get('broadcast_date')), + 'thumbnail': unescapeHTML(info.get('preview_image_url')), } diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 246970c4d..847d3c08f 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, parse_iso8601, @@ -23,6 +24,16 @@ class SRGSSRIE(InfoExtractor): 'STARTDATE': 'This video is not yet available. Please try again later.', } + def _get_tokenized_src(self, url, video_id, format_id): + sp = compat_urllib_parse_urlparse(url).path.split('/') + token = self._download_json( + 'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]), + video_id, 'Downloading %s token' % format_id, fatal=False) or {} + auth_params = token.get('token', {}).get('authparams') + if auth_params: + url += '?' 
+ auth_params + return url + def get_media_data(self, bu, media_type, media_id): media_data = self._download_json( 'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id), @@ -61,14 +72,16 @@ class SRGSSRIE(InfoExtractor): asset_url = asset['text'] quality = asset['@quality'] format_id = '%s-%s' % (protocol, quality) - if protocol == 'HTTP-HDS': - formats.extend(self._extract_f4m_formats( - asset_url + '?hdcore=3.4.0', media_id, - f4m_id=format_id, fatal=False)) - elif protocol == 'HTTP-HLS': - formats.extend(self._extract_m3u8_formats( - asset_url, media_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False)) + if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'): + asset_url = self._get_tokenized_src(asset_url, media_id, format_id) + if protocol.startswith('HTTP-HDS'): + formats.extend(self._extract_f4m_formats( + asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0', + media_id, f4m_id=format_id, fatal=False)) + elif protocol.startswith('HTTP-HLS'): + formats.extend(self._extract_m3u8_formats( + asset_url, media_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, @@ -94,10 +107,10 @@ class SRGSSRPlayIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', - 'md5': '4cd93523723beff51bb4bee974ee238d', + 'md5': 'da6b5b3ac9fa4761a942331cef20fcb3', 'info_dict': { 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5', - 'ext': 'm4v', + 'ext': 'mp4', 'upload_date': '20130701', 'title': 'Snowden beantragt Asyl in Russland', 'timestamp': 1372713995, From f41db4059682ec4a1c2fdcffab494adad71ccf9b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Dec 2016 13:29:51 +0100 Subject: [PATCH 07/81] [prosiebensat1] extract dash formats --- youtube_dl/extractor/prosiebensat1.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7cc07a2ad..30478f979 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -85,6 +85,9 @@ class ProSiebenSat1BaseIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( source_url, clip_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + elif mimetype == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id='dash', fatal=False)) else: tbr = fix_bitrate(source['bitrate']) if protocol in ('rtmp', 'rtmpe'): From 0d7d9f94045868d22493d4932d124170d26511fe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Dec 2016 16:34:01 +0100 Subject: [PATCH 08/81] [rte] improve extraction(closes #10498)(closes #7746) --- youtube_dl/extractor/rte.py | 181 ++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 102 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index ebe563ebb..e09670da2 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -4,118 +4,31 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( float_or_none, parse_iso8601, unescapeHTML, + ExtractorError, ) -class RteIE(InfoExtractor): - IE_NAME = 'rte' - IE_DESC = 'Raidió Teilifís Éireann TV' - _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', - 'info_dict': { - 'id': '10478715', - 'ext': 'flv', - 'title': 'Watch iWitness online', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.', - 'duration': 60.046, - }, - 'params': { - 'skip_download': 'f4m fails with --test atm' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, 
video_id) - - title = self._og_search_title(webpage) - description = self._html_search_meta('description', webpage, 'description') - duration = float_or_none(self._html_search_meta( - 'duration', webpage, 'duration', fatal=False), 1000) - - thumbnail = None - thumbnail_meta = self._html_search_meta('thumbnail', webpage) - if thumbnail_meta: - thumbnail_id = self._search_regex( - r'uri:irus:(.+)', thumbnail_meta, - 'thumbnail id', fatal=False) - if thumbnail_id: - thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id - - feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id - json_string = self._download_json(feeds_url, video_id) - - # f4m_url = server + relative_url - f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] - f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - self._sort_formats(f4m_formats) - - return { - 'id': video_id, - 'title': title, - 'formats': f4m_formats, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - } - - -class RteRadioIE(InfoExtractor): - IE_NAME = 'rte:radio' - IE_DESC = 'Raidió Teilifís Éireann radio' - # Radioplayer URLs have two distinct specifier formats, - # the old format #!rii=:::: - # the new format #!rii=b____ - # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. - # An uniquely defines an individual recording, and is the only part we require. 
- _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P[0-9]+)' - - _TESTS = [{ - # Old-style player URL; HLS and RTMPE formats - 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', - 'info_dict': { - 'id': '10507902', - 'ext': 'mp4', - 'title': 'Gloria', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', - 'timestamp': 1451203200, - 'upload_date': '20151227', - 'duration': 7230.0, - }, - 'params': { - 'skip_download': 'f4m fails with --test atm' - } - }, { - # New-style player URL; RTMPE formats only - 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', - 'info_dict': { - 'id': '3250678', - 'ext': 'flv', - 'title': 'The Lyric Concert with Paul Herriott', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': '', - 'timestamp': 1333742400, - 'upload_date': '20120406', - 'duration': 7199.016, - }, - 'params': { - 'skip_download': 'f4m fails with --test atm' - } - }] - +class RteBaseIE(InfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - json_string = self._download_json( - 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id, - item_id) + try: + json_string = self._download_json( + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id, + item_id) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise # NB the string values in the JSON are stored using XML escaping(!) 
show = json_string['shows'][0] @@ -163,3 +76,67 @@ class RteRadioIE(InfoExtractor): 'duration': duration, 'formats': formats, } + + +class RteIE(RteBaseIE): + IE_NAME = 'rte' + IE_DESC = 'Raidió Teilifís Éireann TV' + _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', + 'md5': '4a76eb3396d98f697e6e8110563d2604', + 'info_dict': { + 'id': '10478715', + 'ext': 'mp4', + 'title': 'iWitness', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'The spirit of Ireland, one voice and one minute at a time.', + 'duration': 60.046, + 'upload_date': '20151012', + 'timestamp': 1444694160, + }, + } + + +class RteRadioIE(RteBaseIE): + IE_NAME = 'rte:radio' + IE_DESC = 'Raidió Teilifís Éireann radio' + # Radioplayer URLs have two distinct specifier formats, + # the old format #!rii=:::: + # the new format #!rii=b____ + # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. + # An uniquely defines an individual recording, and is the only part we require. 
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P[0-9]+)' + + _TESTS = [{ + # Old-style player URL; HLS and RTMPE formats + 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', + 'md5': 'c79ccb2c195998440065456b69760411', + 'info_dict': { + 'id': '10507902', + 'ext': 'mp4', + 'title': 'Gloria', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', + 'timestamp': 1451203200, + 'upload_date': '20151227', + 'duration': 7230.0, + }, + }, { + # New-style player URL; RTMPE formats only + 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', + 'info_dict': { + 'id': '3250678', + 'ext': 'flv', + 'title': 'The Lyric Concert with Paul Herriott', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': '', + 'timestamp': 1333742400, + 'upload_date': '20120406', + 'duration': 7199.016, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }] From 8821a718cfeca740d42d109411645427d4f8b523 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Dec 2016 17:17:13 +0100 Subject: [PATCH 09/81] [common] recognize hls manifests that contain video only formats(#11394) --- youtube_dl/extractor/cbc.py | 5 ----- youtube_dl/extractor/common.py | 13 ++++++++++--- youtube_dl/extractor/msn.py | 5 ----- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index d71fddf58..7c76ceac8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -283,11 +283,6 @@ class CBCWatchVideoIE(CBCWatchBaseIE): formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) if len(formats) < 2: formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') - # Despite metadata in m3u8 all video+audio formats are - # actually video-only 
(no audio) - for f in formats: - if f.get('acodec') != 'none' and f.get('vcodec') != 'none': - f['acodec'] = 'none' self._sort_formats(formats) info = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 05c51fac9..6ae946569 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1224,6 +1224,7 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] + audio_groups = set() last_info = {} last_media = {} for line in m3u8_doc.splitlines(): @@ -1239,15 +1240,18 @@ class InfoExtractor(object): for v in (media.get('GROUP-ID'), media.get('NAME')): if v: format_id.append(v) - formats.append({ + f = { 'format_id': '-'.join(format_id), 'url': format_url(media_url), 'language': media.get('LANGUAGE'), - 'vcodec': 'none' if media_type == 'AUDIO' else None, 'ext': ext, 'protocol': entry_protocol, 'preference': preference, - }) + } + if media_type == 'AUDIO': + f['vcodec'] = 'none' + audio_groups.add(media['GROUP-ID']) + formats.append(f) else: # When there is no URI in EXT-X-MEDIA let this tag's # data be used by regular URI lines below @@ -1295,6 +1299,9 @@ class InfoExtractor(object): 'abr': abr, }) f.update(parse_codecs(last_info.get('CODECS'))) + if last_info.get('AUDIO') in audio_groups: + # TODO: update acodec for for audio only formats with the same GROUP-ID + f['acodec'] = 'none' formats.append(f) last_info = {} last_media = {} diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index d75ce8b3b..1473bcf48 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -78,11 +78,6 @@ class MSNIE(InfoExtractor): m3u8_formats = self._extract_m3u8_formats( format_url, display_id, 'mp4', m3u8_id='hls', fatal=False) - # Despite metadata in m3u8 all video+audio formats are - # actually video-only (no audio) - for f in m3u8_formats: - if f.get('acodec') != 'none' and f.get('vcodec') != 'none': - f['acodec'] = 'none' formats.extend(m3u8_formats) else: 
formats.append({ From f0b69fa91a00a1efe00e75f8ffaef3d27656f6dc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Dec 2016 17:36:32 +0100 Subject: [PATCH 10/81] [ctvnews] relax _VALID_URL regex(closes #11394) --- youtube_dl/extractor/ctvnews.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py index 1023b6130..55a127b76 100644 --- a/youtube_dl/extractor/ctvnews.py +++ b/youtube_dl/extractor/ctvnews.py @@ -8,7 +8,7 @@ from ..utils import orderedSet class CTVNewsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' + _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _TESTS = [{ 'url': 'http://www.ctvnews.ca/video?clipId=901995', 'md5': '10deb320dc0ccb8d01d34d12fc2ea672', @@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor): }, { 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', 'only_matching': True, + }, { + 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241', + 'only_matching': True, }] def _real_extract(self, url): From 655cb545ab2a037465e18bcb88b54a4375fe2f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Dec 2016 23:45:36 +0700 Subject: [PATCH 11/81] [mixcloud] Relax _VALID_URL (closes #11406) --- youtube_dl/extractor/mixcloud.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 560fe188b..202c05dcb 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -22,7 +22,7 @@ from ..utils import ( class MixcloudIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' + _VALID_URL =
r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' _TESTS = [{ @@ -51,6 +51,9 @@ class MixcloudIE(InfoExtractor): 'view_count': int, 'like_count': int, }, + }, { + 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', + 'only_matching': True, }] # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js From 6ca478d44a59b6ed2567d26c2f857060cd41e3df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Dec 2016 00:45:27 +0700 Subject: [PATCH 12/81] [canalplus] Add another video id regex (closes #11399) --- youtube_dl/extractor/canalplus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 1c3c41d26..10cf165bc 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -105,7 +105,8 @@ class CanalplusIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._search_regex( [r']+?videoId=(["\'])(?P\d+)', - r'id=["\']canal_video_player(?P\d+)'], + r'id=["\']canal_video_player(?P\d+)', + r'data-video=["\'](?P\d+)'], webpage, 'video id', group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) From 19b4900b7b33047e439ae9c960f1a8516ecf3ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Dec 2016 01:22:01 +0700 Subject: [PATCH 13/81] [facebook] Improve video selection (closes #11390) --- youtube_dl/extractor/facebook.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b4d38e5c2..4de21baeb 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -244,8 +244,10 @@ class FacebookIE(InfoExtractor): r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) for item in 
server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': - video_data = item[2][0]['videoData'] - break + video_item = item[2][0] + if video_item.get('video_id') == video_id: + video_data = video_item['videoData'] + break if not video_data: if not fatal_if_no_video: From d2d2495e1669e880cd2eb56fa5c9a92f63c5476b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Dec 2016 01:40:30 +0700 Subject: [PATCH 14/81] [facebook] Detect login required error message --- youtube_dl/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4de21baeb..56a8582b4 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -257,6 +257,8 @@ class FacebookIE(InfoExtractor): raise ExtractorError( 'The video is not available, Facebook said: "%s"' % m_msg.group(1), expected=True) + elif '>You must log in to continue' in webpage: + self.raise_login_required() else: raise ExtractorError('Cannot parse data') From fb37eb25d9494d71ef9321154b4be759d4d64dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Dec 2016 00:49:07 +0700 Subject: [PATCH 15/81] [utils] Add common user agents map --- youtube_dl/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9595bcf9f..3d4951ad9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -86,6 +86,11 @@ std_headers = { } +USER_AGENTS = { + 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', +} + + NO_DEFAULT = object() ENGLISH_MONTH_NAMES = [ From 3530e0d3d922c6e7289f0586c1e2febd2953b680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Dec 2016 00:58:08 +0700 Subject: [PATCH 16/81] [dplay] Use Safari user-agent for hls (closes #11418) --- youtube_dl/extractor/dplay.py | 11 +++++++++-- 1 file changed, 9 
insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 5790553f3..32028bc3b 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -8,6 +8,7 @@ import time from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + USER_AGENTS, int_or_none, update_url_query, ) @@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor): manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False) # Sometimes final URLs inside m3u8 are unsigned, let's fix this - # ourselves + # ourselves. Also fragments' URLs are only served signed for + # Safari user agent. query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query) for m3u8_format in m3u8_formats: - m3u8_format['url'] = update_url_query(m3u8_format['url'], query) + m3u8_format.update({ + 'url': update_url_query(m3u8_format['url'], query), + 'http_headers': { + 'User-Agent': USER_AGENTS['Safari'], + }, + }) formats.extend(m3u8_formats) elif protocol == 'hds': formats.extend(self._extract_f4m_formats( From 62faf9b55eca4bf5b1caec384abcdba796521fa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Dec 2016 01:41:08 +0700 Subject: [PATCH 17/81] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index f906cad2b..8b69dfca9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +version + +Core ++ [utils] Add common user agents map ++ [common] Recognize HLS manifests that contain video only formats (#11394) + +Extractors ++ [dplay] Use Safari user agent for HLS (#11418) ++ [facebook] Detect login required error message +* [facebook] Improve video selection (#11390) ++ [canalplus] Add another video id pattern (#11399) +* [mixcloud] Relax URL regular expression (#11406) +* [ctvnews] Relax URL regular expression (#11394) ++ [rte] Capture and output error message (#7746, 
#10498) ++ [prosiebensat1] Add support for DASH formats +* [srgssr] Improve extraction for geo restricted videos (#11089) +* [rts] Improve extraction for geo restricted videos (#4989) + + version 2016.12.09 Core @@ -15,6 +34,7 @@ Extractors + [aenetworks] Extract more formats (#11321) + [thisoldhouse] Recognize /tv-episode/ URLs (#11271) + version 2016.12.01 Extractors From 3c1e9dc4ecb76929b8d9f001ea6593269b3667b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Dec 2016 01:44:50 +0700 Subject: [PATCH 18/81] release 2016.12.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 49ae3afb6..24c11302b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.09 +[debug] youtube-dl version 2016.12.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8b69dfca9..3909e350a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.12 Core + [utils] Add common user agents map diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8e299802..55ccaf66a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.09' +__version__ = '2016.12.12' From abf3494ac7b78498d8f95c92548e857b7673c7de Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 12 Dec 2016 14:43:33 +0800 Subject: [PATCH 19/81] [melonvod] Add extractor for vod.melon.com --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/melonvod.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/melonvod.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 46d007b7d..2801a380c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -498,6 +498,7 @@ from .mangomolo import ( ) from .matchtv import MatchTVIE from .mdr import MDRIE +from .melonvod import MelonVODIE from .meta 
import METAIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dl/extractor/melonvod.py new file mode 100644 index 000000000..9b00ce8e3 --- /dev/null +++ b/youtube_dl/extractor/melonvod.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class MelonVODIE(InfoExtractor): + _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?.*mvId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734', + 'md5': '461fc04c6d23cbf49f4fef4d61851d32', + 'info_dict': { + 'id': '50158734', + 'ext': 'mp4', + 'title': 'Jessica \'Wonderland\' MV Making Film', + 'thumbnail': 're:^https?://.*\.jpg$', + 'artist': 'Jessica (제시카)', + 'upload_date': '20161212', + 'duration': 203, + }, + 'params': { + 'skip_download': 'm3u8 download', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + play_info = self._download_json( + 'http://vod.melon.com/video/playerInfo.json', video_id, + note='Downloading playerInfo', query={'mvId': video_id} + ) + title = play_info['mvInfo']['MVTITLE'] + artist = ', '.join([artist['ARTISTNAMEWEBLIST'] for artist in play_info.get('artistList', [])]) + + info = self._download_json( + 'http://vod.melon.com/delivery/streamingInfo.json', video_id, + note='Downloading streamingInfo', + query={'contsId': video_id, 'contsType': 'VIDEO'} + ) + stream_info = info.get('streamingInfo', {}) + m3u8_url = stream_info.get('encUrl') + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + thumbnail = info.get('staticDomain', '') + stream_info.get('imgPath', '') + duration = int_or_none(stream_info.get('playTime')) + upload_date = stream_info.get('mvSvcOpenDt', '')[:8] + + return { + 'id': video_id, + 'title': title, + 'artist': artist, + 'thumbnail': thumbnail, + 'upload_date': upload_date, +
'duration': duration, + 'formats': formats + } From e34c33614d8e4f0208d96d71e9c0ac6571587555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Dec 2016 02:23:49 +0700 Subject: [PATCH 20/81] [utils] Add convenience urljoin --- test/test_utils.py | 14 ++++++++++++++ youtube_dl/utils.py | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 2e3cd0179..3f45b0bd1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -70,6 +70,7 @@ from youtube_dl.utils import ( lowercase_escape, url_basename, base_url, + urljoin, urlencode_postdata, urshift, update_url_query, @@ -445,6 +446,19 @@ class TestUtil(unittest.TestCase): self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + def test_urljoin(self): + self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', None), None) + self.assertEqual(urljoin('http://foo.de/', ''), None) + self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(False), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3d4951ad9..694e9a600 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1700,6 +1700,16 @@ def base_url(url): return re.match(r'https?://[^?#&]+/', url).group() +def urljoin(base, path): + if not isinstance(path, compat_str) or not path: + return None + if re.match(r'https?://', path): + return path + if not isinstance(base, compat_str) or not re.match(r'https?://', base): + return None + return compat_urlparse.urljoin(base, path) + + class HEADRequest(compat_urllib_request.Request): def get_method(self): return 'HEAD' From 3a40f859b5e76b4fa27b9e72688400349972e82f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Dec 2016 02:25:42 +0700 Subject: [PATCH 21/81] [melonvod] Improve (closes #11419) --- youtube_dl/extractor/melonvod.py | 43 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dl/extractor/melonvod.py index 9b00ce8e3..2c80b3ba8 100644 --- a/youtube_dl/extractor/melonvod.py +++ b/youtube_dl/extractor/melonvod.py @@ -2,18 +2,20 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + urljoin, +) class MelonVODIE(InfoExtractor): - _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?.*mvId=(?P<id>[0-9]+)' + _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)' _TEST = { 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734', - 'md5': '461fc04c6d23cbf49f4fef4d61851d32', 'info_dict': { 'id': '50158734', 'ext': 'mp4', - 'title': 'Jessica \'Wonderland\' MV Making Film', + 'title': "Jessica 'Wonderland' MV Making Film", 'thumbnail': 're:^https?://.*\.jpg$', 'artist': 'Jessica (제시카)', 'upload_date': '20161212', @@ -29,24 +31,35 @@ class MelonVODIE(InfoExtractor): play_info = self._download_json( 'http://vod.melon.com/video/playerInfo.json', video_id, - note='Downloading playerInfo', query={'mvId': video_id} - ) + note='Downloading player info
JSON', query={'mvId': video_id}) + title = play_info['mvInfo']['MVTITLE'] - artist = ', '.join([artist['ARTISTNAMEWEBLIST'] for artist in play_info.get('artistList', [])]) info = self._download_json( 'http://vod.melon.com/delivery/streamingInfo.json', video_id, - note='Downloading streamingInfo', - query={'contsId': video_id, 'contsType': 'VIDEO'} - ) - stream_info = info.get('streamingInfo', {}) - m3u8_url = stream_info.get('encUrl') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') + note='Downloading streaming info JSON', + query={ + 'contsId': video_id, + 'contsType': 'VIDEO', + }) + + stream_info = info['streamingInfo'] + + formats = self._extract_m3u8_formats( + stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls') self._sort_formats(formats) - thumbnail = info.get('staticDomain', '') + stream_info.get('imgPath', '') + artist_list = play_info.get('artistList') + artist = None + if isinstance(artist_list, list): + artist = ', '.join( + [a['ARTISTNAMEWEBLIST'] + for a in artist_list if a.get('ARTISTNAMEWEBLIST')]) + + thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath')) + duration = int_or_none(stream_info.get('playTime')) - upload_date = stream_info.get('mvSvcOpenDt', '')[:8] + upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None return { 'id': video_id, From 67dcbc0add399d5dc0f8ffe82723b03451782c97 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 13 Dec 2016 17:59:22 +0100 Subject: [PATCH 22/81] [canvas] extract dash formats --- youtube_dl/extractor/canvas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index d183d5d52..2cc539a6c 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor): elif format_type == 'HDS': formats.extend(self._extract_f4m_formats( format_url, display_id, f4m_id=format_type, fatal=False)) + elif format_type == 'MPEG_DASH': + 
formats.extend(self._extract_mpd_formats( + format_url, display_id, mpd_id=format_type, fatal=False)) else: formats.append({ 'format_id': format_type, From 577748075bf5bb2f3fcf4f6b8dbb54ae4e6af585 Mon Sep 17 00:00:00 2001 From: Corey Nicholson Date: Mon, 5 Dec 2016 21:16:12 +0000 Subject: [PATCH 23/81] [vlive] Update extraction --- youtube_dl/extractor/vlive.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index acf9fda48..1a9f10eb4 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -48,17 +48,16 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - video_params = self._search_regex( - r'\bvlive\.video\.init\(([^)]+)\)', - webpage, 'video params') - status, _, _, live_params, long_video_id, key = re.split( - r'"\s*,\s*"', video_params)[2:8] + video_params = self._parse_json(self._search_regex( + r'\bvlive\.video\.init\(([^)]+)\);', + webpage, 'video params'), video_id, + transform_source=lambda s: '[' + s + ']') + + status, long_video_id, key = video_params[2], video_params[5], video_params[6] status = remove_start(status, 'PRODUCT_') if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': - live_params = self._parse_json('"%s"' % live_params, video_id) - live_params = self._parse_json(live_params, video_id) - return self._live(video_id, webpage, live_params) + return self._live(video_id, webpage) elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': if long_video_id and key: return self._replay(video_id, webpage, long_video_id, key) @@ -89,7 +88,20 @@ class VLiveIE(InfoExtractor): 'thumbnail': thumbnail, } - def _live(self, video_id, webpage, live_params): + def _live(self, video_id, webpage): + init_page = self._download_webpage( + 'http://www.vlive.tv/video/init/view', + video_id, data="videoSeq=%s" % video_id, headers={ + 'Referer': 
'http://www.vlive.tv/video/%s' % video_id, + 'Content-Type': 'application/x-www-form-urlencoded' + }) + + live_params = self._search_regex( + r'"liveStreamInfo"\s*:\s*(".*"),', + init_page, 'video params') + live_params = self._parse_json(live_params, video_id) + live_params = self._parse_json(live_params, video_id) + formats = [] for vid in live_params.get('resolutions', []): formats.extend(self._extract_m3u8_formats( From 89c63cc5f88d4ec0fbeecbec43d5bc12b8144049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Dec 2016 21:05:50 +0700 Subject: [PATCH 24/81] [vlive] Add video params extraction fallback and improve (closes #11375) --- youtube_dl/extractor/vlive.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 1a9f10eb4..a6e60c88a 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -10,6 +10,7 @@ from ..utils import ( float_or_none, int_or_none, remove_start, + urlencode_postdata, ) from ..compat import compat_urllib_parse_urlencode @@ -48,12 +49,19 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - video_params = self._parse_json(self._search_regex( - r'\bvlive\.video\.init\(([^)]+)\);', - webpage, 'video params'), video_id, - transform_source=lambda s: '[' + s + ']') + VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' + VIDEO_PARAMS_FIELD = 'video params' - status, long_video_id, key = video_params[2], video_params[5], video_params[6] + params = self._parse_json(self._search_regex( + VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id, + transform_source=lambda s: '[' + s + ']', fatal=False) + + if not params or len(params) < 7: + params = self._search_regex( + VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) + params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] + + status, long_video_id, key = 
params[2], params[5], params[6] status = remove_start(status, 'PRODUCT_') if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': @@ -91,14 +99,16 @@ class VLiveIE(InfoExtractor): def _live(self, video_id, webpage): init_page = self._download_webpage( 'http://www.vlive.tv/video/init/view', - video_id, data="videoSeq=%s" % video_id, headers={ + video_id, note='Downloading live webpage', + data=urlencode_postdata({'videoSeq': video_id}), + headers={ 'Referer': 'http://www.vlive.tv/video/%s' % video_id, 'Content-Type': 'application/x-www-form-urlencoded' }) live_params = self._search_regex( r'"liveStreamInfo"\s*:\s*(".*"),', - init_page, 'video params') + init_page, 'live stream info') live_params = self._parse_json(live_params, video_id) live_params = self._parse_json(live_params, video_id) From 069f9183025597d7ef5fe152b261e7de701ec260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Dec 2016 21:30:33 +0700 Subject: [PATCH 25/81] [vlive] Use live titles for live streams --- youtube_dl/extractor/vlive.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index a6e60c88a..540246c79 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -120,10 +120,14 @@ class VLiveIE(InfoExtractor): fatal=False, live=True)) self._sort_formats(formats) - return dict(self._get_common_fields(webpage), - id=video_id, - formats=formats, - is_live=True) + info = self._get_common_fields(webpage) + info.update({ + 'title': self._live_title(info['title']), + 'id': video_id, + 'formats': formats, + 'is_live': True, + }) + return info def _replay(self, video_id, webpage, long_video_id, key): playinfo = self._download_json( @@ -157,8 +161,11 @@ class VLiveIE(InfoExtractor): 'ext': 'vtt', 'url': caption['source']}] - return dict(self._get_common_fields(webpage), - id=video_id, - formats=formats, - view_count=view_count, - 
subtitles=subtitles) + info = self._get_common_fields(webpage) + info.update({ + 'id': video_id, + 'formats': formats, + 'view_count': view_count, + 'subtitles': subtitles, + }) + return info From 30918999f5fa3e5fc11e5b83f2f4aef0358965a0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 15 Dec 2016 01:01:14 +0800 Subject: [PATCH 26/81] [facebook] Recognize .onion URLs (closes #11443) --- ChangeLog | 5 +++++ youtube_dl/extractor/facebook.py | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3909e350a..428ade1e6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors ++ [facebook] Recognize .onion URLs (#11443) + version 2016.12.12 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 56a8582b4..c0a7fc7d8 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?:// - (?:[\w-]+\.)?facebook\.com/ + (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/ (?:[^#]*?\#!/)? 
(?: (?: @@ -150,6 +150,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', 'only_matching': True, + }, { + 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', + 'only_matching': True, }] @staticmethod From 5c32a5be959f04b793f2e9264e9201b4efff3e40 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 15 Dec 2016 17:51:26 +0800 Subject: [PATCH 27/81] [openload] Recognize oload.tv URLs (#10408) --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 428ade1e6..7ff8d3502 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [openload] Recognize oload.tv URLs (#10408) + [facebook] Recognize .onion URLs (#11443) version 2016.12.12 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 84aa12585..8c5ec72d9 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -10,7 +10,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -51,6 +51,9 @@ class OpenloadIE(InfoExtractor): # for title and ext 'url': 'https://openload.co/embed/Sxz5sADo82g/', 'only_matching': True, + }, { + 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', + 'only_matching': True, }] def _real_extract(self, url): From d7ef47bffda9d0c112a027dc4175ced3c497c1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 15 Dec 2016 21:15:45 +0700 Subject: [PATCH 28/81] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7ff8d3502..3aa327e00 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,15 @@ version +Core ++ [utils] Add convenience
urljoin + Extractors + [openload] Recognize oload.tv URLs (#10408) + [facebook] Recognize .onion URLs (#11443) +* [vlive] Fix extraction (#11375, #11383) ++ [canvas] Extract DASH formats ++ [melonvod] Add support for vod.melon.com (#11419) + version 2016.12.12 From 38be3bc568a7233a9dbc422a0298b53ede893cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 15 Dec 2016 21:16:55 +0700 Subject: [PATCH 29/81] release 2016.12.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 24c11302b..7bf8d6fd4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.12 +[debug] youtube-dl version 2016.12.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 3aa327e00..a9bfd2711 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.15 Core + [utils] Add convenience urljoin diff --git a/docs/supportedsites.md b/docs/supportedsites.md index edb76d9cc..226552d4d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -402,6 +402,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **MelonVOD** - **META** - **metacafe** - **Metacritic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 55ccaf66a..a4bba4a52 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.12' +__version__ = '2016.12.15' From 7b1e80792bd76a4fb9b091b3b327423030cdf9a2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 16 Dec 2016 09:05:02 +0100 Subject: [PATCH 30/81] [vvvvid] Add new extractor(closes #5915) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vvvvid.py | 148 +++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 youtube_dl/extractor/vvvvid.py diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index 2801a380c..bcf9f1906 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1110,6 +1110,7 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE +from .vvvvid import VVVVIDIE from .vyborymos import VyboryMosIE from .vzaar import VzaarIE from .walla import WallaIE diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py new file mode 100644 index 000000000..04fe2e89f --- /dev/null +++ b/youtube_dl/extractor/vvvvid.py @@ -0,0 +1,148 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + str_or_none, +) + + +class VVVVIDIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' + _TESTS = [{ + # video_type == 'video/vvvvid' + 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong', + 'md5': 'b8d3cecc2e981adc3835adf07f6df91b', + 'info_dict': { + 'id': '489048', + 'ext': 'mp4', + 'title': 'Ping Pong', + }, + }, { + # video_type == 'video/rcs' + 'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01', + 'md5': '33e0edfba720ad73a8782157fdebc648', + 'info_dict': { + 'id': '482493', + 'ext': 'mp4', + 'title': 'Episodio 01', + }, + }] + _conn_id = None + + def _real_initialize(self): + if not self._conn_id: + user = self._downloader.cache.load('vvvvid', 'user') or {} + self._conn_id = user.get('conn_id') + if not self._conn_id: + self._conn_id = self._download_json( + 'https://www.vvvvid.it/user/login', + None, headers=self.geo_verification_headers())['data']['conn_id'] + self._downloader.cache.store( + 'vvvvid', 'user', { + 'conn_id': self._conn_id, + }) + + def _real_extract(self, url): + show_id, season_id, video_id = re.match(self._VALID_URL, url).groups() +
response = self._download_json( + 'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id), + video_id, headers=self.geo_verification_headers(), query={ + 'conn_id': self._conn_id, + }) + if response['result'] == 'error': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, response['message']), expected=True) + + vid = int(video_id) + video_data = list(filter( + lambda episode: episode.get('video_id') == vid, response['data']))[0] + formats = [] + + # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js + def ds(h): + g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij" + + def f(m): + l = [] + o = 0 + b = False + m_len = len(m) + while ((not b) and o < m_len): + n = m[o] << 2 + o += 1 + k = -1 + j = -1 + if o < m_len: + n += m[o] >> 4 + o += 1 + if o < m_len: + k = (m[o - 1] << 4) & 255 + k += m[o] >> 2 + o += 1 + if o < m_len: + j = (m[o - 1] << 6) & 255 + j += m[o] + o += 1 + else: + b = True + else: + b = True + else: + b = True + l.append(n) + if k != -1: + l.append(k) + if j != -1: + l.append(j) + return l + + c = [] + for e in h: + c.append(g.index(e)) + + c_len = len(c) + for e in range(c_len * 2 - 1, -1, -1): + a = c[e % c_len] ^ c[(e + 1) % c_len] + c[e % c_len] = a + + c = f(c) + d = '' + for e in c: + d += chr(e) + + return d + + for quality in ('_sd', ''): + embed_code = video_data.get('embed_info' + quality) + if not embed_code: + continue + embed_code = ds(embed_code) + video_type = video_data.get('video_type') + if video_type in ('video/rcs', 'video/kenc'): + formats.extend(self._extract_akamai_formats( + embed_code, video_id)) + else: + formats.extend(self._extract_wowza_formats( + 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data['title'], + 'formats': formats, + 'thumbnail': video_data.get('thumbnail'), + 'duration': 
int_or_none(video_data.get('length')), + 'series': video_data.get('show_title'), + 'season_id': season_id, + 'season_number': video_data.get('season_number'), + 'episode_id': str_or_none(video_data.get('id')), + 'epidode_number': int_or_none(video_data.get('number')), + 'episode_title': video_data['title'], + 'view_count': int_or_none(video_data.get('views')), + 'like_count': int_or_none(video_data.get('video_likes')), + } From dc1f3a9f20a9e87638d550a201a5645fdbf0dbee Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 16 Dec 2016 11:04:58 +0100 Subject: [PATCH 31/81] [vvvvid] do not cache the conn_id --- youtube_dl/extractor/vvvvid.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py index 04fe2e89f..d44ec85fd 100644 --- a/youtube_dl/extractor/vvvvid.py +++ b/youtube_dl/extractor/vvvvid.py @@ -35,17 +35,9 @@ class VVVVIDIE(InfoExtractor): _conn_id = None def _real_initialize(self): - if not self._conn_id: - user = self._downloader.cache.load('vvvvid', 'user') or {} - self._conn_id = user.get('conn_id') - if not self._conn_id: - self._conn_id = self._download_json( - 'https://www.vvvvid.it/user/login', - None, headers=self.geo_verification_headers())['data']['conn_id'] - self._downloader.cache.store( - 'vvvvid', 'user', { - 'conn_id': self._conn_id, - }) + self._conn_id = self._download_json( + 'https://www.vvvvid.it/user/login', + None, headers=self.geo_verification_headers())['data']['conn_id'] def _real_extract(self, url): show_id, season_id, video_id = re.match(self._VALID_URL, url).groups() From 0ae9560eea3e829fdb9daefac027e5c983e4db98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 16 Dec 2016 23:57:51 +0700 Subject: [PATCH 32/81] [vporn] Use urljoin for thumbnail --- youtube_dl/extractor/vporn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 
1557a0e04..e22900f8d 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -7,6 +7,7 @@ from ..utils import ( ExtractorError, parse_duration, str_to_int, + urljoin, ) @@ -66,10 +67,9 @@ class VpornIE(InfoExtractor): description = self._html_search_regex( r'class="(?:descr|description_txt)">(.*?)', webpage, 'description', fatal=False) - thumbnail = self._html_search_regex( - r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None) - if thumbnail: - thumbnail = 'http://www.vporn.com' + thumbnail + thumbnail = urljoin('http://www.vporn.com', self._html_search_regex( + r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', + default=None)) uploader = self._html_search_regex( r'(?s)Uploaded by:.*?]*>(.+?)', From 594601f54570b8e79606002b6342dd5fcdc1f133 Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 27 Sep 2016 13:29:21 +0800 Subject: [PATCH 33/81] [ondemandkorea] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ondemandkorea.py | 58 +++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/ondemandkorea.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bcf9f1906..519908857 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -662,6 +662,7 @@ from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE +from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py new file mode 100644 index 000000000..125c310c8 --- /dev/null +++ b/youtube_dl/extractor/ondemandkorea.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class 
OnDemandKoreaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P[^/]+)\.html' + _TEST = { + 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', + 'info_dict': { + 'id': 'ask-us-anything-e43', + 'ext': 'mp4', + 'title': 'Ask Us Anything : E43', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': 'm3u8 download' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, fatal=False) + + if not webpage: + # Page sometimes returns captcha page with HTTP 403 + raise ExtractorError('Unable to access page. You may have been blocked.', expected=True) + + if 'msg_block_01.png' in webpage: + raise ExtractorError('This content is not available in your region.', expected=True) + + if 'This video is only available to ODK PLUS members.' in webpage: + raise ExtractorError('This video is only available to ODK PLUS members.', expected=True) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + manifest_url = self._search_regex(r'file:\s"(https?://[\S].+?/manifest\.m3u8)', webpage, 'manifest') + formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + subs = re.findall(r'file:\s\'(?P[^\']+\.vtt)\',\s+label:\s+\'(?P[^\']+)\'', webpage) + subtitles = {} + for sub in subs: + subtitles[sub[1]] = [{'url': 'http://www.ondemandkorea.com' + sub[0], 'ext': sub[0][-3:]}] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + 'subtitles': subtitles, + } From b0c65c677f5298df8653df1e382b406bea420ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:44:53 +0700 Subject: [PATCH 34/81] [utils] Improve urljoin --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 
3f45b0bd1..1cdac82fc 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -448,11 +448,14 @@ class TestUtil(unittest.TestCase): def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 694e9a600..528d87bb9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1703,9 +1703,9 @@ def base_url(url): def urljoin(base, path): if not isinstance(path, compat_str) or not path: return None - if re.match(r'https?://', path): + if re.match(r'^(?:https?:)?//', path): return path - if not isinstance(base, compat_str) or not re.match(r'https?://', base): + if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base): return None return compat_urlparse.urljoin(base, path) From a495840d3bb05619f9d38168b47f55f7aeb1ca87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:45:44 +0700 Subject: [PATCH 35/81] [jwplatform] Improve subtitles extraction --- youtube_dl/extractor/jwplatform.py | 13 +++++++++---- 1 file 
changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 5d56e0a28..7037763cb 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + urljoin, ) @@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor): tracks = video_data.get('tracks') if tracks and isinstance(tracks, list): for track in tracks: - if track.get('file') and track.get('kind') == 'captions': - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track['file']) - }) + if track.get('kind') != 'captions': + continue + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entries.append({ 'id': this_video_id, From 732d116aa7bed0940bec29af67dd271e47932818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:45:53 +0700 Subject: [PATCH 36/81] [jwplatform] Improve duration extraction --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 7037763cb..aff7ab49a 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -126,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor): 'description': video_data.get('description'), 'thumbnail': self._proto_relative_url(video_data.get('image')), 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration')), + 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, 'formats': formats, }) From 47c914f9954f6a88a9cd56f487f493e08eb78765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 
Dec 2016 18:47:49 +0700 Subject: [PATCH 37/81] [ondemandkorea] Fix extraction (closes #10772) --- youtube_dl/extractor/ondemandkorea.py | 52 ++++++++++++++------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index 125c310c8..c3e830c23 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - -from .common import InfoExtractor -from ..utils import ExtractorError +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + ExtractorError, + js_to_json, +) -class OnDemandKoreaIE(InfoExtractor): +class OnDemandKoreaIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', @@ -29,30 +29,32 @@ class OnDemandKoreaIE(InfoExtractor): if not webpage: # Page sometimes returns captcha page with HTTP 403 - raise ExtractorError('Unable to access page. You may have been blocked.', expected=True) + raise ExtractorError( + 'Unable to access page. You may have been blocked.', + expected=True) if 'msg_block_01.png' in webpage: - raise ExtractorError('This content is not available in your region.', expected=True) - + self.raise_geo_restricted( + 'This content is not available in your region') + if 'This video is only available to ODK PLUS members.' 
in webpage: - raise ExtractorError('This video is only available to ODK PLUS members.', expected=True) + raise ExtractorError( + 'This video is only available to ODK PLUS members.', + expected=True) title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) - manifest_url = self._search_regex(r'file:\s"(https?://[\S].+?/manifest\.m3u8)', webpage, 'manifest') - formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id='hls') - self._sort_formats(formats) + jw_config = self._parse_json( + self._search_regex( + r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P.+?)\);', + webpage, 'jw config', group='options'), + video_id, transform_source=js_to_json) + info = self._parse_jwplayer_data( + jw_config, video_id, require_title=False, m3u8_id='hls', + base_url=url) - subs = re.findall(r'file:\s\'(?P[^\']+\.vtt)\',\s+label:\s+\'(?P[^\']+)\'', webpage) - subtitles = {} - for sub in subs: - subtitles[sub[1]] = [{'url': 'http://www.ondemandkorea.com' + sub[0], 'ext': sub[0][-3:]}] - - return { - 'id': video_id, + info.update({ 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - 'subtitles': subtitles, - } + 'thumbnail': self._og_search_thumbnail(webpage), + }) + return info From 9b785768acdb384dba1f81af6f144b4b99c4d7a3 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Tue, 20 Sep 2016 20:14:24 -0400 Subject: [PATCH 38/81] [meipai] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/meipai.py | 99 ++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 youtube_dl/extractor/meipai.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 519908857..af9b7003b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -498,6 +498,7 @@ from .mangomolo import ( ) from .matchtv import MatchTVIE from .mdr import MDRIE +from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import 
METAIE from .metacafe import MetacafeIE diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py new file mode 100644 index 000000000..2ea592055 --- /dev/null +++ b/youtube_dl/extractor/meipai.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_iso8601 +from .common import InfoExtractor + + +class MeipaiIE(InfoExtractor): + IE_DESC = '美拍' + _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.meipai.com/media/531697625', + 'md5': 'e3e9600f9e55a302daecc90825854b4f', + 'info_dict': { + 'id': '531697625', + 'ext': 'mp4', + 'title': '#葉子##阿桑##余姿昀##超級女聲#', + 'description': '#葉子##阿桑##余姿昀##超級女聲#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], + 'release_date': 1465492420, + } + }, + { + 'url': 'http://www.meipai.com/media/576409659', + 'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48', + 'info_dict': { + 'id': '576409659', + 'ext': 'mp4', + 'title': '#失語者##蔡健雅##吉他彈唱#', + 'description': '#失語者##蔡健雅##吉他彈唱#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'tags': ['失語者', '蔡健雅', '吉他彈唱'], + 'release_date': 1472534847, + } + }, + # record of live streaming + { + 'url': 'http://www.meipai.com/media/585526361', + 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', + 'info_dict': { + 'id': '585526361', + 'ext': 'mp4', + 'title': '姿昀和善願 練歌練琴啦😁😁😁', + 'description': '姿昀和善願 練歌練琴啦😁😁😁', + 'thumbnail': 're:^https?://.*\.jpg$', + 'creator': '她她-TATA', + 'release_date': 1474311799, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage, default=None) + if title is None: + # fall back to text used in title + title = self._html_search_regex( + r']*>(.+)', webpage, 'title') + + release_date = self._og_search_property( + 'video:release_date', webpage, 'release date', fatal=False) + release_date = 
parse_iso8601(release_date) + + tags = self._og_search_property( + 'video:tag', webpage, 'tags', default='').split(',') + + info = { + 'id': video_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'release_date': release_date, + 'creator': self._og_search_property( + 'video:director', webpage, 'creator', fatal=False), + 'tags': tags, + } + + keywords = self._html_search_meta( + 'keywords', webpage, 'keywords', default=[]) + + if '直播回放' in keywords: + # recorded playback of live streaming + m3u8_url = self._html_search_regex( + r'file:\s*encodeURIComponent\(["\'](.+)["\']\)', + webpage, + 'm3u8_url') + info['formats'] = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native') + else: + # regular uploaded video + info['url'] = self._og_search_video_url(webpage) + + return info From 2786818c3360bcadc21109a9f740fba8f698c8a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 19:42:34 +0700 Subject: [PATCH 39/81] [meipai] Fix regular videos extraction and improve (closes #10718) --- youtube_dl/extractor/meipai.py | 153 +++++++++++++++++---------------- 1 file changed, 79 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index 2ea592055..35914fd4b 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -1,99 +1,104 @@ # coding: utf-8 from __future__ import unicode_literals -from ..utils import parse_iso8601 from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + unified_timestamp, +) class MeipaiIE(InfoExtractor): IE_DESC = '美拍' _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.meipai.com/media/531697625', - 'md5': 'e3e9600f9e55a302daecc90825854b4f', - 'info_dict': { - 'id': '531697625', - 'ext': 'mp4', - 'title': '#葉子##阿桑##余姿昀##超級女聲#', - 'description': 
'#葉子##阿桑##余姿昀##超級女聲#', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], - 'release_date': 1465492420, - } - }, - { - 'url': 'http://www.meipai.com/media/576409659', - 'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48', - 'info_dict': { - 'id': '576409659', - 'ext': 'mp4', - 'title': '#失語者##蔡健雅##吉他彈唱#', - 'description': '#失語者##蔡健雅##吉他彈唱#', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'tags': ['失語者', '蔡健雅', '吉他彈唱'], - 'release_date': 1472534847, - } - }, + _TESTS = [{ + # regular uploaded video + 'url': 'http://www.meipai.com/media/531697625', + 'md5': 'e3e9600f9e55a302daecc90825854b4f', + 'info_dict': { + 'id': '531697625', + 'ext': 'mp4', + 'title': '#葉子##阿桑##余姿昀##超級女聲#', + 'description': '#葉子##阿桑##余姿昀##超級女聲#', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 152, + 'timestamp': 1465492420, + 'upload_date': '20160609', + 'view_count': 35511, + 'creator': '她她-TATA', + 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], + } + }, { # record of live streaming - { - 'url': 'http://www.meipai.com/media/585526361', - 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', - 'info_dict': { - 'id': '585526361', - 'ext': 'mp4', - 'title': '姿昀和善願 練歌練琴啦😁😁😁', - 'description': '姿昀和善願 練歌練琴啦😁😁😁', - 'thumbnail': 're:^https?://.*\.jpg$', - 'creator': '她她-TATA', - 'release_date': 1474311799, - } - }, - ] + 'url': 'http://www.meipai.com/media/585526361', + 'md5': 'ff7d6afdbc6143342408223d4f5fb99a', + 'info_dict': { + 'id': '585526361', + 'ext': 'mp4', + 'title': '姿昀和善願 練歌練琴啦😁😁😁', + 'description': '姿昀和善願 練歌練琴啦😁😁😁', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 5975, + 'timestamp': 1474311799, + 'upload_date': '20160919', + 'view_count': 1215, + 'creator': '她她-TATA', + } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage, default=None) - if title is None: - # fall back to text used in title - title = self._html_search_regex( - 
r']*>(.+)', webpage, 'title') + title = self._og_search_title( + webpage, default=None) or self._html_search_regex( + r']*>([^<]+)', webpage, 'title') - release_date = self._og_search_property( - 'video:release_date', webpage, 'release date', fatal=False) - release_date = parse_iso8601(release_date) + formats = [] + + # recorded playback of live streaming + m3u8_url = self._html_search_regex( + r'file:\s*encodeURIComponent\((["\'])(?P(?:(?!\1).)+)\1\)', + webpage, 'm3u8 url', group='url', default=None) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + if not formats: + # regular uploaded video + video_url = self._search_regex( + r'data-video=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video url', + group='url', default=None) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': 'http', + }) + + timestamp = unified_timestamp(self._og_search_property( + 'video:release_date', webpage, 'release date', fatal=False)) tags = self._og_search_property( 'video:tag', webpage, 'tags', default='').split(',') - info = { + view_count = int_or_none(self._html_search_meta( + 'interactionCount', webpage, 'view count')) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration')) + creator = self._og_search_property( + 'video:director', webpage, 'creator', fatal=False) + + return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - 'release_date': release_date, - 'creator': self._og_search_property( - 'video:director', webpage, 'creator', fatal=False), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'creator': creator, 'tags': tags, + 'formats': formats, } - - keywords = self._html_search_meta( - 'keywords', webpage, 'keywords', default=[]) - - if '直播回放' in keywords: - # 
recorded playback of live streaming - m3u8_url = self._html_search_regex( - r'file:\s*encodeURIComponent\(["\'](.+)["\']\)', - webpage, - 'm3u8_url') - info['formats'] = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native') - else: - # regular uploaded video - info['url'] = self._og_search_video_url(webpage) - - return info From 93753aad20991c3fc23566b9fb7db8299dbc9ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 20:20:23 +0700 Subject: [PATCH 40/81] [twitch] Adapt to new videos pages schema (closes #11469) --- youtube_dl/extractor/twitch.py | 69 ++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 77414a242..8de8ec65b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -300,7 +300,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): response = self._call_api( self._PLAYLIST_PATH % (channel_id, offset, limit), channel_id, - 'Downloading %s videos JSON page %s' + 'Downloading %s JSON page %s' % (self._PLAYLIST_TYPE, counter_override or counter)) page_entries = self._extract_playlist_page(response) if not page_entries: @@ -350,19 +350,72 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): } -class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:past_broadcasts' - _VALID_URL = r'%s/(?P[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true' - _PLAYLIST_TYPE = 'past broadcasts' +class TwitchVideosBaseIE(TwitchPlaylistBaseIE): + _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE + _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' + + +class TwitchAllVideosIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:all' + _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = 
TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' + _PLAYLIST_TYPE = 'all videos' _TEST = { - 'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts', + 'url': 'https://www.twitch.tv/spamfish/videos/all', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, - 'playlist_mincount': 54, + 'playlist_mincount': 869, + } + + +class TwitchUploadsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:uploads' + _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' + _PLAYLIST_TYPE = 'uploads' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/uploads', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 0, + } + + +class TwitchPastBroadcastsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:past-broadcasts' + _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' + _PLAYLIST_TYPE = 'past broadcasts' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 0, + } + + +class TwitchHighlightsIE(TwitchVideosBaseIE): + IE_NAME = 'twitch:videos:highlights' + _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' + _PLAYLIST_TYPE = 'highlights' + + _TEST = { + 'url': 'https://www.twitch.tv/spamfish/videos/highlights', + 'info_dict': { + 'id': 'spamfish', + 'title': 'Spamfish', + }, + 'playlist_mincount': 805, } From 87a449c1edbbb3761fbb6fc3a100152aa961f95b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 23:03:13 +0700 Subject: [PATCH 41/81] [extractor/common] Recognize DASH formats in html5 media entries --- youtube_dl/extractor/common.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6ae946569..40f3e2323 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1888,7 +1888,7 @@ class InfoExtractor(object): }) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1905,11 +1905,16 @@ class InfoExtractor(object): def _media_formats(src, cur_media_type): full_url = absolute_url(src) - if determine_ext(full_url) == 'm3u8': + ext = determine_ext(full_url) + if ext == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) + elif ext == 'mpd': + is_plain_url = False + formats = self._extract_mpd_formats( + full_url, video_id, mpd_id=mpd_id) else: is_plain_url = True formats = [{ From 04bf59ff64e49f06caffaa121ca5adbd0da66d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 23:03:50 +0700 Subject: [PATCH 42/81] [extractors] Add missing twitch imports --- youtube_dl/extractor/extractors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index af9b7003b..202b971e0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1001,7 +1001,10 @@ from .twitch import ( TwitchChapterIE, TwitchVodIE, TwitchProfileIE, + TwitchAllVideosIE, + TwitchUploadsIE, TwitchPastBroadcastsIE, + TwitchHighlightsIE, TwitchStreamIE, TwitchClipsIE, ) From 6e416b210cfa565d2fc692722612a2ad2fde09fa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 17 Dec 2016 18:11:13 +0100 Subject: [PATCH 43/81] [nbc] fix extraction for msnbc videos(fixes #11466) --- 
youtube_dl/extractor/nbc.py | 43 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 7f1bd9229..4e96e78c3 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,6 +9,7 @@ from ..utils import ( lowercase_escape, smuggle_url, unescapeHTML, + update_url_query, ) @@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'md5': 'af1adfa51312291a017720403826bb64', 'info_dict': { - 'id': '269389891880', + 'id': 'p_tweet_snow_140529', 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', @@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', 'md5': '73135a2e0ef819107bbb55a5a9b2a802', 'info_dict': { - 'id': '394064451844', + 'id': 'nn_netcast_150204', 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', @@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'md5': 'a49e173825e5fcd15c13fc297fced39d', 'info_dict': { - 'id': '529953347624', + 'id': 'x_lon_vwhorn_150922', 'ext': 'mp4', 'title': 'Volkswagen U.S. 
Chief:\xa0 We Have Totally Screwed Up', 'description': 'md5:c8be487b2d80ff0594c005add88d8351', @@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', 'md5': '118d7ca3f0bea6534f119c68ef539f71', 'info_dict': { - 'id': '669831235788', + 'id': 'tdy_al_space_160420', 'ext': 'mp4', 'title': 'See the aurora borealis from space in stunning new NASA video', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', @@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', 'info_dict': { - 'id': '314487875924', + 'id': 'n_hayes_Aimm_140801_272214', 'ext': 'mp4', 'title': 'The chaotic GOP immigration vote', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', @@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE): 'timestamp': 1406937606, 'upload_date': '20140802', 'uploader': 'NBCU-NEWS', - 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], }, }, { @@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE): else: # "feature" and "nightly-news" pages use theplatform.com video_id = mobj.group('mpx_id') - if not video_id.isdigit(): - webpage = self._download_webpage(url, video_id) - info = None - bootstrap_json = self._search_regex( - [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', - r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], - webpage, 'bootstrap json', default=None) + webpage = self._download_webpage(url, video_id) + + filter_param = 'byId' + bootstrap_json = self._search_regex( + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"', + 
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'], + webpage, 'bootstrap json', default=None) + if bootstrap_json: bootstrap = self._parse_json( bootstrap_json, video_id, transform_source=unescapeHTML) + + info = None if 'results' in bootstrap: info = bootstrap['results'][0]['video'] elif 'video' in bootstrap: info = bootstrap['video'] + elif 'msnbcVideoInfo' in bootstrap: + info = bootstrap['msnbcVideoInfo']['meta'] + elif 'msnbcThePlatform' in bootstrap: + info = bootstrap['msnbcThePlatform']['videoPlayer']['video'] else: info = bootstrap - video_id = info['mpxId'] + + if 'guid' in info: + video_id = info['guid'] + filter_param = 'byGuid' + elif 'mpxId' in info: + video_id = info['mpxId'] return { '_type': 'url_transparent', 'id': video_id, # http://feed.theplatform.com/f/2E2eJC/nbcnews also works - 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, + 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}), 'ie_key': 'ThePlatformFeed', } From b42a0bf360878025817dba0b71479d509b5df4b4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 17 Dec 2016 21:48:45 +0100 Subject: [PATCH 44/81] [laola1tv] add support embed urls and improve extraction(#11460) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/laola1tv.py | 149 ++++++++++++++--------------- 2 files changed, 74 insertions(+), 80 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 202b971e0..16606a86c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -446,7 +446,10 @@ from .kuwo import ( KuwoMvIE, ) from .la7 import LA7IE -from .laola1tv import Laola1TvIE +from .laola1tv import ( + Laola1TvEmbedIE, + Laola1TvIE, +) from .lci import LCIIE from .lcp import ( LcpPlayIE, diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 2fab38079..37e38dba0 100644 --- a/youtube_dl/extractor/laola1tv.py +++ 
b/youtube_dl/extractor/laola1tv.py @@ -1,25 +1,81 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( ExtractorError, - sanitized_Request, unified_strdate, urlencode_postdata, xpath_element, xpath_text, + urljoin, ) +class Laola1TvEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + flash_vars = self._search_regex( + r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') + get_flashvar = lambda x: self._search_regex(r'%s\s*:\s*"([^"]+)"' % x, flash_vars, x) + + hd_doc = self._download_xml( + 'http://www.laola1.tv/server/hd_video.php', video_id, query={ + 'play': get_flashvar('streamid'), + 'partner': get_flashvar('partnerid'), + 'portal': get_flashvar('portalid'), + 'lang': get_flashvar('sprache'), + 'v5ident': '', + }) + + _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) + title = _v('title', fatal=True) + + data_abo = urlencode_postdata( + dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) + token_url = self._download_json( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', + video_id, query={ + 'videoId': _v('id'), + 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), + 'label': _v('label'), + 'area': _v('area'), + }, data=data_abo)['data']['stream-access'][0] + token_doc = self._download_xml( + token_url, video_id, 'Downloading token', + headers=self.geo_verification_headers()) + + token_attrib = xpath_element(token_doc, './/token').attrib + + if token_attrib['status'] != '0': + raise ExtractorError( + 'Token error: %s' % token_attrib['comment'], expected=True) + + formats = self._extract_akamai_formats( + '%s?hdnea=%s' % (token_attrib['url'], 
token_attrib['auth']), + video_id) + self._sort_formats(formats) + + categories_str = _v('meta_sports') + categories = categories_str.split(',') if categories_str else [] + is_live = _v('islive') == 'true' + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'upload_date': unified_strdate(_v('time_date')), + 'uploader': _v('meta_organisation'), + 'categories': categories, + 'is_live': is_live, + 'formats': formats, + } + + class Laola1TvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P[a-z]+)-(?P[a-z]+)/(?P[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'info_dict': { @@ -67,85 +123,20 @@ class Laola1TvIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('slug') - kind = mobj.group('kind') - lang = mobj.group('lang') - portal = mobj.group('portal') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) if 'Dieser Livestream ist bereits beendet.' 
in webpage: raise ExtractorError('This live stream has already finished.', expected=True) - iframe_url = self._search_regex( + iframe_url = urljoin(url, self._search_regex( r']*?id="videoplayer"[^>]*?src="([^"]+)"', - webpage, 'iframe url') - - video_id = self._search_regex( - r'videoid=(\d+)', iframe_url, 'video id') - - iframe = self._download_webpage(compat_urlparse.urljoin( - url, iframe_url), display_id, 'Downloading iframe') - - partner_id = self._search_regex( - r'partnerid\s*:\s*(["\'])(?P.+?)\1', - iframe, 'partner id', group='partner_id') - - hd_doc = self._download_xml( - 'http://www.laola1.tv/server/hd_video.php?%s' - % compat_urllib_parse_urlencode({ - 'play': video_id, - 'partner': partner_id, - 'portal': portal, - 'lang': lang, - 'v5ident': '', - }), display_id) - - _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) - title = _v('title', fatal=True) - - VS_TARGETS = { - 'video': '2', - 'livestream': '17', - } - - req = sanitized_Request( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % - compat_urllib_parse_urlencode({ - 'videoId': video_id, - 'target': VS_TARGETS.get(kind, '2'), - 'label': _v('label'), - 'area': _v('area'), - }), - urlencode_postdata( - dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))) - - token_url = self._download_json(req, display_id)['data']['stream-access'][0] - token_doc = self._download_xml(token_url, display_id, 'Downloading token') - - token_attrib = xpath_element(token_doc, './/token').attrib - token_auth = token_attrib['auth'] - - if token_auth in ('blocked', 'restricted', 'error'): - raise ExtractorError( - 'Token error: %s' % token_attrib['comment'], expected=True) - - formats = self._extract_f4m_formats( - '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), - video_id, f4m_id='hds') - self._sort_formats(formats) - - categories_str = _v('meta_sports') - categories = categories_str.split(',') if categories_str else [] + webpage, 'iframe 
url')) return { - 'id': video_id, + '_type': 'url', 'display_id': display_id, - 'title': title, - 'upload_date': unified_strdate(_v('time_date')), - 'uploader': _v('meta_organisation'), - 'categories': categories, - 'is_live': _v('islive') == 'true', - 'formats': formats, + 'url': iframe_url, + 'ie_key': 'Laola1TvEmbed', } From 199a47abba4ce57b6df4cd9f3223837ac6c4ee85 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 10:49:10 +0100 Subject: [PATCH 45/81] [ccma] Add new extractor(closes #11359) --- youtube_dl/extractor/ccma.py | 99 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 100 insertions(+) create mode 100644 youtube_dl/extractor/ccma.py diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py new file mode 100644 index 000000000..39938c9ac --- /dev/null +++ b/youtube_dl/extractor/ccma.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, + clean_html, +) + + +class CCMAIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?Pvideo|audio)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', + 'md5': '7296ca43977c8ea4469e719c609b0871', + 'info_dict': { + 'id': '5630208', + 'ext': 'mp4', + 'title': 'L\'espot de La Marató de TV3', + 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', + 'timestamp': 1470918540, + 'upload_date': '20160811', + } + }, { + 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', + 'md5': 'fa3e38f269329a278271276330261425', + 'info_dict': { + 'id': '943685', + 'ext': 'mp3', + 'title': 'El Consell de Savis analitza el derbi', + 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', + 'upload_date': '20171205', + 'timestamp': 1512507300, + } + }] + + def 
_real_extract(self, url): + media_type, media_id = re.match(self._VALID_URL, url).groups() + media_data = {} + formats = [] + profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc'] + for i, profile in enumerate(profiles): + md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ + 'media': media_type, + 'idint': media_id, + 'profile': profile, + }, fatal=False) + if md: + media_data = md + media_url = media_data.get('media', {}).get('url') + if media_url: + formats.append({ + 'format_id': profile, + 'url': media_url, + 'quality': i, + }) + self._sort_formats(formats) + + informacio = media_data['informacio'] + title = informacio['titol'] + durada = informacio.get('durada', {}) + duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) + timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) + + subtitles = {} + subtitols = media_data.get('subtitols', {}) + if subtitols: + sub_url = subtitols.get('url') + if sub_url: + subtitles.setdefault( + subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({ + 'url': sub_url, + }) + + thumbnails = [] + imatges = media_data.get('imatges', {}) + if imatges: + thumbnail_url = imatges.get('url') + if thumbnail_url: + thumbnails = [{ + 'url': thumbnail_url, + 'width': int_or_none(imatges.get('amplada')), + 'height': int_or_none(imatges.get('alcada')), + }] + + return { + 'id': media_id, + 'title': title, + 'description': clean_html(informacio.get('descripcio')), + 'duration': duration, + 'timestamp': timestamp, + 'thumnails': thumbnails, + 'subtitles': subtitles, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 16606a86c..c0b10f6d0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -150,6 +150,7 @@ from .cbsnews import ( ) from .cbssports import CBSSportsIE from .ccc import CCCIE +from .ccma import CCMAIE from .cctv 
import CCTVIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE From d5e623aaa18a1a08731e46e3aff6a2a9361b69b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:46:57 +0700 Subject: [PATCH 46/81] Credit @pyx for meipai (#10718) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4a6f7e13f..798503ca7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,3 +190,4 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 +Philip Xu From 52a1d48d9fa1c33d3541b0219d5f7e36e9f66953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:48:59 +0700 Subject: [PATCH 47/81] [ChangeLog] Actualize --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a9bfd2711..8fbc39c6a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version + +Core ++ [extractor/common] Recognize DASH formats in html5 media entries + +Extractors ++ [ccma] Add support for ccma.cat (#11359) +* [laola1tv] Improve extraction ++ [laola1tv] Add support embed URLs (#11460) +* [nbc] Fix extraction for MSNBC videos (#11466) +* [twitch] Adapt to new videos pages URL schema (#11469) ++ [meipai] Add support for meipai.com (#10718) +* [jwplatform] Improve subtitles and duration extraction ++ [ondemandkorea] Add support for ondemandkorea.com (#10772) ++ [vvvvid] Add support for vvvvid.it (#5915) + + version 2016.12.15 Core From f73d7d50749cd8e49f048da62ff418f6ce0bf036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 19:50:33 +0700 Subject: [PATCH 48/81] release 2016.12.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 10 +++++++++- youtube_dl/version.py | 2 +- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7bf8d6fd4..fffdefa45 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 
@@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.15 +[debug] youtube-dl version 2016.12.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8fbc39c6a..3ed5da7fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.18 Core + [extractor/common] Recognize DASH formats in html5 media entries diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 226552d4d..b55044520 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -131,6 +131,7 @@ - **cbsnews**: CBS News - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** + - **CCMA** - 
**CCTV** - **CDA** - **CeskaTelevize** @@ -365,6 +366,7 @@ - **kuwo:song**: 酷我音乐 - **la7.it** - **Laola1Tv** + - **Laola1TvEmbed** - **LCI** - **Lcp** - **LcpPlay** @@ -402,6 +404,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **Meipai**: 美拍 - **MelonVOD** - **META** - **metacafe** @@ -524,6 +527,7 @@ - **Odnoklassniki** - **OktoberfestTV** - **on.aol.com** + - **OnDemandKorea** - **onet.tv** - **onet.tv:channel** - **OnionStudios** @@ -785,10 +789,13 @@ - **Tweakers** - **twitch:chapter** - **twitch:clips** - - **twitch:past_broadcasts** - **twitch:profile** - **twitch:stream** - **twitch:video** + - **twitch:videos:all** + - **twitch:videos:highlights** + - **twitch:videos:past-broadcasts** + - **twitch:videos:uploads** - **twitch:vod** - **twitter** - **twitter:amplify** @@ -874,6 +881,7 @@ - **VRT** - **vube**: Vube.com - **VuClip** + - **VVVVID** - **VyboryMos** - **Vzaar** - **Walla** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a4bba4a52..5f06d4b52 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.15' +__version__ = '2016.12.18' From ec79b1de1cda9745e83ba7950d6da8eeb6b53293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Dec 2016 20:56:21 +0700 Subject: [PATCH 49/81] Revert "Credit @pyx for meipai (#10718)" This reverts commit d5e623aaa18a1a08731e46e3aff6a2a9361b69b8. 
--- AUTHORS | 1 - 1 file changed, 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 798503ca7..4a6f7e13f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,4 +190,3 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 -Philip Xu From e7b6caef248a834078333c166db5d12c01b102af Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 10 Sep 2016 01:49:56 +0800 Subject: [PATCH 50/81] [viu] New extractor for viu.com --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/viu.py | 253 +++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100644 youtube_dl/extractor/viu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c0b10f6d0..cae429f67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1105,6 +1105,10 @@ from .viki import ( VikiIE, VikiChannelIE, ) +from .viu import ( + ViuIE, + ViuPlaylistIE, +) from .vk import ( VKIE, VKUserVideosIE, diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py new file mode 100644 index 000000000..2cab3495f --- /dev/null +++ b/youtube_dl/extractor/viu.py @@ -0,0 +1,253 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + clean_html, +) + + +class ViuBaseIE(InfoExtractor): + + def _get_viu_auth(self, video_id): + viu_auth_res = self._request_webpage( + 'https://www.viu.com/api/apps/v2/authenticate', video_id, + note='Requesting Viu auth', + query={ + 'acct': 'test', 'appid': 'viu_desktop', 'fmt': 'json', + 'iid': 'guest', 'languageid': 'default', 'platform': 'desktop', + 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' + }) + return viu_auth_res.info().get('X-VIU-AUTH') + + +class ViuIE(ViuBaseIE): + IE_NAME = 'viu:show' + _VALID_URL = r'https?://www\.viu\.com/.+/(?:vod|media)/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', + 'info_dict': { + 
'id': '3421', + 'ext': 'mp4', + 'title': 'The Prime Minister and I - Episode 17', + 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Singapore', + }, { + 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', + 'info_dict': { + 'id': '7123', + 'ext': 'mp4', + 'title': '大人女子 - Episode 10', + 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Hong Kong', + }, { + 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059', + 'info_dict': { + 'id': '1116705532', + 'ext': 'mp4', + 'title': 'Citizen Khan - Episode 1', + 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to India', + }, { + 'url': 'https://www.viu.com/en/media/1130599965', + 'info_dict': { + 'id': '1130599965', + 'ext': 'mp4', + 'title': 'Jealousy Incarnate - Episode 1', + 'description': 'md5:d3d82375cab969415d2720b6894361e9', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Indonesia', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id, note='Downloading video page') + + mobj = re.search( + r'
]+?>(?P.+?)
', webpage, flags=re.DOTALL) + + if mobj: + raise ExtractorError(clean_html(mobj.group('err')), expected=True) + + config_js_url = self._search_regex( + r'src=(["\'])(?P.+?/js/config\.js)(?:\?.+?)?\1', webpage, 'config_js', + group='api_url', default=None) + + if not config_js_url: + # content is from ID, IN, MY + video_info = self._download_json( + 'https://www.viu.com/api/clip/load', video_id, + headers={'X-VIU-AUTH': self._get_viu_auth(video_id)}, + query={'appid': 'viu_desktop', 'fmt': 'json', 'id': video_id}, + note='Downloading video info').get('response', {}).get('item', [{}])[0] + + formats = self._extract_m3u8_formats( + video_info['href'], video_id, 'mp4', + m3u8_id='hls', fatal=False) + self._sort_formats(formats) + + subtitles = {} + for key, value in list(video_info.items()): + mobj = re.match(r'^subtitle_(?P[^_]+?)_(?P(vtt|srt))', key) + if not mobj: + continue + if not subtitles.get(mobj.group('lang')): + subtitles[mobj.group('lang')] = [] + subtitles[mobj.group('lang')].append( + {'url': value, 'ext': mobj.group('ext')}) + + title = '%s - Episode %s' % (video_info['moviealbumshowname'], + video_info.get('episodeno')) + description = video_info.get('description') + duration = int_or_none(video_info.get('duration')) + series = video_info.get('moviealbumshowname') + episode_title = video_info.get('title') + episode_num = int_or_none(video_info.get('episodeno')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'series': series, + 'episode': episode_title, + 'episode_number': episode_num, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } + + # content from HK, SG + config_js = self._download_webpage( + 'http://www.viu.com' + config_js_url, video_id, note='Downloading config js') + + # try to strip away commented code which contains test urls + config_js = re.sub(r'^//.*?$', '', config_js, flags=re.MULTILINE) + config_js = re.sub(r'/\*.*?\*/', '', config_js, flags=re.DOTALL) + + # Slightly 
different api_url between HK and SG config.js + # http://www.viu.com/ott/hk/v1/js/config.js => '//www.viu.com/ott/hk/index.php?r=' + # http://www.viu.com/ott/sg/v1/js/config.js => 'http://www.viu.com/ott/sg/index.php?r=' + api_url = self._proto_relative_url( + self._search_regex( + r'var\s+api_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?r=)\1', + config_js, 'api_url', group='api_url'), scheme='http:') + + stream_info_url = self._proto_relative_url( + self._search_regex( + r'var\s+video_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?ccs_product_id=)\1', + config_js, 'video_url', group='video_url'), scheme='http:') + + if url.startswith('https://'): + api_url = re.sub('^http://', 'https://', api_url) + + video_info = self._download_json( + api_url + 'vod/ajax-detail&platform_flag_label=web&product_id=' + video_id, + video_id, note='Downloading video info').get('data', {}) + + ccs_product_id = video_info.get('current_product', {}).get('ccs_product_id') + + if not ccs_product_id: + raise ExtractorError('This video is not available in your region.', expected=True) + + stream_info = self._download_json( + stream_info_url + ccs_product_id, video_id, + note='Downloading stream info').get('data', {}).get('stream', {}) + + formats = [] + for vid_format, stream_url in stream_info.get('url', {}).items(): + br = int_or_none(self._search_regex( + r's(?P
[0-9]+)p', vid_format, 'bitrate', group='br')) + formats.append({ + 'format_id': vid_format, + 'url': stream_url, + 'vbr': br, + 'ext': 'mp4', + 'filesize': stream_info.get('size', {}).get(vid_format) + }) + self._sort_formats(formats) + + subtitles = {} + if video_info.get('current_product', {}).get('subtitle', []): + for sub in video_info.get('current_product', {}).get('subtitle', []): + subtitles[sub.get('name')] = [{ + 'url': sub.get('url'), + 'ext': 'srt', + }] + + episode_info = next( + p for p in video_info.get('series', {}).get('product', []) + if p.get('product_id') == video_id) + + title = '%s - Episode %s' % (video_info.get('series', {}).get('name'), + episode_info.get('number')) + description = episode_info.get('description') + thumbnail = episode_info.get('cover_image_url') + duration = int_or_none(stream_info.get('duration')) + series = video_info.get('series', {}).get('name') + episode_title = episode_info.get('synopsis') + episode_num = int_or_none(episode_info.get('number')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'series': series, + 'episode': episode_title, + 'episode_number': episode_num, + 'duration': duration, + 'thumbnail': thumbnail, + 'formats': formats, + 'subtitles': subtitles, + } + + +class ViuPlaylistIE(ViuBaseIE): + IE_NAME = 'viu:playlist' + _VALID_URL = r'https?://www\.viu\.com/.+/listing/(?Pplaylist\-[0-9]+)' + _TEST = { + 'url': 'https://www.viu.com/en/listing/playlist-22461380', + 'info_dict': { + 'id': 'playlist-22461380', + 'title': 'The Good Wife', + }, + 'playlist_count': 16, + 'skip': 'Geo-restricted to Indonesia', + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + playlist_info = self._download_json( + 'https://www.viu.com/api/container/load', playlist_id, + headers={'X-VIU-AUTH': self._get_viu_auth(playlist_id)}, + query={'appid': 'viu_desktop', 'fmt': 'json', 'id': playlist_id}, + note='Downloading playlist info').get('response', {}).get('container') + + 
name = playlist_info['title'] + entries = [ + self.url_result( + 'https://www.viu.com/en/media/%s' % item['id'], + 'Viu', item['id']) + for item in playlist_info['item'] if item['id']] + + return self.playlist_result(entries, playlist_id, name) From 723103151ead8b22ff4a61d009d16ec26b31248a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 17:15:53 +0100 Subject: [PATCH 51/81] [viu] improve extraction(closes #10607)(closes #11329) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/viu.py | 348 ++++++++++++++--------------- 2 files changed, 171 insertions(+), 178 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cae429f67..d75ea0c92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1108,6 +1108,7 @@ from .viki import ( from .viu import ( ViuIE, ViuPlaylistIE, + ViuOTTIE, ) from .vk import ( VKIE, diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index 2cab3495f..1a81b4845 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -4,60 +4,53 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, - clean_html, ) class ViuBaseIE(InfoExtractor): - - def _get_viu_auth(self, video_id): + def _real_initialize(self): viu_auth_res = self._request_webpage( - 'https://www.viu.com/api/apps/v2/authenticate', video_id, - note='Requesting Viu auth', - query={ - 'acct': 'test', 'appid': 'viu_desktop', 'fmt': 'json', - 'iid': 'guest', 'languageid': 'default', 'platform': 'desktop', - 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' + 'https://www.viu.com/api/apps/v2/authenticate', None, + 'Requesting Viu auth', query={ + 'acct': 'test', + 'appid': 'viu_desktop', + 'fmt': 'json', + 'iid': 'guest', + 'languageid': 'default', + 'platform': 'desktop', + 'userid': 'guest', + 'useridtype': 'guest', + 
'ver': '1.0' }) - return viu_auth_res.info().get('X-VIU-AUTH') + self._auth_token = viu_auth_res.info()['X-VIU-AUTH'] + + def _call_api(self, path, *args, **kwargs): + headers = self.geo_verification_headers() + headers.update({ + 'X-VIU-AUTH': self._auth_token + }) + headers.update(kwargs.get('headers', {})) + kwargs['headers'] = headers + response = self._download_json( + 'https://www.viu.com/api/' + path, *args, **kwargs)['response'] + if response.get('status') != 'success': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, response['message']), expected=True) + return response class ViuIE(ViuBaseIE): - IE_NAME = 'viu:show' - _VALID_URL = r'https?://www\.viu\.com/.+/(?:vod|media)/(?P[0-9]+)' + _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P\d+)' _TESTS = [{ - 'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', - 'info_dict': { - 'id': '3421', - 'ext': 'mp4', - 'title': 'The Prime Minister and I - Episode 17', - 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - 'skip': 'Geo-restricted to Singapore', - }, { - 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', - 'info_dict': { - 'id': '7123', - 'ext': 'mp4', - 'title': '大人女子 - Episode 10', - 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', - }, - 'params': { - 'skip_download': 'm3u8 download', - }, - 'skip': 'Geo-restricted to Hong Kong', - }, { 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059', 'info_dict': { 'id': '1116705532', 'ext': 'mp4', - 'title': 'Citizen Khan - Episode 1', + 'title': 'Citizen Khan - Ep 1', 'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e', }, 'params': { @@ -81,142 +74,46 @@ class ViuIE(ViuBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, note='Downloading video page') + video_data = self._call_api( + 'clip/load', 
video_id, 'Downloading video data', query={ + 'appid': 'viu_desktop', + 'fmt': 'json', + 'id': video_id + })['item'][0] - mobj = re.search( - r'
]+?>(?P.+?)
', webpage, flags=re.DOTALL) + title = video_data['title'] - if mobj: - raise ExtractorError(clean_html(mobj.group('err')), expected=True) - - config_js_url = self._search_regex( - r'src=(["\'])(?P.+?/js/config\.js)(?:\?.+?)?\1', webpage, 'config_js', - group='api_url', default=None) - - if not config_js_url: - # content is from ID, IN, MY - video_info = self._download_json( - 'https://www.viu.com/api/clip/load', video_id, - headers={'X-VIU-AUTH': self._get_viu_auth(video_id)}, - query={'appid': 'viu_desktop', 'fmt': 'json', 'id': video_id}, - note='Downloading video info').get('response', {}).get('item', [{}])[0] - - formats = self._extract_m3u8_formats( - video_info['href'], video_id, 'mp4', - m3u8_id='hls', fatal=False) - self._sort_formats(formats) - - subtitles = {} - for key, value in list(video_info.items()): - mobj = re.match(r'^subtitle_(?P[^_]+?)_(?P(vtt|srt))', key) - if not mobj: - continue - if not subtitles.get(mobj.group('lang')): - subtitles[mobj.group('lang')] = [] - subtitles[mobj.group('lang')].append( - {'url': value, 'ext': mobj.group('ext')}) - - title = '%s - Episode %s' % (video_info['moviealbumshowname'], - video_info.get('episodeno')) - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) - series = video_info.get('moviealbumshowname') - episode_title = video_info.get('title') - episode_num = int_or_none(video_info.get('episodeno')) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'series': series, - 'episode': episode_title, - 'episode_number': episode_num, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } - - # content from HK, SG - config_js = self._download_webpage( - 'http://www.viu.com' + config_js_url, video_id, note='Downloading config js') - - # try to strip away commented code which contains test urls - config_js = re.sub(r'^//.*?$', '', config_js, flags=re.MULTILINE) - config_js = re.sub(r'/\*.*?\*/', '', config_js, 
flags=re.DOTALL) - - # Slightly different api_url between HK and SG config.js - # http://www.viu.com/ott/hk/v1/js/config.js => '//www.viu.com/ott/hk/index.php?r=' - # http://www.viu.com/ott/sg/v1/js/config.js => 'http://www.viu.com/ott/sg/index.php?r=' - api_url = self._proto_relative_url( - self._search_regex( - r'var\s+api_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?r=)\1', - config_js, 'api_url', group='api_url'), scheme='http:') - - stream_info_url = self._proto_relative_url( - self._search_regex( - r'var\s+video_url\s*=\s*(["\'])(?P(?:https?:)?//.+?\?ccs_product_id=)\1', - config_js, 'video_url', group='video_url'), scheme='http:') - - if url.startswith('https://'): - api_url = re.sub('^http://', 'https://', api_url) - - video_info = self._download_json( - api_url + 'vod/ajax-detail&platform_flag_label=web&product_id=' + video_id, - video_id, note='Downloading video info').get('data', {}) - - ccs_product_id = video_info.get('current_product', {}).get('ccs_product_id') - - if not ccs_product_id: - raise ExtractorError('This video is not available in your region.', expected=True) - - stream_info = self._download_json( - stream_info_url + ccs_product_id, video_id, - note='Downloading stream info').get('data', {}).get('stream', {}) - - formats = [] - for vid_format, stream_url in stream_info.get('url', {}).items(): - br = int_or_none(self._search_regex( - r's(?P
[0-9]+)p', vid_format, 'bitrate', group='br')) - formats.append({ - 'format_id': vid_format, - 'url': stream_url, - 'vbr': br, - 'ext': 'mp4', - 'filesize': stream_info.get('size', {}).get(vid_format) - }) + m3u8_url = None + url_path = video_data.get('urlpathd') or video_data.get('urlpath') + tdirforwhole = video_data.get('tdirforwhole') + hls_file = video_data.get('hlsfile') + if url_path and tdirforwhole and hls_file: + m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file) + else: + m3u8_url = re.sub( + r'(/hlsc_)[a-z]+(\d+\.m3u8)', + r'\1whe\2', video_data['href']) + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') self._sort_formats(formats) subtitles = {} - if video_info.get('current_product', {}).get('subtitle', []): - for sub in video_info.get('current_product', {}).get('subtitle', []): - subtitles[sub.get('name')] = [{ - 'url': sub.get('url'), - 'ext': 'srt', - }] - - episode_info = next( - p for p in video_info.get('series', {}).get('product', []) - if p.get('product_id') == video_id) - - title = '%s - Episode %s' % (video_info.get('series', {}).get('name'), - episode_info.get('number')) - description = episode_info.get('description') - thumbnail = episode_info.get('cover_image_url') - duration = int_or_none(stream_info.get('duration')) - series = video_info.get('series', {}).get('name') - episode_title = episode_info.get('synopsis') - episode_num = int_or_none(episode_info.get('number')) + for key, value in video_data.items(): + mobj = re.match(r'^subtitle_(?P[^_]+)_(?P(vtt|srt))', key) + if not mobj: + continue + subtitles.setdefault(mobj.group('lang'), []).append({ + 'url': value, + 'ext': mobj.group('ext') + }) return { 'id': video_id, 'title': title, - 'description': description, - 'series': series, - 'episode': episode_title, - 'episode_number': episode_num, - 'duration': duration, - 'thumbnail': thumbnail, + 'description': video_data.get('description'), + 'series': video_data.get('moviealbumshowname'), + 'episode': title, + 
'episode_number': int_or_none(video_data.get('episodeno')), + 'duration': int_or_none(video_data.get('duration')), 'formats': formats, 'subtitles': subtitles, } @@ -224,11 +121,11 @@ class ViuIE(ViuBaseIE): class ViuPlaylistIE(ViuBaseIE): IE_NAME = 'viu:playlist' - _VALID_URL = r'https?://www\.viu\.com/.+/listing/(?Pplaylist\-[0-9]+)' + _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P\d+)' _TEST = { 'url': 'https://www.viu.com/en/listing/playlist-22461380', 'info_dict': { - 'id': 'playlist-22461380', + 'id': '22461380', 'title': 'The Good Wife', }, 'playlist_count': 16, @@ -237,17 +134,112 @@ class ViuPlaylistIE(ViuBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - playlist_info = self._download_json( - 'https://www.viu.com/api/container/load', playlist_id, - headers={'X-VIU-AUTH': self._get_viu_auth(playlist_id)}, - query={'appid': 'viu_desktop', 'fmt': 'json', 'id': playlist_id}, - note='Downloading playlist info').get('response', {}).get('container') + playlist_data = self._call_api( + 'container/load', playlist_id, + 'Downloading playlist info', query={ + 'appid': 'viu_desktop', + 'fmt': 'json', + 'id': 'playlist-' + playlist_id + })['container'] - name = playlist_info['title'] - entries = [ - self.url_result( - 'https://www.viu.com/en/media/%s' % item['id'], - 'Viu', item['id']) - for item in playlist_info['item'] if item['id']] + entries = [] + for item in playlist_data.get('item', []): + item_id = item.get('id') + if not item_id: + continue + item_id = compat_str(item_id) + entries.append(self.url_result( + 'viu:' + item_id, 'Viu', item_id)) - return self.playlist_result(entries, playlist_id, name) + return self.playlist_result( + entries, playlist_id, playlist_data.get('title')) + + +class ViuOTTIE(InfoExtractor): + IE_NAME = 'viu:ott' + _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P\d+)' + _TESTS = [{ + 'url': 
'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I', + 'info_dict': { + 'id': '3421', + 'ext': 'mp4', + 'title': 'A New Beginning', + 'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Singapore', + }, { + 'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90', + 'info_dict': { + 'id': '7123', + 'ext': 'mp4', + 'title': '這就是我的生活之道', + 'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + 'skip': 'Geo-restricted to Hong Kong', + }] + + def _real_extract(self, url): + country_code, video_id = re.match(self._VALID_URL, url).groups() + + product_data = self._download_json( + 'http://www.viu.com/ott/%s/index.php' % country_code, video_id, + 'Downloading video info', query={ + 'r': 'vod/ajax-detail', + 'platform_flag_label': 'web', + 'product_id': video_id, + })['data'] + + video_data = product_data.get('current_product') + if not video_data: + raise ExtractorError('This video is not available in your region.', expected=True) + + stream_data = self._download_json( + 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, + video_id, 'Downloading stream info', query={ + 'ccs_product_id': video_data['ccs_product_id'], + })['data']['stream'] + + stream_sizes = stream_data.get('size', {}) + formats = [] + for vid_format, stream_url in stream_data.get('url', {}).items(): + height = int_or_none(self._search_regex( + r's(\d+)p', vid_format, 'height', default=None)) + formats.append({ + 'format_id': vid_format, + 'url': stream_url, + 'height': height, + 'ext': 'mp4', + 'filesize': int_or_none(stream_sizes.get(vid_format)) + }) + self._sort_formats(formats) + + subtitles = {} + for sub in video_data.get('subtitle', []): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('name'), []).append({ + 'url': sub_url, + 
'ext': 'srt', + }) + + title = video_data['synopsis'].strip() + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'series': product_data.get('series', {}).get('name'), + 'episode': title, + 'episode_number': int_or_none(video_data.get('number')), + 'duration': int_or_none(stream_data.get('duration')), + 'thumbnail': video_data.get('cover_image_url'), + 'formats': formats, + 'subtitles': subtitles, + } From ed7b333fbfe7bfb0ac0986c6be15d223341a7ac7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 18:24:01 +0100 Subject: [PATCH 52/81] [viu] extract supported hls manifest --- youtube_dl/extractor/viu.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index 1a81b4845..d4861a2fe 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -86,13 +86,17 @@ class ViuIE(ViuBaseIE): m3u8_url = None url_path = video_data.get('urlpathd') or video_data.get('urlpath') tdirforwhole = video_data.get('tdirforwhole') - hls_file = video_data.get('hlsfile') + # #EXT-X-BYTERANGE is not supported by native hls downloader + # and ffmpeg (#10955) + # hls_file = video_data.get('hlsfile') + hls_file = video_data.get('jwhlsfile') if url_path and tdirforwhole and hls_file: m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file) else: - m3u8_url = re.sub( - r'(/hlsc_)[a-z]+(\d+\.m3u8)', - r'\1whe\2', video_data['href']) + # m3u8_url = re.sub( + # r'(/hlsc_)[a-z]+(\d+\.m3u8)', + # r'\1whe\2', video_data['href']) + m3u8_url = video_data['href'] formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') self._sort_formats(formats) From 954529c10fd847d58374dda2a3661f0df2c1d5f6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 18 Dec 2016 21:39:59 +0100 Subject: [PATCH 53/81] [brightcove:new] skip widevine classic videos --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 945cf19e8..ac5f32541 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -548,7 +548,7 @@ class BrightcoveNewIE(InfoExtractor): container = source.get('container') ext = mimetype2ext(source.get('type')) src = source.get('src') - if ext == 'ism': + if ext == 'ism' or container == 'WVM': continue elif ext == 'm3u8' or container == 'M2TS': if not src: From 5aaf012a4eacc50bb5b131f6c26027e391fc379a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 19 Dec 2016 16:27:12 +0100 Subject: [PATCH 54/81] [pbs] fix extraction for geo restricted videos(#7095) --- youtube_dl/extractor/pbs.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index b490ef74c..f1c0cd068 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -350,6 +350,15 @@ class PBSIE(InfoExtractor): 410: 'This video has expired and is no longer available for online streaming.', } + def _real_initialize(self): + cookie = (self._download_json( + 'http://localization.services.pbs.org/localize/auto/cookie/', + None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie') + if cookie: + station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station') + if station: + self._set_cookie('.pbs.org', 'pbsol.station', station) + def _extract_webpage(self, url): mobj = re.match(self._VALID_URL, url) @@ -476,7 +485,8 @@ class PBSIE(InfoExtractor): redirect_info = self._download_json( '%s?format=json' % redirect['url'], display_id, - 'Downloading %s video url info' % (redirect_id or num)) + 'Downloading %s video url info' % (redirect_id or num), + headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': raise ExtractorError( From c80db5d3988b31a1f17b2e894099ff16b6b777c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 
Dec 2016 23:47:45 +0700 Subject: [PATCH 55/81] [nrktv:direkte] Add support for live streams (#11488) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nrk.py | 32 +++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d75ea0c92..e44cf5d85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -655,6 +655,7 @@ from .nrk import ( NRKPlaylistIE, NRKSkoleIE, NRKTVIE, + NRKTVDirekteIE, ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c89aac63e..776c40b94 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor): entries = [] + conviva = data.get('convivaStatistics') or {} + live = (data.get('mediaElementType') == 'Live' or + data.get('isLive') is True or conviva.get('isLive')) + + def make_title(t): + return self._live_title(t) if live else t + media_assets = data.get('mediaAssets') if media_assets and isinstance(media_assets, list): def video_id_and_title(idx): @@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor): if not formats: continue self._sort_formats(formats) + + # Some f4m streams may not work with hdcore in fragments' URLs + for f in formats: + extra_param = f.get('extra_param_to_segment_url') + if extra_param and 'hdcore' in extra_param: + del f['extra_param_to_segment_url'] + entry_id, entry_title = video_id_and_title(num) duration = parse_duration(asset.get('duration')) subtitles = {} @@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor): }) entries.append({ 'id': asset.get('carrierId') or entry_id, - 'title': entry_title, + 'title': make_title(entry_title), 'duration': duration, 'subtitles': subtitles, 'formats': formats, @@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor): duration = parse_duration(data.get('duration')) entries = [{ 'id': video_id, - 
'title': title, + 'title': make_title(title), 'duration': duration, 'formats': formats, }] @@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor): message_type, message_type)), expected=True) - conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') @@ -260,6 +273,19 @@ class NRKTVIE(NRKBaseIE): }] +class NRKTVDirekteIE(NRKTVIE): + IE_DESC = 'NRK TV Direkte and NRK Radio Direkte' + _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'https://tv.nrk.no/direkte/nrk1', + 'only_matching': True, + }, { + 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus', + 'only_matching': True, + }] + + class NRKPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P[^/]+)' From 8ab7e6c4cc93d998a39fda9733587b58f5252999 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 20 Dec 2016 18:45:52 +0800 Subject: [PATCH 56/81] [kaltura] Improve widget ID extraction (closes #11480) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 14 ++++++++++++++ youtube_dl/extractor/kaltura.py | 5 ++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3ed5da7fe..da0d37f80 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [kaltura] Fix wrong widget ID in some cases (#11480) + + version 2016.12.18 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3949c8bf7..a6a5f193e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -972,6 +972,20 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + { + # Kaltura embedded, some fileExt broken (#11480) + 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics', + 'info_dict': { + 'id': '1_sgtvehim', + 'ext': 'mp4', + 'title': 'Our "Standard Models" of particle physics 
and cosmology', + 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861', + 'timestamp': 1321158993, + 'upload_date': '20111113', + 'uploader_id': 'kps1', + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 91bc3a0a7..c0ddad6f9 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor): (?P['\"])wid(?P=q1)\s*:\s* (?P['\"])_?(?P(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4), + (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) or re.search( r'''(?xs) @@ -266,6 +266,9 @@ class KalturaIE(InfoExtractor): # skip for now. if f.get('fileExt') == 'chun': continue + if not f.get('fileExt') and f.get('containerFormat') == 'qt': + # QT indicates QuickTime; some videos have broken fileExt + f['fileExt'] = 'mov' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) # audio-only has no videoCodecId (e.g. 
kaltura:1926081:0_c03e1b5g From 7fe1592073c0a775dcd3ea7fcb400fbcfad624f7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:23:16 +0100 Subject: [PATCH 57/81] [common] fix dash codec information for mixed videos and fragment url construction(#11490) --- test/test_utils.py | 1 + youtube_dl/extractor/common.py | 11 +++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 1cdac82fc..3092db5c1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -461,6 +461,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', None), None) self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 40f3e2323..58da27025 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -59,6 +59,7 @@ from ..utils import ( parse_m3u8_attributes, extract_attributes, parse_codecs, + urljoin, ) @@ -1631,11 +1632,6 @@ class InfoExtractor(object): extract_Initialization(segment_template) return ms_info - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1685,12 +1681,11 @@ class InfoExtractor(object): 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 'fps': int_or_none(representation_attrib.get('frameRate')), - 'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'), - 
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, 'format_note': 'DASH %s' % content_type, 'filesize': filesize, } + f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: @@ -1774,7 +1769,7 @@ class InfoExtractor(object): f['fragments'].append({'url': initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) for fragment in f['fragments']: - fragment['url'] = combine_url(base_url, fragment['url']) + fragment['url'] = urljoin(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From d8c507c9e2879b16a429cdd7bded5d308a4cdb10 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:25:05 +0100 Subject: [PATCH 58/81] [vimeo] fix extraction for hls formats and add support for dash formats(closes #11490) --- youtube_dl/extractor/vimeo.py | 39 ++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 51c69a80c..c35cafcc6 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -92,29 +92,30 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _vimeo_sort_formats(self, formats): # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. 
- self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) def _parse_config(self, config, video_id): + video_data = config['video'] # Extract title - video_title = config['video']['title'] + video_title = video_data['title'] # Extract uploader, uploader_url and uploader_id - video_uploader = config['video'].get('owner', {}).get('name') - video_uploader_url = config['video'].get('owner', {}).get('url') + video_uploader = video_data.get('owner', {}).get('name') + video_uploader_url = video_data.get('owner', {}).get('url') video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None # Extract video thumbnail - video_thumbnail = config['video'].get('thumbnail') + video_thumbnail = video_data.get('thumbnail') if video_thumbnail is None: - video_thumbs = config['video'].get('thumbs') + video_thumbs = video_data.get('thumbs') if video_thumbs and isinstance(video_thumbs, dict): _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video duration - video_duration = int_or_none(config['video'].get('duration')) + video_duration = int_or_none(video_data.get('duration')) formats = [] - config_files = config['video'].get('files') or config['request'].get('files', {}) + config_files = video_data.get('files') or config['request'].get('files', {}) for f in config_files.get('progressive', []): video_url = f.get('url') if not video_url: @@ -127,10 +128,24 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'fps': int_or_none(f.get('fps')), 'tbr': int_or_none(f.get('bitrate')), }) - m3u8_url = config_files.get('hls', {}).get('url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + for files_type in ('hls', 'dash'): + for cdn_name, cdn_data in 
config_files.get(files_type, {}).get('cdns', {}).items(): + manifest_url = cdn_data.get('url') + if not manifest_url: + continue + format_id = '%s-%s' % (files_type, cdn_name) + if files_type == 'hls': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', + 'm3u8_native', m3u8_id=format_id, + note='Downloading %s m3u8 information' % cdn_name, + fatal=False)) + elif files_type == 'dash': + formats.extend(self._extract_mpd_formats( + manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id, + 'Downloading %s MPD information' % cdn_name, + fatal=False)) subtitles = {} text_tracks = config['request'].get('text_tracks') From b1c357975d554920720db971ea4695259218d8d8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:34:46 +0100 Subject: [PATCH 59/81] [piksel] Add new extractor(closes #11246) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 6 ++ youtube_dl/extractor/piksel.py | 106 +++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) create mode 100644 youtube_dl/extractor/piksel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e44cf5d85..11dcaf668 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -699,6 +699,7 @@ from .periscope import ( from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE from .playfm import PlayFMIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a6a5f193e..87daf83f8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -75,6 +75,7 @@ from .facebook import FacebookIE from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE +from .piksel import PikselIE class GenericIE(InfoExtractor): @@ -2225,6 +2226,11 
@@ class GenericIE(InfoExtractor): if arkena_url: return self.url_result(arkena_url, ArkenaIE.ie_key()) + # Look for Piksel embeds + piksel_url = PikselIE._extract_url(webpage) + if piksel_url: + return self.url_result(piksel_url, PikselIE.ie_key()) + # Look for Limelight embeds mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage) if mobj: diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py new file mode 100644 index 000000000..d44edcdfb --- /dev/null +++ b/youtube_dl/extractor/piksel.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + dict_get, + int_or_none, + unescapeHTML, + parse_iso8601, +) + + +class PikselIE(InfoExtractor): + _VALID_URL = r'https?://player\.piksel\.com/v/(?P[a-z0-9]+)' + _TEST = { + 'url': 'http://player.piksel.com/v/nv60p12f', + 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'info_dict': { + 'id': 'nv60p12f', + 'ext': 'mp4', + 'title': 'فن الحياة - الحلقة 1', + 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', + 'timestamp': 1465231790, + 'upload_date': '20160606', + } + } + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src=["\'](?P(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + app_token = self._search_regex( + r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token') + response = self._download_json( + 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, + video_id, query={ + 'v': video_id + })['response'] + failure = response.get('failure') + if failure: + raise ExtractorError(response['failure']['reason'], expected=True) + video_data = 
response['WsProgramResponse']['program']['asset'] + title = video_data['title'] + + formats = [] + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + asset_type = dict_get(video_data, ['assetType', 'asset_type']) + for asset_file in video_data.get('assetFiles', []): + # TODO: extract rtmp formats + http_url = asset_file.get('http_url') + if not http_url: + continue + tbr = None + vbr = int_or_none(asset_file.get('videoBitrate'), 1024) + abr = int_or_none(asset_file.get('audioBitrate'), 1024) + if asset_type == 'video': + tbr = vbr + abr + elif asset_type == 'audio': + tbr = abr + + format_id = ['http'] + if tbr: + format_id.append(compat_str(tbr)) + + formats.append({ + 'format_id': '-'.join(format_id), + 'url': unescapeHTML(http_url), + 'vbr': vbr, + 'abr': abr, + 'width': int_or_none(asset_file.get('videoWidth')), + 'height': int_or_none(asset_file.get('videoHeight')), + 'filesize': int_or_none(asset_file.get('filesize')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailUrl'), + 'timestamp': parse_iso8601(video_data.get('dateadd')), + 'formats': formats, + } From f59d1146c0ca523ec03a4c7df8987e82ee5054b2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:52:46 +0100 Subject: [PATCH 60/81] [uktvplay] Add new extractor(closes #11027) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/uktvplay.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/uktvplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 11dcaf668..fcfe87f6f 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -1024,6 +1024,7 @@ from .udemy import ( UdemyCourseIE ) from .udn import UDNEmbedIE +from .uktvplay import UKTVPlayIE from .digiteka import DigitekaIE from .unistra import UnistraIE from .uol import UOLIE diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dl/extractor/uktvplay.py new file mode 100644 index 000000000..2137502a1 --- /dev/null +++ b/youtube_dl/extractor/uktvplay.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class UKTVPlayIE(InfoExtractor): + _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P\d+)' + _TEST = { + 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001', + 'md5': '', + 'info_dict': { + 'id': '2117008346001', + 'ext': 'mp4', + 'title': 'Pincers', + 'description': 'Pincers', + 'uploader_id': '1242911124001', + 'upload_date': '20130124', + 'timestamp': 1359049267, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download MPD manifest'] + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % video_id, + 'BrightcoveNew', video_id) From 3d6761ba92b96934bdbf1792dfacd0368d7b236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 21:51:11 +0700 Subject: [PATCH 61/81] [vbox7] Fix extraction (closes #11494) --- youtube_dl/extractor/vbox7.py | 75 ++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index a1e0851b7..7fb7574ad 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -4,11 +4,22 @@ from __future__ import unicode_literals import re from .common import InfoExtractor 
-from ..utils import urlencode_postdata +from ..utils import ExtractorError class Vbox7IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P[\da-fA-F]+)' + _VALID_URL = r'''(?x) + https?:// + (?:[^/]+\.)?vbox7\.com/ + (?: + play:| + (?: + emb/external\.php| + player/ext\.swf + )\?.*?\bvid= + ) + (?P[\da-fA-F]+) + ''' _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor): 'id': '0946fff23c', 'ext': 'mp4', 'title': 'Борисов: Притеснен съм за бъдещето на България', + 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1470982814, + 'upload_date': '20160812', + 'uploader': 'zdraveibulgaria', + }, + 'params': { + 'proxy': '127.0.0.1:8118', }, }, { 'url': 'http://vbox7.com/play:249bb972c2', @@ -29,6 +48,9 @@ class Vbox7IE(InfoExtractor): }, { 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', 'only_matching': True, + }, { + 'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1', + 'only_matching': True, }] @staticmethod @@ -42,33 +64,40 @@ class Vbox7IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://vbox7.com/play:%s' % video_id, video_id) + response = self._download_json( + 'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id, + video_id) - title = self._html_search_regex( - r'(.+?)', webpage, 'title').split('/')[0].strip() + if 'error' in response: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']), expected=True) - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P.+?.mp4.*?)\1', - webpage, 'video url', default=None, group='url') + video = response['options'] - thumbnail_url = self._og_search_thumbnail(webpage) - - if not video_url: - info_response = self._download_webpage( - 
'http://vbox7.com/play/magare.do', video_id, - 'Downloading info webpage', - data=urlencode_postdata({'as3': '1', 'vid': video_id}), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - final_url, thumbnail_url = map( - lambda x: x.split('=')[1], info_response.split('&')) + title = video['title'] + video_url = video['src'] if '/na.mp4' in video_url: self.raise_geo_restricted() - return { + uploader = video.get('uploader') + + webpage = self._download_webpage( + 'http://vbox7.com/play:%s' % video_id, video_id, fatal=None) + + info = {} + + if webpage: + info = self._search_json_ld( + webpage.replace('"/*@context"', '"@context"'), video_id) + + info.update({ 'id': video_id, - 'url': self._proto_relative_url(video_url, 'http:'), 'title': title, - 'thumbnail': thumbnail_url, - } + 'url': video_url, + 'uploader': uploader, + 'thumbnail': self._proto_relative_url( + info.get('thumbnail') or self._og_search_thumbnail(webpage), + 'http:'), + }) + return info From 1f6a79b0af356a800fd878ef4e8fb180071fa5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 22:36:06 +0700 Subject: [PATCH 62/81] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index da0d37f80..25eda233d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,21 @@ version +Core +* [extractor/common] Improve fragment URL construction for DASH media +* [extractor/common] Fix codec information extraction for mixed audio/video + DASH media (#11490) + Extractors +* [vbox7] Fix extraction (#11494) ++ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027) ++ [piksel] Add support for player.piksel.com (#11246) ++ [vimeo] Add support for DASH formats +* [vimeo] Fix extraction for HLS formats (#11490) * [kaltura] Fix wrong widget ID in some cases (#11480) ++ [nrktv:direkte] Add support for live streams (#11488) +* [pbs] Fix extraction for geo restricted videos (#7095) +* [brightcove:new] Skip widevine 
classic videos ++ [viu] Add support for viu.com (#10607, #11329) version 2016.12.18 From 90352a80412eabf639046348c0acd5669005120f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 20 Dec 2016 22:39:39 +0700 Subject: [PATCH 63/81] release 2016.12.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 6 ++++++ youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fffdefa45..5f95a149e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.18 +[debug] youtube-dl version 2016.12.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 25eda233d..c14bce7d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.20 Core * [extractor/common] Improve fragment URL construction for DASH media diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b55044520..955aa5c68 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -517,6 +517,7 @@ - **NRKPlaylist** - **NRKSkole**: NRK Skole - **NRKTV**: NRK TV and NRK Radio + - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **ntv.ru** - **Nuvid** - **NYTimes** @@ -551,6 +552,7 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **Piksel** - **Pinkbike** - **Pladform** - **play.fm** @@ -803,6 +805,7 @@ - **udemy** - **udemy:course** - **UDNEmbed**: 聯合影音 + - **UKTVPlay** - **Unistra** - **uol.com.br** - **uplynk** @@ -867,6 +870,9 @@ - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** + - **Viu** + - **viu:ott** + - **viu:playlist** - **Vivo**: vivo.sx - **vk**: VK - **vk:uservideos**: VK - User's Videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f06d4b52..a0c5c35da 100644 --- 
a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.18' +__version__ = '2016.12.20' From e029c43bd43ddde448b150b1e0f226e4dd8c9b90 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 18:22:57 +0100 Subject: [PATCH 64/81] [laola1] add support for another extraction scenario(closes #11460) --- youtube_dl/extractor/laola1tv.py | 56 +++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 37e38dba0..3190b187c 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -9,18 +9,41 @@ from ..utils import ( xpath_element, xpath_text, urljoin, + update_url_query, ) class Laola1TvEmbedIE(InfoExtractor): + IE_NAME = 'laola1tv:embed' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P\d+)' + _TEST = { + # flashvars.premium = "false"; + 'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024', + 'info_dict': { + 'id': '708065', + 'ext': 'mp4', + 'title': 'MA Long CHN - FAN Zhendong CHN', + 'uploader': 'ITTF - International Table Tennis Federation', + 'upload_date': '20161211', + }, + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) flash_vars = self._search_regex( r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars') - get_flashvar = lambda x: self._search_regex(r'%s\s*:\s*"([^"]+)"' % x, flash_vars, x) + + def get_flashvar(x, *args, **kwargs): + flash_var = self._search_regex( + r'%s\s*:\s*"([^"]+)"' % x, + flash_vars, x, default=None) + if not flash_var: + flash_var = self._search_regex([ + r'flashvars\.%s\s*=\s*"([^"]+)"' % x, + r'%s\s*=\s*"([^"]+)"' % x], + webpage, x, *args, **kwargs) + return flash_var hd_doc = self._download_xml( 'http://www.laola1.tv/server/hd_video.php', video_id, query={ @@ -34,16 
+57,26 @@ class Laola1TvEmbedIE(InfoExtractor): _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) title = _v('title', fatal=True) - data_abo = urlencode_postdata( - dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) - token_url = self._download_json( - 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', - video_id, query={ - 'videoId': _v('id'), - 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), - 'label': _v('label'), - 'area': _v('area'), - }, data=data_abo)['data']['stream-access'][0] + token_url = None + premium = get_flashvar('premium', default=None) + if premium: + token_url = update_url_query( + _v('url', fatal=True), { + 'timestamp': get_flashvar('timestamp'), + 'auth': get_flashvar('auth'), + }) + else: + data_abo = urlencode_postdata( + dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))) + token_url = self._download_json( + 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', + video_id, query={ + 'videoId': _v('id'), + 'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'), + 'label': _v('label'), + 'area': _v('area'), + }, data=data_abo)['data']['stream-access'][0] + token_doc = self._download_xml( token_url, video_id, 'Downloading token', headers=self.geo_verification_headers()) @@ -75,6 +108,7 @@ class Laola1TvEmbedIE(InfoExtractor): class Laola1TvIE(InfoExtractor): + IE_NAME = 'laola1tv' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', From bfa1073e113cb7fa8a362112d4eae6dede197efa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 19:49:45 +0100 Subject: [PATCH 65/81] [uplynk] force downloading using hls native downloader(closes #11496) --- youtube_dl/downloader/hls.py | 3 +++ youtube_dl/extractor/uplynk.py | 4 +++- 2 files changed, 6 insertions(+), 1 
deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7373ec05f..4989abce1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -65,6 +65,9 @@ class HlsFD(FragmentFD): s = manifest.decode('utf-8', 'ignore') if not self.can_download(s, info_dict): + if info_dict.get('extra_param_to_segment_url'): + self.report_error('pycrypto not found. Please install it.') + return False self.report_warning( 'hlsnative has detected features it does not support, ' 'extraction will be delegated to ffmpeg') diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index 2cd22cf8a..f06bf5b12 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -30,7 +30,9 @@ class UplynkIE(InfoExtractor): def _extract_uplynk_info(self, uplynk_content_url): path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() display_id = video_id or external_id - formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') + formats = self._extract_m3u8_formats( + 'http://content.uplynk.com/%s.m3u8' % path, + display_id, 'mp4', 'm3u8_native') if session_id: for f in formats: f['extra_param_to_segment_url'] = 'pbs=' + session_id From ae806db6286dc76de6ee53f8f7351ed99bf29bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 21 Dec 2016 22:39:05 +0700 Subject: [PATCH 66/81] [vbox7] Skip malformed JSON-LD (closes #11501) --- youtube_dl/extractor/vbox7.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 7fb7574ad..429893e38 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -89,7 +89,8 @@ class Vbox7IE(InfoExtractor): if webpage: info = self._search_json_ld( - webpage.replace('"/*@context"', '"@context"'), video_id) + webpage.replace('"/*@context"', '"@context"'), video_id, + 
fatal=False) info.update({ 'id': video_id, From 9c5b5f211535a832af4437930afcb6f4b64748c4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 18:45:01 +0100 Subject: [PATCH 67/81] [rtl2] extract more formats and metadata --- youtube_dl/extractor/rtl2.py | 57 +++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index cb4ee8803..721ee733c 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -2,7 +2,9 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor +from ..utils import int_or_none class RTL2IE(InfoExtractor): @@ -13,7 +15,7 @@ class RTL2IE(InfoExtractor): 'id': 'folge-203-0', 'ext': 'f4v', 'title': 'GRIP sucht den Sommerkönig', - 'description': 'Matthias, Det und Helge treten gegeneinander an.' + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' }, 'params': { # rtmp download @@ -25,7 +27,7 @@ class RTL2IE(InfoExtractor): 'id': '21040-anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', - 'description': 'Anna ist Alex\' Tochter bei Köln 50667.' + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' 
}, 'params': { # rtmp download @@ -52,34 +54,47 @@ class RTL2IE(InfoExtractor): r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') vivi_id = self._html_search_regex( r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') - info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id - info = self._download_json(info_url, video_id) + info = self._download_json( + 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php', + video_id, query={ + 'vico_id': vico_id, + 'vivi_id': vivi_id, + }) video_info = info['video'] title = video_info['titel'] - description = video_info.get('beschreibung') - thumbnail = video_info.get('image') - download_url = video_info['streamurl'] - download_url = download_url.replace('\\', '') - stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL') - rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + formats = [] + + rtmp_url = video_info.get('streamurl') + if rtmp_url: + rtmp_url = rtmp_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') + rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': stream_url, + 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + 'preference': 1, + }) + + m3u8_url = video_info.get('streamurl_hls') + if m3u8_url: + formats.extend(self._extract_akamai_formats(m3u8_url, video_id)) - formats = [{ - 'url': download_url, - 'play_path': stream_url, - 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', - 'page_url': url, - 'flash_version': 'LNX 11,2,202,429', - 'rtmp_conn': rtmp_conn, - 'no_resume': True, - }] self._sort_formats(formats) return { 'id': video_id, 'title': title, - 
'thumbnail': thumbnail, - 'description': description, + 'thumbnail': video_info.get('image'), + 'description': video_info.get('beschreibung'), + 'duration': int_or_none(video_info.get('duration')), 'formats': formats, } From f120646f044db8c93976afcfcc7f76f221cb0241 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 20:50:10 +0100 Subject: [PATCH 68/81] [viu] pass geo verification headers to auth request --- youtube_dl/extractor/viu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index d4861a2fe..3fd889c8e 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -25,7 +25,7 @@ class ViuBaseIE(InfoExtractor): 'userid': 'guest', 'useridtype': 'guest', 'ver': '1.0' - }) + }, headers=self.geo_verification_headers()) self._auth_token = viu_auth_res.info()['X-VIU-AUTH'] def _call_api(self, path, *args, **kwargs): From f5a723a78a2d4e395fca89e5b3bed53334b9385e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 21 Dec 2016 20:59:03 +0100 Subject: [PATCH 69/81] [theplatform] pass geo verification headers to smil request(closes #10146) --- youtube_dl/extractor/theplatform.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index cfbf7f4e1..0405bd6b0 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -33,7 +33,9 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformBaseIE(OnceIE): def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): - meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'}) + meta = self._download_xml( + smil_url, video_id, note=note, query={'format': 'SMIL'}, + headers=self.geo_verification_headers()) error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') if error_element is not None and 
error_element.attrib['src'].startswith( 'http://link.theplatform.com/s/errorFiles/Unavailable.'): From a07588369fd330c747464d9ea75ddb861e322a2d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 22 Dec 2016 10:02:56 +0100 Subject: [PATCH 70/81] [common] improve detection for video only formats and m3u8 manifest(fixes #11507) --- youtube_dl/extractor/common.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 58da27025..07d101aef 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1225,7 +1225,7 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] - audio_groups = set() + audio_in_video_stream = {} last_info = {} last_media = {} for line in m3u8_doc.splitlines(): @@ -1235,10 +1235,11 @@ class InfoExtractor(object): media = parse_m3u8_attributes(line) media_type = media.get('TYPE') if media_type in ('VIDEO', 'AUDIO'): + group_id = media.get('GROUP-ID') media_url = media.get('URI') if media_url: format_id = [] - for v in (media.get('GROUP-ID'), media.get('NAME')): + for v in (group_id, media.get('NAME')): if v: format_id.append(v) f = { @@ -1251,12 +1252,15 @@ class InfoExtractor(object): } if media_type == 'AUDIO': f['vcodec'] = 'none' - audio_groups.add(media['GROUP-ID']) + if group_id and not audio_in_video_stream.get(group_id): + audio_in_video_stream[group_id] = False formats.append(f) else: # When there is no URI in EXT-X-MEDIA let this tag's # data be used by regular URI lines below last_media = media + if media_type == 'AUDIO' and group_id: + audio_in_video_stream[group_id] = True elif line.startswith('#') or not line.strip(): continue else: @@ -1300,7 +1304,7 @@ class InfoExtractor(object): 'abr': abr, }) f.update(parse_codecs(last_info.get('CODECS'))) - if last_info.get('AUDIO') in audio_groups: + if audio_in_video_stream.get(last_info.get('AUDIO')) is False: # TODO: update acodec for for 
audio only formats with the same GROUP-ID f['acodec'] = 'none' formats.append(f) From ab3091feda7da5d04f1757dd0a2e26aa919086b5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 22 Dec 2016 10:02:56 +0100 Subject: [PATCH 71/81] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index c14bce7d8..6a08ce230 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +* [extractor/common] Improve detection of video-only formats in m3u8 + manifests (#11507) + +Extractors ++ [theplatform] Pass geo verification headers to SMIL request (#10146) ++ [viu] Pass geo verification headers to auth request +* [rtl2] Extract more formats and metadata +* [vbox7] Skip malformed JSON-LD (#11501) +* [uplynk] Force downloading using native HLS downloader (#11496) ++ [laola1] Add support for another extraction scenario (#11460) + + version 2016.12.20 Core From 5e77c0b58eb4bae0804f639fa509d91b29a273ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 22 Dec 2016 22:52:54 +0700 Subject: [PATCH 72/81] release 2016.12.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5f95a149e..693d787e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.22*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.12.20 +[debug] youtube-dl version 2016.12.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6a08ce230..c45441345 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.12.22 Core * [extractor/common] Improve detection of video-only formats in m3u8 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 955aa5c68..0b3d794c6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -365,8 +365,8 @@ - **kuwo:singer**: 酷我音乐 - 歌手 - **kuwo:song**: 酷我音乐 - **la7.it** - - **Laola1Tv** - - **Laola1TvEmbed** + - **laola1tv** + - **laola1tv:embed** - **LCI** - **Lcp** - **LcpPlay** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a0c5c35da..3082ebf66 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.12.20' +__version__ = '2016.12.22' From 19f37ce4b1e4251a3f53f8a5d3d0605d2526bc81 Mon Sep 17 00:00:00 2001 From: hub2git Date: Thu, 22 Dec 2016 18:25:39 -0800 Subject: [PATCH 73/81] [README.md] Fix typo --- 
README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 840932298..e85fa1555 100644 --- a/README.md +++ b/README.md @@ -932,7 +932,7 @@ If you want to create a build of youtube-dl yourself, you'll need If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. -After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): +After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) 2. Check out the source code with: From e7ac722d6276198c8b88986f06a4e3c55366cb58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Dec 2016 22:01:22 +0700 Subject: [PATCH 74/81] [README.md] Add missing protocols to format selection section --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e85fa1555..71d37e8b0 100644 --- a/README.md +++ b/README.md @@ -638,7 +638,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin - `acodec`: Name of the audio codec in use - `vcodec`: Name of the video codec in use - `container`: Name of the container format - - `protocol`: The protocol that will be used for the actual download, lower-case. 
`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native` + - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`) - `format_id`: A short description of the format Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster. From 12da830993f6f42ca309037da0eea161dcca90ec Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Fri, 23 Dec 2016 23:58:09 +0100 Subject: [PATCH 75/81] [acast] Fix broken audio URL and timestamp extraction Previously the first bling was used; now we look for the first bling with type BlingAudio. Previously publishingDate was a Unix timestamp in milliseconds; now it is an ISO 8601 string. --- youtube_dl/extractor/acast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 94ce88c83..eb8d1b669 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + parse_iso8601, OnDemandPagedList, ) @@ -22,7 +23,8 @@ class ACastIE(InfoExtractor): 'id': '57de3baa-4bb0-487e-9418-2692c1277a34', 'ext': 'mp3', 'title': '"Where Are You?": Taipei 101, Taiwan', - 'timestamp': 1196172000000, + 'timestamp': 1196172000, + 'upload_date': '20071127', 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, } @@ -35,11 +37,11 @@ class ACastIE(InfoExtractor): return { 'id': compat_str(cast_data['id']), 'display_id': display_id, - 'url': cast_data['blings'][0]['audio'], + 'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0], 'title': cast_data['name'], 'description': cast_data.get('description'), 'thumbnail': cast_data.get('image'), - 'timestamp':
int_or_none(cast_data.get('publishingDate')), + 'timestamp': parse_iso8601(cast_data.get('publishingDate')), 'duration': int_or_none(cast_data.get('duration')), } From 846fd69bacfa739375922491f8ba6cee99941335 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Sat, 24 Dec 2016 11:59:43 +0100 Subject: [PATCH 76/81] [acast] Add test with multiple blings --- youtube_dl/extractor/acast.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index eb8d1b669..6dace3051 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -16,7 +16,8 @@ from ..utils import ( class ACastIE(InfoExtractor): IE_NAME = 'acast' _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)' - _TEST = { + _TESTS = [{ + # test with one bling 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan', 'md5': 'ada3de5a1e3a2a381327d749854788bb', 'info_dict': { @@ -28,7 +29,20 @@ class ACastIE(InfoExtractor): 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e', 'duration': 211, } - } + }, { + # test with multiple blings + 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', + 'md5': '55c0097badd7095f494c99a172f86501', + 'info_dict': { + 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', + 'ext': 'mp3', + 'title': '2. 
Raggarmordet - Röster ur det förflutna', + 'timestamp': 1477346700, + 'upload_date': '20161024', + 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', + 'duration': 2797, + } + }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() From d1cd7e0ed9a562dbee6dfcb3601fdecc4158640b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 24 Dec 2016 15:00:23 +0100 Subject: [PATCH 77/81] Credit @wader for #11521 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4a6f7e13f..9e092cccc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -190,3 +190,4 @@ John Hawkinson Rich Leeper Zhong Jianxin Thor77 +Mattias Wadman From 264e77c406a3b14f15aafcd036524cb6fe86aa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Dec 2016 22:10:54 +0700 Subject: [PATCH 78/81] [twitch] Add support for rechat messages (closes #11524) --- youtube_dl/extractor/twitch.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8de8ec65b..bbf071da3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -22,6 +22,7 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, + update_url_query, urlencode_postdata, ) @@ -279,6 +280,18 @@ class TwitchVodIE(TwitchItemBaseIE): if 't' in query: info['start_time'] = parse_duration(query['t'][0]) + if info.get('timestamp') is not None: + info['subtitles'] = { + 'rechat': [{ + 'url': update_url_query( + 'https://rechat.twitch.tv/rechat-messages', { + 'video_id': 'v%s' % item_id, + 'start': info['timestamp'], + }), + 'ext': 'json', + }], + } + return info From 53a664edf4bf713df0159e604bbc131dde5ed1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Dec 2016 22:46:27 +0700 Subject: [PATCH 79/81] [brightcove:legacy] Improve embeds detection (closes #11523) --- youtube_dl/extractor/brightcove.py | 13 ++++++++----- 
youtube_dl/extractor/generic.py | 24 +++++++++++++++++++++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ac5f32541..aa2923ccf 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor): """Return a list of all Brightcove URLs from the webpage """ url_m = re.search( - r']+ + content=([\'"])(?Phttps?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 + ''', webpage) if url_m: - url = unescapeHTML(url_m.group(1)) + url = unescapeHTML(url_m.group('url')) # Some sites don't add it, we can't download with this url, for example: # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ - if 'playerKey' in url or 'videoId' in url: + if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: return [url] matches = re.findall( @@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) # Change the 'videoId' and others field to '@videoPlayer' - url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) # Change bckey (used by bcove.me urls) to playerKey url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 87daf83f8..79d10a1d1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -344,10 +344,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': 
['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -361,6 +361,24 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', From 4606c34e19af395a1ddd31d2941d4ccd90e5e279 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 25 Dec 2016 01:50:50 +0800 Subject: [PATCH 80/81] [extractor/common] Allow non-lang in subtitles' keys See 264e77c406a3b14f15aafcd036524cb6fe86aa20 --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 07d101aef..6fa7c334e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -189,9 +189,10 @@ class InfoExtractor(object): uploader_url: Full URL to a personal webpage of the video uploader. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format - {language: subformats}. "subformats" is a list sorted from - lower to higher preference, each element is a dictionary - with the "ext" entry and one of: + {tag: subformats}. 
"tag" is usually a language code, and + "subformats" is a list sorted from lower to higher + preference, each element is a dictionary with the "ext" + entry and one of: * "data": The subtitles file contents * "url": A URL pointing to the subtitles file "ext" will be calculated from URL if missing From b63005f5afb164f8660c23ab62962287eb1e1c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Dec 2016 04:02:29 +0700 Subject: [PATCH 81/81] [rtve:live] Fix extraction (closes #11529) --- youtube_dl/extractor/rtve.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 6a43b036e..746677a24 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -209,7 +209,10 @@ class RTVELiveIE(InfoExtractor): title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time) vidplayer_id = self._search_regex( - r'playerId=player([0-9]+)', webpage, 'internal video ID') + (r'playerId=player([0-9]+)', + r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', + r'data-id=["\'](\d+)'), + webpage, 'internal video ID') png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id png = self._download_webpage(png_url, video_id, 'Downloading url information') m3u8_url = _decrypt_url(png)