From 25bcd3550ee67bb521173d7a43dbc91178a11cfc Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Tue, 24 May 2016 12:13:05 +0300 Subject: [PATCH 01/14] [vlive] Address site update Changes: * Fix video params extraction * Don't make status request since status info now available on the page * Remove unneeded code * Fix test --- youtube_dl/extractor/vlive.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index a672ea9c5..147f52d45 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,7 @@ # coding: utf-8 -from __future__ import division, unicode_literals +from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( @@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor): 'info_dict': { 'id': '1326', 'ext': 'mp4', - 'title': "[V] Girl's Day's Broadcast", + 'title': "[V LIVE] Girl's Day's Broadcast", 'creator': "Girl's Day", 'view_count': int, }, @@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - # UTC+x - UTC+9 (KST) - tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone - tz_offset = -tz // 60 - 9 * 60 - self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) - - status_params = self._download_json( - 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, - video_id, 'Downloading JSON status', - headers={'Referer': url.encode('utf-8')}) - status = status_params.get('status') - air_start = status_params.get('onAirStartAt', '') - is_live = status_params.get('isLive') - video_params = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', + r'\bvlive\.video\.init\(([^)]+)\)', webpage, 'video params') - live_params, long_video_id, key = re.split( - r'"\s*,\s*"', video_params)[1:4] + status, _, _, live_params, long_video_id, key = 
re.split( + r'"\s*,\s*"', video_params)[2:8] if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) @@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor): elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': if long_video_id and key: return self._replay(video_id, webpage, long_video_id, key) - elif is_live: - status = 'LIVE_END' else: status = 'COMING_SOON' @@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor): raise ExtractorError('Uploading for replay. Please wait...', expected=True) elif status == 'COMING_SOON': - raise ExtractorError('Coming soon! %s' % air_start, expected=True) + raise ExtractorError('Coming soon!', expected=True) elif status == 'CANCELED': raise ExtractorError('We are sorry, ' 'but the live broadcast has been canceled.', From 6f748df43ff3476e4dbd29c7464837ea63d78b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 May 2016 20:51:17 +0600 Subject: [PATCH 02/14] [eporner] Make test only_matching --- youtube_dl/extractor/eporner.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 581276694..ac5d0fe24 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -24,20 +24,10 @@ class EpornerIE(InfoExtractor): 'view_count': int, 'age_limit': 18, }, - }, - # New (May 2016) URL layout - { + }, { + # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', - 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', - 'info_dict': { - 'id': '3YRUtzMcWn0', - 'display_id': 'Star-Wars-XXX-Parody', - 'ext': 'mp4', - 'title': 'Star Wars XXX Parody', - 'duration': 361.0, - 'view_count': int, - 'age_limit': 18, - }, + 'only_matching': True, }] def _real_extract(self, url): From 0a5685b26fae0940f14cb063a6e4fc6986f9c124 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 21:41:47 +0800 Subject: 
[PATCH 03/14] [common] Support non-bootstraped streams in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4bfa610c1..7eb7464ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,11 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1029,8 +1029,13 @@ class InfoExtractor(object): 'base URL', default=None) if base_url: base_url = base_url.strip() + + bootstrap_info = xpath_text( + manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], + 'bootstrap info', default=None) + for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0': + if manifest_version == '2.0' or assume_f4mv2: media_url = media_el.attrib.get('href') or media_el.attrib.get('url') if not media_url: continue @@ -1050,7 +1055,7 @@ class 
InfoExtractor(object): formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, - 'ext': 'flv', + 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, 'width': int_or_none(media_el.attrib.get('width')), 'height': int_or_none(media_el.attrib.get('height')), From 85b0fe7d6442d4ddb056fb5a5d15e51e8a625ae7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 21:43:35 +0800 Subject: [PATCH 04/14] [playwire] Use _extract_f4m_formats Related: #9531 --- youtube_dl/extractor/playwire.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 6d138ef25..7580e4a85 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -4,9 +4,8 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, + dict_get, float_or_none, - int_or_none, ) @@ -23,6 +22,7 @@ class PlaywireIE(InfoExtractor): 'duration': 145.94, }, }, { + # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', 'only_matching': True, }, { @@ -48,25 +48,10 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - f4m = self._download_xml(src, video_id) - base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True) - formats = [] - for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'): - media_url = media.get('url') - if not media_url: - continue - tbr = int_or_none(media.get('bitrate')) - width = int_or_none(media.get('width')) - height = int_or_none(media.get('height')) - f = { - 'url': '%s/%s' % (base_url, media.attrib['url']), - 'tbr': tbr, - 'width': width, - 'height': height, - } - if not (tbr or width or height): - f['quality'] = 1 if '-hd.' 
in media_url else 0 - formats.append(f) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 self._sort_formats(formats) return { From 240b60453e1237473dfd8deff40c9dc54661668c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 21:55:43 +0800 Subject: [PATCH 05/14] [common] Support m3u8 in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7eb7464ec..b5bea5904 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,12 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, + m3u8_id=m3u8_id) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1046,11 +1047,17 @@ class 
InfoExtractor(object): # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested # bitrate in f4m downloader - if determine_ext(manifest_url) == 'f4m': + ext = determine_ext(manifest_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', preference=preference, + m3u8_id=m3u8_id, fatal=False)) + continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), From 761052db922a525d6ccaf250f9914841c9d3d66f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 21:57:06 +0800 Subject: [PATCH 06/14] [playwire] Add the test (closed #9531) --- youtube_dl/extractor/playwire.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 7580e4a85..2ee5c5aa3 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -21,6 +21,18 @@ class PlaywireIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.png$', 'duration': 145.94, }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': '4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', @@ -48,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') 
for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 From 5950cb1d6d8d27f7a7272895100da9652212fad6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 22:44:00 +0800 Subject: [PATCH 07/14] [utils] Support a new form of date Found in dw.com (#9475) --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d65f5e833..316a307e0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1035,6 +1035,7 @@ def unified_strdate(date_str, day_first=True): format_expressions.extend([ '%d-%m-%Y', '%d.%m.%Y', + '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', @@ -1049,6 +1050,8 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: + print(expression) + print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass @@ -1910,7 +1913,7 @@ def parse_age_limit(s): def strip_jsonp(code): return re.sub( - r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) + r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) def js_to_json(code): From ac88d2316ebef5b00cf5c94d94f01c9f7e17ce51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 22:48:47 +0800 Subject: [PATCH 08/14] [dw] Support documentaries (closes #9475) --- youtube_dl/extractor/dw.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index ae7c571bd..0f0f0b8d3 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -2,13 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + unified_strdate, +) from ..compat import compat_urlparse class DWIE(InfoExtractor): IE_NAME = 
'dw' - _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P\d+)' _TESTS = [{ # video 'url': 'http://www.dw.com/en/intelligent-light/av-19112290', @@ -31,6 +34,16 @@ class DWIE(InfoExtractor): 'description': 'md5:bc9ca6e4e063361e21c920c53af12405', 'upload_date': '20160311', } + }, { + 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', + 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', + 'info_dict': { + 'id': '19274438', + 'ext': 'mp4', + 'title': 'Welcome to the 90s – Hip Hop', + 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', + 'upload_date': '20160521', + }, }] def _real_extract(self, url): @@ -38,6 +51,7 @@ class DWIE(InfoExtractor): webpage = self._download_webpage(url, media_id) hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] + media_id = hidden_inputs.get('media_id') or media_id if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( @@ -49,13 +63,20 @@ class DWIE(InfoExtractor): else: formats = [{'url': hidden_inputs['file_name']}] + upload_date = hidden_inputs.get('display_date') + if not upload_date: + upload_date = self._html_search_regex( + r']+class="date">([0-9.]+)\s*\|', webpage, + 'upload date', default=None) + upload_date = unified_strdate(upload_date) + return { 'id': media_id, 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': hidden_inputs.get('preview_image'), 'duration': int_or_none(hidden_inputs.get('file_duration')), - 'upload_date': hidden_inputs.get('display_date'), + 'upload_date': upload_date, 'formats': formats, } From 293c2556886c34d11919eb0af6760c52bd6a2632 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 26 May 2016 22:54:16 +0800 Subject: [PATCH 09/14] [utils] Remove debugging codes --- youtube_dl/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/youtube_dl/utils.py b/youtube_dl/utils.py index 316a307e0..cfb2d1bf5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1050,8 +1050,6 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: - print(expression) - print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass From 448bb5f333c6c4c8084e479e1035ff674e4f8fd4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 27 May 2016 00:03:03 +0800 Subject: [PATCH 10/14] [common] Fix non-bootstrapped support in f4m --- youtube_dl/extractor/common.py | 19 +++++++++++++------ youtube_dl/extractor/playwire.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b5bea5904..e53b7ad64 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,12 +1001,11 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, - m3u8_id=m3u8_id) + transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = 
manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1036,8 +1035,16 @@ class InfoExtractor(object): 'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0' or assume_f4mv2: - media_url = media_el.attrib.get('href') or media_el.attrib.get('url') + # If <bootstrapInfo> is present, the specified f4m is a + # stream-level manifest, and only set-level manifests may refer to + # external resources. See section 11.4 and section 4 of F4M spec + if bootstrap_info is None: + media_url = None + # @href is introduced in 2.0, see section 11.6 of F4M spec + if manifest_version == '2.0': + media_url = media_el.attrib.get('href') + if media_url is None: + media_url = media_el.attrib.get('url') if not media_url: continue manifest_url = ( diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 2ee5c5aa3..0bc743118 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -60,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' 
in a_format['url'] else 0 From 6f8cb2421948fd128b3004fde7eebaa2463f5f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 May 2016 22:21:55 +0600 Subject: [PATCH 11/14] [tvp] Expand _VALID_URL and improve naming (Closes #9602) --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/tvp.py | 47 ++++++++++++++++-------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 05561149a..ddf62139e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -833,7 +833,10 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE -from .tvp import TvpIE, TvpSeriesIE +from .tvp import ( + TVPIE, + TVPSeriesIE, +) from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f57d609d4..a4997cb89 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,20 +6,13 @@ import re from .common import InfoExtractor -class TvpIE(InfoExtractor): - IE_NAME = 'tvp.pl' - _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P\d+)$' +class TVPIE(InfoExtractor): + IE_NAME = 'tvp' + IE_DESC = 'Telewizja Polska' + _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P\d+)' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035', - 'md5': 'cdd98303338b8a7f7abab5cd14092bf2', - 'info_dict': { - 'id': '4278035', - 'ext': 'wmv', - 'title': 'Ogniem i mieczem, odc. 
2', - }, - }, { - 'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536', + 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 'info_dict': { 'id': '194536', @@ -36,12 +29,22 @@ class TvpIE(InfoExtractor): }, }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', - 'md5': 'c3b15ed1af288131115ff17a17c19dda', - 'info_dict': { - 'id': '17834272', - 'ext': 'mp4', - 'title': 'Na sygnale, odc. 39', - }, + 'only_matching': True, + }, { + 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', + 'only_matching': True, + }, { + 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', + 'only_matching': True, + }, { + 'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', + 'only_matching': True, + }, { + 'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', + 'only_matching': True, + }, { + 'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', + 'only_matching': True, }] def _real_extract(self, url): @@ -92,8 +95,8 @@ class TvpIE(InfoExtractor): } -class TvpSeriesIE(InfoExtractor): - IE_NAME = 'tvp.pl:Series' +class TVPSeriesIE(InfoExtractor): + IE_NAME = 'tvp:series' _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P[^/]+)/?$' _TESTS = [{ @@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor): videos_paths = re.findall( '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key()) + self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) for v_path in videos_paths] return { From fac2af3c51c92b7f9abc4f229bc9351e8a301b29 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 27 May 2016 01:41:27 +0800 Subject: [PATCH 12/14] [common] Fix m3u8 extraction in f4m manifests --- youtube_dl/extractor/common.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e53b7ad64..0029c3694 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1063,7 +1063,7 @@ class InfoExtractor(object): elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, - m3u8_id=m3u8_id, fatal=False)) + m3u8_id=m3u8_id, fatal=fatal)) continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ From 2615fa758422deaaf11049e71f0c183e655c0b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 May 2016 01:46:12 +0600 Subject: [PATCH 13/14] [downloader/f4m] Simply select format when it's the only one --- youtube_dl/downloader/f4m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 314def4cb..8f88b0241 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -319,7 +319,7 @@ class F4mFD(FragmentFD): doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None: + if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] From 77b8b4e696dd5ffb1330a2de328eb9c3ecd09a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 May 2016 01:47:44 +0600 Subject: [PATCH 14/14] [extractor/common] Borrow quality metadata from parent set-level manifest for f4m --- youtube_dl/extractor/common.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0029c3694..57793537b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1035,6 +1035,10 @@ class InfoExtractor(object): 
'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): + tbr = int_or_none(media_el.attrib.get('bitrate')) + width = int_or_none(media_el.attrib.get('width')) + height = int_or_none(media_el.attrib.get('height')) + format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])) # If <bootstrapInfo> is present, the specified f4m is a # stream-level manifest, and only set-level manifests may refer to # external resources. See section 11.4 and section 4 of F4M spec @@ -1056,23 +1060,35 @@ class InfoExtractor(object): # bitrate in f4m downloader ext = determine_ext(manifest_url) if ext == 'f4m': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal)) + transform_source=transform_source, fatal=fatal) + # Sometimes stream-level manifest contains single media entry that + # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player). + # At the same time parent's media entry in set-level manifest may + # contain it. We will copy it from parent in such cases. 
+ if len(f4m_formats) == 1: + f = f4m_formats[0] + f.update({ + 'tbr': f.get('tbr') or tbr, + 'width': f.get('width') or width, + 'height': f.get('height') or height, + 'format_id': f.get('format_id') if not tbr else format_id, + }) + formats.extend(f4m_formats) continue elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, m3u8_id=m3u8_id, fatal=fatal)) continue - tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), + 'format_id': format_id, 'url': manifest_url, 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, - 'width': int_or_none(media_el.attrib.get('width')), - 'height': int_or_none(media_el.attrib.get('height')), + 'width': width, + 'height': height, 'preference': preference, }) return formats