From 44451f22d5d1a5bed5f5851b27a963860813ecd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Oct 2015 13:41:52 +0200 Subject: [PATCH 01/20] [naver] Remove unused import --- youtube_dl/extractor/naver.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 35cbb3e6d..1f5fc2145 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -10,7 +10,6 @@ from ..compat import ( ) from ..utils import ( ExtractorError, - clean_html, ) From 7d0ada5ff907824c66c466ee9b83008210250d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Oct 2015 13:42:11 +0200 Subject: [PATCH 02/20] [test/helper] Fix style Use the correct indentation to please flake8 --- test/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index 28fd135b2..bdd7acca4 100644 --- a/test/helper.py +++ b/test/helper.py @@ -136,7 +136,7 @@ def expect_value(self, got, expected, field): self.assertEqual( type_expected, type_got, 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( - index, field, type_expected, type_got)) + index, field, type_expected, type_got)) expect_value(self, item_got, item_expected, field) else: if isinstance(expected, compat_str) and expected.startswith('md5:'): From 0facd2af3ebfda68b79c7e2e1c575d73f9680802 Mon Sep 17 00:00:00 2001 From: fluks Date: Fri, 2 Oct 2015 04:08:13 +0300 Subject: [PATCH 03/20] Fix ruutu extractor bug If there's no resolution attribute in xml, only width gets a value, height doesn't and ValueError is raised. --- youtube_dl/extractor/ruutu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index c67ad25ce..7720f1383 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -74,7 +74,7 @@ class RuutuIE(InfoExtractor): preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) - width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')] + width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')] formats.append({ 'format_id': '%s-%s' % (proto, label if label else tbr), 'url': video_url, From 59a9efe85b15e53c5928b7fdb810c150f5bf4b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Oct 2015 20:48:39 +0600 Subject: [PATCH 04/20] [ruutu] Limit resolution split to 2 pieces (Closes #7037, closes #7042) --- youtube_dl/extractor/ruutu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 7720f1383..a16b73ff4 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -74,7 +74,7 @@ class RuutuIE(InfoExtractor): preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) - width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')] + width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] formats.append({ 'format_id': '%s-%s' % (proto, label if label else tbr), 'url': video_url, From 3bb3f0410822d3d21c6199bb8915b598990628e6 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 16:59:04 +0200 Subject: [PATCH 05/20] [europa] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/europa.py | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 youtube_dl/extractor/europa.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a73a1317e..495a18c17 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE from .escapist import EscapistIE from .espn import ESPNIE from .esri import EsriVideoIE +from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py new file mode 100644 index 000000000..c437c4886 --- /dev/null +++ b/youtube_dl/extractor/europa.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + xpath_text +) + + +class EuropaIE(InfoExtractor): + _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P[A-Za-z0-9]+)' + _TEST = { + 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', + 'md5': '728cca2fd41d5aa7350cec1141fbe620', + 'info_dict': { + 'id': 'I107758', + 'ext': 'mp4', + 'title': 'TRADE - Wikileaks on TTIP', + 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015', + 'thumbnail': 're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + lang = query.get('sitelang', ['en'])[0] + + playlist = self._download_xml('http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id, video_id) + videos = {} + formats = [] + + for item in playlist.findall('info/title/item'): + videos[xpath_text(item, 'lg')] = {'title': xpath_text(item, 'label').strip()} + + for item in playlist.findall('info/description/item'): + videos[xpath_text(item, 'lg')]['description'] = xpath_text(item, 'label').strip() + + for item in playlist.findall('files/file'): + lg = xpath_text(item, 'lg') + vid = videos[lg] + vid['format_note'] = xpath_text(item, 'lglabel') + vid['url'] = xpath_text(item, 'url') + + if lg == lang: + vid['language_preference'] = 10 + + formats.append(vid) + + formats.reverse() + def_video = videos.get(lang, videos['int']) + + return { + 'id': video_id, + 'title': def_video['title'], + 'description': def_video['description'], + 'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'), + 'formats': formats + } From af17794c654bd24bbd5f47997596430b201ea08e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Oct 2015 22:29:15 +0600 Subject: [PATCH 06/20] [europa] Improve extraction --- youtube_dl/extractor/europa.py | 94 ++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py index c437c4886..02ba8d63c 100644 --- a/youtube_dl/extractor/europa.py +++ b/youtube_dl/extractor/europa.py @@ -2,59 +2,89 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, + int_or_none, + orderedSet, + parse_duration, + qualities, + unified_strdate, xpath_text ) class EuropaIE(InfoExtractor): - _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P[A-Za-z0-9]+)' - _TEST = { + _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?.*?\bref=(?P[A-Za-z0-9]+)' + _TESTS = [{ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', - 'md5': '728cca2fd41d5aa7350cec1141fbe620', + 'md5': '574f080699ddd1e19a675b0ddf010371', 'info_dict': { 'id': 'I107758', 'ext': 'mp4', 'title': 'TRADE - Wikileaks on TTIP', 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015', - 'thumbnail': 're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150811', + 'duration': 34, + 'view_count': int, + 'formats': 'mincount:3', } - } + }, { + 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) + + playlist = self._download_xml( + 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id) + + def get_item(type_, preference): + items = {} + for item in playlist.findall('./info/%s/item' % type_): + lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None) + if lang and label: + items[lang] = label.strip() + for p in preference: + if items.get(p): + return items[p] + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - lang = query.get('sitelang', ['en'])[0] + preferred_lang = query.get('sitelang', ('en', ))[0] + + preferred_langs = orderedSet((preferred_lang, 'en', 'int')) + + title = get_item('title', preferred_langs) or video_id + description = get_item('description', preferred_langs) + thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') + upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) + duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) + view_count = int_or_none(xpath_text(playlist,'./info/views', 'views')) + + language_preference = qualities(preferred_langs[::-1]) - playlist = self._download_xml('http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id, video_id) - videos = {} formats = [] - - for item in playlist.findall('info/title/item'): - videos[xpath_text(item, 'lg')] = {'title': xpath_text(item, 'label').strip()} - - for item in playlist.findall('info/description/item'): - videos[xpath_text(item, 'lg')]['description'] = xpath_text(item, 'label').strip() - - for item in playlist.findall('files/file'): - lg = xpath_text(item, 'lg') - vid = videos[lg] - vid['format_note'] = xpath_text(item, 'lglabel') - vid['url'] = xpath_text(item, 'url') - - if lg == lang: - vid['language_preference'] = 10 - - formats.append(vid) - - formats.reverse() - def_video = videos.get(lang, videos['int']) + for file_ in playlist.findall('./files/file'): + video_url = xpath_text(file_, './url') + if not video_url: + continue + lang = xpath_text(file_, './lg') + formats.append({ + 'url': video_url, + 'format_id': lang, + 'format_note': xpath_text(file_, './lglabel'), + 'language_preference': language_preference(lang) + }) + self._sort_formats(formats) return { 'id': video_id, - 'title': def_video['title'], - 'description': def_video['description'], - 'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'), + 'title': title, + 'description': description, + 'thumbnail': thumbnmail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, 'formats': formats } From f3b098fb90b985484d800fcdbfe18add2360e4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Oct 2015 23:22:53 +0600 Subject: [PATCH 07/20] [europa] Add support for audio URLs --- youtube_dl/extractor/europa.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py index 02ba8d63c..9e33cacff 100644 --- a/youtube_dl/extractor/europa.py +++ b/youtube_dl/extractor/europa.py @@ -14,7 +14,7 @@ from ..utils import ( class EuropaIE(InfoExtractor): - _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?.*?\bref=(?P[A-Za-z0-9]+)' + _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P[A-Za-z0-9-]+)' _TESTS = [{ 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', 'md5': '574f080699ddd1e19a675b0ddf010371', @@ -32,6 +32,9 @@ class EuropaIE(InfoExtractor): }, { 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', 'only_matching': True, + }, { + 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en', + 'only_matching': True, }] def _real_extract(self, url): From b203095d4c95d471bc2ac7045693c6938ee914d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Oct 2015 22:40:35 +0200 Subject: [PATCH 08/20] [europa] Style fix: add whitespace after comma --- youtube_dl/extractor/europa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/europa.py b/youtube_dl/extractor/europa.py index 9e33cacff..adc43919e 100644 --- a/youtube_dl/extractor/europa.py +++ b/youtube_dl/extractor/europa.py @@ -63,7 +63,7 @@ class EuropaIE(InfoExtractor): thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) - view_count = int_or_none(xpath_text(playlist,'./info/views', 'views')) + view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) language_preference = qualities(preferred_langs[::-1]) From 5495937f461268a850a6a54d3fe19ed1f0f01eef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 13 Sep 2015 20:00:10 +0800 Subject: [PATCH 09/20] [options] Cleanup double spaces in help texts --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5eccc0a70..3dd6d290b 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -276,7 +276,7 @@ def parseOpts(overrideArguments=None): 'For example, to only match videos that have been liked more than ' '100 times and disliked less than 50 times (or the dislike ' 'functionality is not available at the given service), but who ' - 'also have a description, use --match-filter ' + 'also have a description, use --match-filter ' '"like_count > 100 & dislike_count Date: Sun, 13 Sep 2015 20:04:27 +0800 Subject: [PATCH 10/20] [compat] Allow overriding by only COLUMNS or LINES in compat_get_terminal_size Now the semantic of this function is identical to shutil.get_terminal_size() in Python 3.3+. The new behavior also corresponds to the old get_term_width(), which is removed in 003c69a84b68cadb46aeb8e03115848a722fd675 --- youtube_dl/compat.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1ff42d94b..c36c9c23f 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -416,7 +416,7 @@ if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 else: _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) - def compat_get_terminal_size(): + def compat_get_terminal_size(fallback=(80, 24)): columns = compat_getenv('COLUMNS', None) if columns: columns = int(columns) @@ -428,14 +428,20 @@ else: else: lines = None - try: - sp = subprocess.Popen( - ['stty', 'size'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = sp.communicate() - lines, columns = map(int, out.split()) - except Exception: - pass + if columns <= 0 or lines <= 0: + try: + sp = subprocess.Popen( + ['stty', 'size'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = sp.communicate() + _columns, _lines = map(int, out.split()) + except Exception: + _columns, _lines = _terminal_size(*fallback) + + if columns <= 0: + columns = _columns + if lines <= 0: + lines = _lines return _terminal_size(columns, lines) try: From bad84757eb135b85d5a1b29524a064d23ab4e1e9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 13 Sep 2015 20:10:23 +0800 Subject: [PATCH 11/20] [doc] Better formatting of youtube-dl.1 (closes #6510) --- devscripts/prepare_manpage.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 7ece37754..776e6556e 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -8,6 +8,35 @@ import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') + +def filter_options(readme): + ret = '' + in_options = False + for line in readme.split('\n'): + if line.startswith('# '): + if line[2:].startswith('OPTIONS'): + in_options = True + else: + in_options = False + + if in_options: + if line.lstrip().startswith('-'): + option, description = re.split(r'\s{2,}', line.lstrip()) + split_option = option.split(' ') + + if not split_option[-1].startswith('-'): # metavar + option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) + + # Pandoc's definition_lists. See http://pandoc.org/README.html + # for more information. + ret += '\n%s\n: %s\n' % (option, description) + else: + ret += line.lstrip() + '\n' + else: + ret += line + '\n' + + return ret + with io.open(README_FILE, encoding='utf-8') as f: readme = f.read() @@ -26,6 +55,8 @@ readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) readme = PREFIX + readme +readme = filter_options(readme) + if sys.version_info < (3, 0): print(readme.encode('utf-8')) else: From 97d5bfcba65c8575ab06a34e91fae30a5fda3161 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 3 Oct 2015 14:17:17 +0100 Subject: [PATCH 12/20] [engadget] accept short video urls --- youtube_dl/extractor/engadget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index 4ea37ebd9..e4180701d 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -10,7 +10,7 @@ from ..utils import ( class EngadgetIE(InfoExtractor): _VALID_URL = r'''(?x)https?://www.engadget.com/ - (?:video/5min/(?P\d+)| + (?:video(?:/5min)?/(?P\d+)| [\d/]+/.*?) ''' From 60d23e5e592aebe4a77dfb4ab70e87337967721c Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sat, 3 Oct 2015 16:25:33 +0300 Subject: [PATCH 13/20] [tapely] Improve _VALID_URL --- youtube_dl/extractor/tapely.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py index f1f43d0a7..744f9db38 100644 --- a/youtube_dl/extractor/tapely.py +++ b/youtube_dl/extractor/tapely.py @@ -16,7 +16,7 @@ from ..utils import ( class TapelyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P[A-Za-z0-9\-_]+)(?:/(?P\d+))?' + _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P[A-Za-z0-9\-_]+)(?:/(?P\d+))?' _API_URL = 'http://tape.ly/showtape?id={0:}' _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}' _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}' @@ -42,6 +42,10 @@ class TapelyIE(InfoExtractor): 'ext': 'm4a', }, }, + { + 'url': 'https://tapely.com/my-grief-as-told-by-water', + 'only_matching': True, + }, ] def _real_extract(self, url): From ef5acfe32de4c995625f9800cfe0776237961436 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 1 Sep 2015 23:05:19 +0100 Subject: [PATCH 14/20] [limelight] Add new extractor --- youtube_dl/extractor/__init__.py | 5 + youtube_dl/extractor/limelight.py | 176 ++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 youtube_dl/extractor/limelight.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 495a18c17..20cc3660c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -295,6 +295,11 @@ from .lifenews import ( LifeNewsIE, LifeEmbedIE, ) +from .limelight import ( + LimeLightMediaIE, + LimeLightChannelIE, + LimeLightChannelListIE, +) from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py new file mode 100644 index 000000000..dcfc215c7 --- /dev/null +++ b/youtube_dl/extractor/limelight.py @@ -0,0 +1,176 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + determine_ext, +) + + +class LimeLightBaseIE(InfoExtractor): + + def get_playlist_service(self, id, method): + return self._download_json(self.PLAYLIST_SERVICE_URL % (id, method), id) + + def get_api(self, orgId, id, method): + return self._download_json(self.API_URL % (orgId, id, method), id) + + def process_data(self, mobileUrls, streams, properties): + video_id = properties['media_id'] + formats = [] + + for mobileUrl in mobileUrls: + if '.m3u8' in mobileUrl['mobileUrl']: + formats.extend(self._extract_m3u8_formats(mobileUrl['mobileUrl'], video_id)) + else: + formats.append({'url': mobileUrl['mobileUrl']}) + + for stream in streams: + if '.f4m' in stream['url']: + formats.extend(self._extract_f4m_formats(stream['url'], video_id)) + else: + fmt = { + 'url': stream.get('url'), + 'abr': stream.get('audioBitRate'), + 'vbr': stream.get('videoBitRate'), + 'fps': stream.get('videoFrameRate'), + 'width': stream.get('videoWidthInPixels'), + 'height': stream.get('videoHeightInPixels'), + 'ext': determine_ext(stream.get('url')) + } + rtmp = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?Pmp4:.+)$', stream['url']) + if rtmp: + fmt.update({ + 'url': rtmp.group('url'), + 'play_path': rtmp.group('playpath'), + 'app': rtmp.group('app'), + }) + formats.append(fmt) + + self._sort_formats(formats) + + title = properties['title'] + description = properties.get('description') + timestamp = properties.get('create_date') + duration = int_or_none(properties.get('duration_in_milliseconds')) + filesize = properties.get('total_storage_in_bytes') + categories = [properties.get('category')] + thumbnails = [{ + 'url': thumbnail.get('url'), + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + } for thumbnail in properties.get('thumbnails')] + subtitles = {caption.get('language_code'): [{'url': caption.get('url')}] for caption in properties.get('captions')} + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'timestamp': timestamp, + 'duration': duration, + 'filesize': filesize, + 'categories': categories, + 'thumbnails': thumbnails, + 'subtitles': subtitles, + } + + +class LimeLightMediaIE(LimeLightBaseIE): + IE_NAME = 'limelight' + _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*mediaId=(?P[a-z0-9]{32})' + _TEST = { + 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', + 'md5': '3213605088be599705677ef785db6972', + 'info_dict': { + 'id': '3ffd040b522b4485b6d84effc750cd86', + 'ext': 'mp4', + 'title': 'HaP and the HB Prince Trailer', + 'description': 'As Harry Potter begins his 6th year at Hogwarts School of Witchcraft and Wizardry, he discovers an old book marked mysteriously "This book is the property of the Half-Blood Prince" and begins to learn more about Lord Voldemort\'s dark past.', + 'thumbnail': 're:^https?://.*\.jpeg$', + 'duration': 144230, + 'timestamp': 1244136834, + "upload_date": "20090604", + } + } + PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/media/%s/%s' + API_URL = 'http://api.video.limelight.com/rest/organizations/%s/media/%s/%s.json' + + def _real_extract(self, url): + video_id = self._match_id(url) + + mobile_json_data = self.get_playlist_service(video_id, 'getMobilePlaylistByMediaId') + pc_json_data = self.get_playlist_service(video_id, 'getPlaylistByMediaId') + properties = self.get_api(pc_json_data['orgId'], video_id, 'properties') + + return self.process_data(mobile_json_data['mediaList'][0]['mobileUrls'], pc_json_data['playlistItems'][0]['streams'], properties) + + +class LimeLightChannelIE(LimeLightBaseIE): + IE_NAME = 'limelight:channel' + _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*channelId=(?P[a-z0-9]{32})' + _TEST = { + 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', + 'info_dict': { + 'id': 'ab6a524c379342f9b23642917020c082', + 'title': 'Javascript Sample Code', + }, + 'playlist_mincount': 3, + } + PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/channel/%s/%s' + API_URL = 'http://api.video.limelight.com/rest/organizations/%s/channels/%s/%s.json' + + def _real_extract(self, url): + channel_id = self._match_id(url) + + mobile_json_data = self.get_playlist_service(channel_id, 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1') + pc_json_data = self.get_playlist_service(channel_id, 'getPlaylistByChannelId') + medias = self.get_api(pc_json_data['orgId'], channel_id, 'media') + + entries = [] + for i in range(len(medias['media_list'])): + entries.append(self.process_data(mobile_json_data['mediaList'][i]['mobileUrls'], pc_json_data['playlistItems'][i]['streams'], medias['media_list'][i])) + + return { + 'id': channel_id, + 'title': pc_json_data['title'], + 'entries': entries, + '_type': 'playlist', + } + + +class LimeLightChannelListIE(LimeLightBaseIE): + IE_NAME = 'limelight:channel_list' + _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*channelListId=(?P[a-z0-9]{32})' + _TEST = { + 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', + 'info_dict': { + 'id': '301b117890c4465c8179ede21fd92e2b', + 'title': 'Website - Hero Player', + }, + 'playlist_mincount': 2, + } + PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/channel_list/%s/%s' + + def _real_extract(self, url): + channel_list_id = self._match_id(url) + + json_data = self.get_playlist_service(channel_list_id, 'getMobileChannelListById') + + entries = [] + for channel in json_data['channelList']: + entries.append({ + 'url': 'http://link.videoplatform.limelight.com/media/?channelId=%s' % channel['id'], + '_type': 'url', + 'ie_key': 'LimeLightChannel', + }) + + return { + 'id': channel_list_id, + 'title': json_data['title'], + 'entries': entries, + '_type': 'playlist', + } From 4bba371644818d79b5f8481b5b31c53ea8ecbcc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 20:33:42 +0600 Subject: [PATCH 15/20] [YoutubeDL] Autocalculate ext for subtitles when missing --- youtube_dl/YoutubeDL.py | 11 +++++++++-- youtube_dl/extractor/common.py | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d65253882..adf70d658 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1232,13 +1232,20 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + subtitles = info_dict.get('subtitles') + if subtitles: + for _, subtitle in subtitles.items(): + for subtitle_format in subtitle: + if 'ext' not in subtitle_format: + subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() + if self.params.get('listsubtitles', False): if 'automatic_captions' in info_dict: self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') - self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') + self.list_subtitles(info_dict['id'], subtitles, 'subtitles') return info_dict['requested_subtitles'] = self.process_subtitles( - info_dict['id'], info_dict.get('subtitles'), + info_dict['id'], subtitles, info_dict.get('automatic_captions')) # We now pick which formats have to be downloaded diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4fe2307cd..dbae75406 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -165,6 +165,7 @@ class InfoExtractor(object): with the "ext" entry and one of: * "data": The subtitles file contents * "url": A URL pointing to the subtitles file + "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles', used by the YoutubeIE for automatically generated captions duration: Length of the video in seconds, as an integer. From d7fc56318b72607758e7484c22076ec2999f10b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 20:41:57 +0600 Subject: [PATCH 16/20] [limelight] Fix python 2.6, simplify, make more robust (Closes #6734) --- youtube_dl/extractor/__init__.py | 6 +- youtube_dl/extractor/limelight.py | 185 ++++++++++++++++++------------ 2 files changed, 113 insertions(+), 78 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 20cc3660c..3ace1cc2c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -296,9 +296,9 @@ from .lifenews import ( LifeEmbedIE, ) from .limelight import ( - LimeLightMediaIE, - LimeLightChannelIE, - LimeLightChannelListIE, + LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, ) from .liveleak import LiveLeakIE from .livestream import ( diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index dcfc215c7..599d8413d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -5,65 +5,105 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, determine_ext, + float_or_none, + int_or_none, ) -class LimeLightBaseIE(InfoExtractor): +class LimelightBaseIE(InfoExtractor): + _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' + _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' - def get_playlist_service(self, id, method): - return self._download_json(self.PLAYLIST_SERVICE_URL % (id, method), id) + def _call_playlist_service(self, item_id, method, fatal=True): + return self._download_json( + self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) - def get_api(self, orgId, id, method): - return self._download_json(self.API_URL % (orgId, id, method), id) + def _call_api(self, organization_id, item_id, method): + return self._download_json( + self._API_URL % (organization_id, self._API_PATH, item_id, method), + item_id, 'Downloading API %s JSON' % method) - def process_data(self, mobileUrls, streams, properties): + def _extract(self, item_id, pc_method, mobile_method, meta_method): + pc = self._call_playlist_service(item_id, pc_method) + metadata = self._call_api(pc['orgId'], item_id, meta_method) + mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) + return pc, mobile, metadata + + def _extract_info(self, streams, mobile_urls, properties): video_id = properties['media_id'] formats = [] - for mobileUrl in mobileUrls: - if '.m3u8' in mobileUrl['mobileUrl']: - formats.extend(self._extract_m3u8_formats(mobileUrl['mobileUrl'], video_id)) - else: - formats.append({'url': mobileUrl['mobileUrl']}) - for stream in streams: - if '.f4m' in stream['url']: - formats.extend(self._extract_f4m_formats(stream['url'], video_id)) + stream_url = stream.get('url') + if not stream_url: + continue + if '.f4m' in stream_url: + formats.extend(self._extract_f4m_formats(stream_url, video_id)) else: fmt = { - 'url': stream.get('url'), - 'abr': stream.get('audioBitRate'), - 'vbr': stream.get('videoBitRate'), - 'fps': stream.get('videoFrameRate'), - 'width': stream.get('videoWidthInPixels'), - 'height': stream.get('videoHeightInPixels'), - 'ext': determine_ext(stream.get('url')) + 'url': stream_url, + 'abr': float_or_none(stream.get('audioBitRate')), + 'vbr': float_or_none(stream.get('videoBitRate')), + 'fps': float_or_none(stream.get('videoFrameRate')), + 'width': int_or_none(stream.get('videoWidthInPixels')), + 'height': int_or_none(stream.get('videoHeightInPixels')), + 'ext': determine_ext(stream_url) } - rtmp = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?Pmp4:.+)$', stream['url']) + rtmp = re.search(r'^(?Prtmpe?://[^/]+/(?P.+))/(?Pmp4:.+)$', stream_url) if rtmp: + format_id = 'rtmp' + if stream.get('videoBitRate'): + format_id += '-%d' % int_or_none(stream['videoBitRate']) fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), 'app': rtmp.group('app'), + 'ext': 'flv', + 'format_id': format_id, }) formats.append(fmt) + for mobile_url in mobile_urls: + media_url = mobile_url.get('mobileUrl') + if not media_url: + continue + format_id = mobile_url.get('targetMediaPlatform') + if determine_ext(media_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + preference=-1, m3u8_id=format_id)) + else: + formats.append({ + 'url': media_url, + 'format_id': format_id, + 'preference': -1, + }) + self._sort_formats(formats) title = properties['title'] description = properties.get('description') - timestamp = properties.get('create_date') - duration = int_or_none(properties.get('duration_in_milliseconds')) - filesize = properties.get('total_storage_in_bytes') + timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date')) + duration = float_or_none(properties.get('duration_in_milliseconds'), 1000) + filesize = int_or_none(properties.get('total_storage_in_bytes')) categories = [properties.get('category')] + tags = properties.get('tags', []) thumbnails = [{ - 'url': thumbnail.get('url'), + 'url': thumbnail['url'], 'width': int_or_none(thumbnail.get('width')), 'height': int_or_none(thumbnail.get('height')), - } for thumbnail in properties.get('thumbnails')] - subtitles = {caption.get('language_code'): [{'url': caption.get('url')}] for caption in properties.get('captions')} + } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] + + subtitles = {} + for caption in properties.get('captions', {}): + lang = caption.get('language_code') + subtitles_url = caption.get('url') + if lang and subtitles_url: + subtitles[lang] = [{ + 'url': subtitles_url, + }] return { 'id': video_id, @@ -74,44 +114,50 @@ class LimeLightBaseIE(InfoExtractor): 'duration': duration, 'filesize': filesize, 'categories': categories, + 'tags': tags, 'thumbnails': thumbnails, 'subtitles': subtitles, } -class LimeLightMediaIE(LimeLightBaseIE): +class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*mediaId=(?P[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', - 'md5': '3213605088be599705677ef785db6972', 'info_dict': { 'id': '3ffd040b522b4485b6d84effc750cd86', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'HaP and the HB Prince Trailer', 'description': 'As Harry Potter begins his 6th year at Hogwarts School of Witchcraft and Wizardry, he discovers an old book marked mysteriously "This book is the property of the Half-Blood Prince" and begins to learn more about Lord Voldemort\'s dark past.', 'thumbnail': 're:^https?://.*\.jpeg$', - 'duration': 144230, + 'duration': 144.23, 'timestamp': 1244136834, - "upload_date": "20090604", - } + 'upload_date': '20090604', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, } - PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/media/%s/%s' - API_URL = 'http://api.video.limelight.com/rest/organizations/%s/media/%s/%s.json' + _PLAYLIST_SERVICE_PATH = 'media' + _API_PATH = 'media' def _real_extract(self, url): video_id = self._match_id(url) - mobile_json_data = self.get_playlist_service(video_id, 'getMobilePlaylistByMediaId') - pc_json_data = self.get_playlist_service(video_id, 'getPlaylistByMediaId') - properties = self.get_api(pc_json_data['orgId'], video_id, 'properties') + pc, mobile, metadata = self._extract( + video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') - return self.process_data(mobile_json_data['mediaList'][0]['mobileUrls'], pc_json_data['playlistItems'][0]['streams'], properties) + return self._extract_info( + pc['playlistItems'][0].get('streams', []), + mobile['mediaList'][0].get('mobileUrls', []) if mobile else [], + metadata) -class LimeLightChannelIE(LimeLightBaseIE): +class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*channelId=(?P[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { @@ -120,31 +166,29 @@ class LimeLightChannelIE(LimeLightBaseIE): }, 'playlist_mincount': 3, } - PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/channel/%s/%s' - API_URL = 'http://api.video.limelight.com/rest/organizations/%s/channels/%s/%s.json' + _PLAYLIST_SERVICE_PATH = 'channel' + _API_PATH = 'channels' def _real_extract(self, url): channel_id = self._match_id(url) - mobile_json_data = self.get_playlist_service(channel_id, 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1') - pc_json_data = self.get_playlist_service(channel_id, 'getPlaylistByChannelId') - medias = self.get_api(pc_json_data['orgId'], channel_id, 'media') + pc, mobile, medias = self._extract( + channel_id, 'getPlaylistByChannelId', + 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') - entries = [] - for i in range(len(medias['media_list'])): - entries.append(self.process_data(mobile_json_data['mediaList'][i]['mobileUrls'], pc_json_data['playlistItems'][i]['streams'], medias['media_list'][i])) + entries = [ + self._extract_info( + pc['playlistItems'][i].get('streams', []), + mobile['mediaList'][i].get('mobileUrls', []) if mobile else [], + medias['media_list'][i]) + for i in range(len(medias['media_list']))] - return { - 'id': channel_id, - 'title': pc_json_data['title'], - 'entries': entries, - '_type': 'playlist', - } + return self.playlist_result(entries, channel_id, pc['title']) -class LimeLightChannelListIE(LimeLightBaseIE): +class LimelightChannelListIE(LimelightBaseIE): IE_NAME = 'limelight:channel_list' - _VALID_URL = r'http://link\.videoplatform\.limelight\.com/media/?.*channelListId=(?P[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'info_dict': { @@ -153,24 +197,15 @@ class LimeLightChannelListIE(LimeLightBaseIE): }, 'playlist_mincount': 2, } - PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/channel_list/%s/%s' + _PLAYLIST_SERVICE_PATH = 'channel_list' def _real_extract(self, url): channel_list_id = self._match_id(url) - json_data = self.get_playlist_service(channel_list_id, 'getMobileChannelListById') + channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById') - entries = [] - for channel in json_data['channelList']: - entries.append({ - 'url': 'http://link.videoplatform.limelight.com/media/?channelId=%s' % channel['id'], - '_type': 'url', - 'ie_key': 'LimeLightChannel', - }) + entries = [ + self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') + for channel in channel_list['channelList']] - return { - 'id': channel_list_id, - 'title': json_data['title'], - 'entries': entries, - '_type': 'playlist', - } + return self.playlist_result(entries, channel_list_id, channel_list['title']) From 9c544e2537abda1d65e96f2b33a79984f3ab7c10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 20:48:44 +0600 Subject: [PATCH 17/20] [limelight] Add test video with subtitles --- youtube_dl/extractor/limelight.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 599d8413d..fb03dd527 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,13 +123,13 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P[a-z0-9]{32})' - _TEST = { + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { 'id': '3ffd040b522b4485b6d84effc750cd86', 'ext': 'flv', 'title': 'HaP and the HB Prince Trailer', - 'description': 'As Harry Potter begins his 6th year at Hogwarts School of Witchcraft and Wizardry, he discovers an old book marked mysteriously "This book is the property of the Half-Blood Prince" and begins to learn more about Lord Voldemort\'s dark past.', + 'description': 'md5:8005b944181778e313d95c1237ddb640', 'thumbnail': 're:^https?://.*\.jpeg$', 'duration': 144.23, 'timestamp': 1244136834, @@ -139,7 +139,25 @@ class LimelightMediaIE(LimelightBaseIE): # rtmp download 'skip_download': True, }, - } + }, { + # video with subtitles + 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335', + 'info_dict': { + 'id': 'a3e00274d4564ec4a9b29b9466432335', + 'ext': 'flv', + 'title': '3Play Media Overview Video', + 'description': '', + 'thumbnail': 're:^https?://.*\.jpeg$', + 'duration': 78.101, + 'timestamp': 1338929955, + 'upload_date': '20120605', + 'subtitles': 'mincount:9', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }] _PLAYLIST_SERVICE_PATH = 'media' _API_PATH = 'media' From 0659dfccfea9df3206c476e83a2b090456c25a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 21:13:13 +0600 Subject: [PATCH 18/20] [pbs] Improve player regex (Closes #7059) --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 683c81de3..0bca3152b 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -167,7 +167,7 @@ class PBSIE(InfoExtractor): return media_id, presumptive_id, upload_date url = self._search_regex( - r']*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']', + r'(?s)]+?(?:[a-z-]+?=["\'].+?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', webpage, 'player URL') mobj = re.match(self._VALID_URL, url) From 96229998c29705c8ee4230915ec7ff050bcfecf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 21:19:47 +0600 Subject: [PATCH 19/20] [pbs] Allow empty attribute in player regex --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 0bca3152b..66b3dda47 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -167,7 +167,7 @@ class PBSIE(InfoExtractor): return media_id, presumptive_id, upload_date url = self._search_regex( - r'(?s)]+?(?:[a-z-]+?=["\'].+?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', + r'(?s)]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', webpage, 'player URL') mobj = re.match(self._VALID_URL, url) From 90ab741e909c949039e31805da04f5e546a1a8c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Oct 2015 21:37:49 +0600 Subject: [PATCH 20/20] [pbs] Add test for #7059 --- youtube_dl/extractor/pbs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 66b3dda47..6923c6094 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -134,6 +134,24 @@ class PBSIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, + { + # Video embedded in iframe containing angle brackets as attribute's value (e.g. + # "