From 754d8a035e3e1d0f3340aef662ae0df76a0be91d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 21 Jul 2014 18:06:21 +0200 Subject: [PATCH 1/9] [nbcnews] Look in all playlists for video --- youtube_dl/extractor/nbc.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index aa34665d1..70aa98aee 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -85,11 +85,25 @@ class NBCNewsIE(InfoExtractor): flags=re.MULTILINE) bootstrap = json.loads(bootstrap_json) info = bootstrap['results'][0]['video'] - playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' mpxid = info['mpxId'] - all_videos = self._download_json(playlist_url, title)['videos'] - # The response contains additional videos - info = next(v for v in all_videos if v['mpxId'] == mpxid) + + base_urls = [ + info['fallbackPlaylistUrl'], + info['associatedPlaylistUrl'], + ] + + for base_url in base_urls: + playlist_url = base_url + '?form=MPXNBCNewsAPI' + all_videos = self._download_json(playlist_url, title)['videos'] + + try: + info = next(v for v in all_videos if v['mpxId'] == mpxid) + break + except StopIteration: + continue + + if info is None: + raise ExtractorError('Could not find video in playlists') return { '_type': 'url', From 1e8ac8364b09259b5bfa277304f5fb31906b8801 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 21 Jul 2014 18:06:51 +0200 Subject: [PATCH 2/9] release 2014.07.21 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e2e0ee25c..0ce4a6c10 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.20.2' +__version__ = '2014.07.21' From 9dcb8f3fc7927a6a3e6f4747f64c6f8c3900cdc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 21 Jul 2014 20:42:20 +0200 Subject: [PATCH 3/9] [br] Allow '_' in the url (fixes #3311) --- youtube_dl/extractor/br.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 993360714..f7f2f713a 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -12,7 +12,7 @@ from ..utils import ( class BRIE(InfoExtractor): IE_DESC = 'Bayerischer Rundfunk Mediathek' - _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P[a-z0-9\-]+)\.html' + _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P[a-z0-9\-_]+)\.html' _BASE_URL = 'http://www.br.de' _TESTS = [ From 53eb217661ee95fc14d3f801118b2d69070d0bbe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 04:53:06 +0200 Subject: [PATCH 4/9] Add another great example for the --extractor-descriptions output --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0e7b9ddaf..c6a5b2b5b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -633,7 +633,7 @@ def _real_main(argv=None): if desc is False: continue if hasattr(ie, 'SEARCH_KEY'): - _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') + _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny') _COUNTS = (u'', u'5', u'10', u'all') desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) compat_print(desc) From 8904e979dfe489e37bda369a5863bceecb56d490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Jul 2014 20:37:33 +0700 Subject: [PATCH 5/9] [vodlocker] Fix _VALID_URL --- youtube_dl/extractor/vodlocker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 68c59364b..6d3b78749 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -10,7 +10,7 @@ from ..utils import ( class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' + _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' _TESTS = [{ 'url': 'http://vodlocker.com/e8wvyzz4sl42', From e00fc35dbe7cdba20d78ccbf7a2fb471d5356529 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 15:52:01 +0200 Subject: [PATCH 6/9] [kickstarter] Support embedded videos (Fixes #3322) --- youtube_dl/extractor/kickstarter.py | 37 +++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 961dd1aa6..56a76380c 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class KickStarterIE(InfoExtractor): _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P[^/]*)/.*' - _TEST = { + _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location', 'md5': 'c81addca81327ffa66c642b5d8b08cab', 'info_dict': { @@ -18,22 +18,45 @@ class KickStarterIE(InfoExtractor): 'description': 'A unique motocross documentary that examines the ' 'life and mind of one of sports most elite athletes: Josh Grant.', }, - } + }, { + 'note': 'Embedded video (not using the native kickstarter video service)', + 'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178', + 'playlist': [ + { + 'info_dict': { + 'id': '78704821', + 'ext': 'mp4', + 'uploader_id': 'pebble', + 'uploader': 'Pebble Technology', + 'title': 'Pebble iOS Notifications', + } + } + ], + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'data-video-url="(.*?)"', - webpage, 'video URL') - video_title = self._html_search_regex(r'(.*?)', - webpage, 'title').rpartition('— Kickstarter')[0].strip() + title = self._html_search_regex( + r'\s*(.*?)(?:\s*— Kickstarter)?\s*', + webpage, 'title') + video_url = self._search_regex( + r'data-video-url="(.*?)"', + webpage, 'video URL', default=None) + if video_url is None: # No native kickstarter, look for embedded videos + return { + '_type': 'url_transparent', + 'ie_key': 'Generic', + 'url': url, + 'title': title, + } return { 'id': video_id, 'url': video_url, - 'title': video_title, + 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } From 1771ddd85db7acda1e4174ccd186acd40a881fbc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 16:59:40 +0200 Subject: [PATCH 7/9] release 2014.07.22 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0ce4a6c10..e5fcec839 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.21' +__version__ = '2014.07.22' From 2871d489a91b6de1a5849243e4d827123dd564ef Mon Sep 17 00:00:00 2001 From: Jason Terk Date: Tue, 22 Jul 2014 07:56:42 -0700 Subject: [PATCH 8/9] Support Alternative cbs.com URL Format Adds support for cbs.com URLs containing "/artist" instead of "/video". E.g.: http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/ --- youtube_dl/extractor/cbs.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index ac0315853..44d23aef6 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,9 +4,9 @@ from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P[^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(video|artist)/(?P[^/]+)/.*' - _TEST = { + _TESTS = [{ u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', u'file': u'4JUVEwq3wUT7.flv', u'info_dict': { @@ -18,7 +18,19 @@ class CBSIE(InfoExtractor): # rtmp download u'skip_download': True, }, - } + }, { + u'url': u'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', + u'file': u'P9gjWjelt6iP.flv', + u'info_dict': { + u'title': u'Live on Letterman - St. Vincent', + u'description': u'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', + u'duration': 3221, + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From e42a692f003eabdb1efad7b9f4b10ce97c712d32 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 17:34:34 +0200 Subject: [PATCH 9/9] [cbs] Modernize Also add threatening skip blocks in there - access is only possible from the US. We may want to find a better geolocation restriction method for tests. --- youtube_dl/extractor/cbs.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 44d23aef6..822f9a7be 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,35 +1,41 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(video|artist)/(?P[^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P[^/]+)/.*' _TESTS = [{ - u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', - u'file': u'4JUVEwq3wUT7.flv', - u'info_dict': { - u'title': u'Connect Chat feat. Garth Brooks', - u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', - u'duration': 1495, + 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', + 'info_dict': { + 'id': '4JUVEwq3wUT7', + 'ext': 'flv', + 'title': 'Connect Chat feat. Garth Brooks', + 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', + 'duration': 1495, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, + '_skip': 'Blocked outside the US', }, { - u'url': u'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', - u'file': u'P9gjWjelt6iP.flv', - u'info_dict': { - u'title': u'Live on Letterman - St. Vincent', - u'description': u'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', - u'duration': 3221, + 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', + 'info_dict': { + 'id': 'P9gjWjelt6iP', + 'ext': 'flv', + 'title': 'Live on Letterman - St. Vincent', + 'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', + 'duration': 3221, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, + '_skip': 'Blocked outside the US', }] def _real_extract(self, url): @@ -38,5 +44,5 @@ class CBSIE(InfoExtractor): webpage = self._download_webpage(url, video_id) real_id = self._search_regex( r"video\.settings\.pid\s*=\s*'([^']+)';", - webpage, u'real video ID') + webpage, 'real video ID') return self.url_result(u'theplatform:%s' % real_id)