From c32f94f92d085677d763376c6b421ba077964e61 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 08:52:24 +0100 Subject: [PATCH 01/18] [theplatform] prefer hls streams --- youtube_dl/extractor/theplatform.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 83d833e30..a76afa470 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -110,7 +110,7 @@ class ThePlatformIE(InfoExtractor): config = self._download_json(config_url, video_id, 'Downloading config') smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' else: - smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path + smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true&manifest=m3u' % path sig = smuggled_data.get('sig') if sig: @@ -144,17 +144,20 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is None: - f4m_node = body.find(_x('smil:seq/smil:video')) - if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: - f4m_url = f4m_node.attrib['src'] - if 'manifest.f4m?' not in f4m_url: - f4m_url += '?' - # the parameters are from syfy.com, other sites may use others, - # they also work for nbc.com - f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = self._extract_f4m_formats(f4m_url, video_id) + node = body.find(_x('smil:seq//smil:video')) + if node is None: + node = body.find(_x('smil:seq/smil:video')) + if node is not None: + if '.m3u8' in node.attrib['src']: + formats = self._extract_m3u8_formats(node.attrib['src'], video_id) + if '.f4m' in node.attrib['src']: + f4m_url = node.attrib['src'] + if 'manifest.f4m?' not in f4m_url: + f4m_url += '?' 
+ # the parameters are from syfy.com, other sites may use others, + # they also work for nbc.com + f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' + formats = self._extract_f4m_formats(f4m_url, video_id) else: formats = [] switch = body.find(_x('smil:switch')) From b70df836b69f154cfdd219cd0b55de0c586240ac Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 11:06:20 +0100 Subject: [PATCH 02/18] [theplatform] fix direct link download --- youtube_dl/extractor/theplatform.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a76afa470..4e884a8e2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -147,10 +147,9 @@ class ThePlatformIE(InfoExtractor): node = body.find(_x('smil:seq//smil:video')) if node is None: node = body.find(_x('smil:seq/smil:video')) - if node is not None: - if '.m3u8' in node.attrib['src']: + if node is not None and '.m3u8' in node.attrib['src']: formats = self._extract_m3u8_formats(node.attrib['src'], video_id) - if '.f4m' in node.attrib['src']: + if node is not None and '.f4m' in node.attrib['src']: f4m_url = node.attrib['src'] if 'manifest.f4m?' not in f4m_url: f4m_url += '?' 
From ddebc0f8534187283a148005db2451e04e109724 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 11:18:13 +0100 Subject: [PATCH 03/18] fix indentation --- youtube_dl/extractor/theplatform.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 4e884a8e2..7875549b9 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -148,15 +148,15 @@ class ThePlatformIE(InfoExtractor): if node is None: node = body.find(_x('smil:seq/smil:video')) if node is not None and '.m3u8' in node.attrib['src']: - formats = self._extract_m3u8_formats(node.attrib['src'], video_id) + formats = self._extract_m3u8_formats(node.attrib['src'], video_id) if node is not None and '.f4m' in node.attrib['src']: - f4m_url = node.attrib['src'] - if 'manifest.f4m?' not in f4m_url: - f4m_url += '?' - # the parameters are from syfy.com, other sites may use others, - # they also work for nbc.com - f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = self._extract_f4m_formats(f4m_url, video_id) + f4m_url = node.attrib['src'] + if 'manifest.f4m?' not in f4m_url: + f4m_url += '?' 
+ # the parameters are from syfy.com, other sites may use others, + # they also work for nbc.com + f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' + formats = self._extract_f4m_formats(f4m_url, video_id) else: formats = [] switch = body.find(_x('smil:switch')) From 7d5f8e538e5643154523be894be2f4a2c4b2fdac Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 18 Jul 2015 11:44:10 +0100 Subject: [PATCH 04/18] [theplatform] combine the results of different extraction methods --- youtube_dl/extractor/theplatform.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 7875549b9..7bd530d2d 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -144,11 +144,12 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) + formats = [] node = body.find(_x('smil:seq//smil:video')) if node is None: node = body.find(_x('smil:seq/smil:video')) if node is not None and '.m3u8' in node.attrib['src']: - formats = self._extract_m3u8_formats(node.attrib['src'], video_id) + formats.extend(self._extract_m3u8_formats(node.attrib['src'], video_id)) if node is not None and '.f4m' in node.attrib['src']: f4m_url = node.attrib['src'] if 'manifest.f4m?' 
not in f4m_url: @@ -156,9 +157,8 @@ class ThePlatformIE(InfoExtractor): # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = self._extract_f4m_formats(f4m_url, video_id) + formats.extend(self._extract_f4m_formats(f4m_url, video_id)) else: - formats = [] switch = body.find(_x('smil:switch')) if switch is None: switch = body.find(_x('smil:par//smil:switch')) From 9445ac808dc46abdf7b463693ea89f520d06f068 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 19 Jul 2015 16:57:42 +0100 Subject: [PATCH 05/18] [theplatform] fix fallback condition --- youtube_dl/extractor/theplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 7bd530d2d..be67a68a8 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -150,7 +150,7 @@ class ThePlatformIE(InfoExtractor): node = body.find(_x('smil:seq/smil:video')) if node is not None and '.m3u8' in node.attrib['src']: formats.extend(self._extract_m3u8_formats(node.attrib['src'], video_id)) - if node is not None and '.f4m' in node.attrib['src']: + elif node is not None and '.f4m' in node.attrib['src']: f4m_url = node.attrib['src'] if 'manifest.f4m?' not in f4m_url: f4m_url += '?' 
From 20dd02cabf35d9476cd6a11da43fa549485f33b6 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 19 Jul 2015 17:07:50 +0100 Subject: [PATCH 06/18] [theplatform] change all manifests to m3u8 --- youtube_dl/extractor/theplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index be67a68a8..8ee6ae706 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -108,7 +108,7 @@ class ThePlatformIE(InfoExtractor): config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') config = self._download_json(config_url, video_id, 'Downloading config') - smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' + smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=m3u' else: smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true&manifest=m3u' % path From 801cfb8fa9e221d2e729d8b0d44446fe4281df03 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 18:21:19 +0100 Subject: [PATCH 07/18] [hls] pass the cookies and user-agent to ffmpeg --- youtube_dl/downloader/hls.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8be4f4249..0a9eea717 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -28,9 +28,12 @@ class HlsFD(FileDownloader): return False ffpp.check_version() + cookies = '' + for cookie in self.ydl.cookiejar: + cookies += '%s=%s; path=%s; domain=%s;\r\n' % (cookie.name, cookie.value, cookie.path, cookie.domain) args = [ encodeArgument(opt) - for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] + for opt in (ffpp.executable, '-y', '-user-agent', info_dict['http_headers']['User-Agent'], '-cookies', cookies,'-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 
'aac_adtstoasc')] args.append(encodeFilename(tmpfilename, True)) retval = subprocess.call(args) From b0b371461864fa8a55cb7e4c989527d0931c42a5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 21:15:01 +0100 Subject: [PATCH 08/18] change f4m tests to m3u8 --- youtube_dl/extractor/foxsports.py | 4 ++-- youtube_dl/extractor/nationalgeographic.py | 16 ++++++++++++---- youtube_dl/extractor/theplatform.py | 8 ++++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index df7665176..9942b7a73 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -11,7 +11,7 @@ class FoxSportsIE(InfoExtractor): 'url': 'http://www.foxsports.com/video?vid=432609859715', 'info_dict': { 'id': 'gA0bHB3Ladz3', - 'ext': 'flv', + 'ext': 'm3u8', 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', 'description': 'Courtney Lee talks about Memphis being focused.', }, @@ -29,4 +29,4 @@ class FoxSportsIE(InfoExtractor): video_id) return self.url_result(smuggle_url( - config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) + config['releaseURL'] + '&manifest=m3u', {'force_smil_url': True})) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 6fc9e7b05..e56e15dcc 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -15,21 +15,29 @@ class NationalGeographicIE(InfoExtractor): 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', 'info_dict': { 'id': '4DmDACA6Qtk_', - 'ext': 'flv', + 'ext': 'm3u8', 'title': 'Mating Crabs Busted by Sharks', 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', }, 'add_ie': ['ThePlatform'], + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws', 'info_dict': { 'id': 
'_JeBD_D7PlS5', - 'ext': 'flv', + 'ext': 'm3u8', 'title': 'The Real Jaws', 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6', }, 'add_ie': ['ThePlatform'], + 'params': { + # m3u8 download + 'skip_download': True, + }, }, ] @@ -48,7 +56,7 @@ class NationalGeographicIE(InfoExtractor): theplatform_id = url_basename(content.attrib.get('url')) return self.url_result(smuggle_url( - 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, + 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=m3u' % theplatform_id, # For some reason, the normal links don't work and we must force - # the use of f4m + # the use of m3u8 {'force_smil_url': True})) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 8ee6ae706..720237cd2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -60,10 +60,14 @@ class ThePlatformIE(InfoExtractor): 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', 'info_dict': { 'id': 'yMBg9E8KFxZD', - 'ext': 'mp4', + 'ext': 'm3u8', 'description': 'md5:644ad9188d655b742f942bf2e06b002d', 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'only_matching': True, From b5ea5999998d16d5538f3dfde7dfa63a964fc32e Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 21:22:53 +0100 Subject: [PATCH 09/18] [foxsport] skip m3u8 test --- youtube_dl/extractor/foxsports.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index 9942b7a73..b7c3670ce 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -16,6 +16,10 @@ class FoxSportsIE(InfoExtractor): 'description': 'Courtney Lee talks about Memphis being focused.', 
}, 'add_ie': ['ThePlatform'], + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): From 81e42a1bba056e7ced3afa301020f0382d370d3d Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 21:41:08 +0100 Subject: [PATCH 10/18] [cnet] change rtmp test to m3u8 --- youtube_dl/extractor/cnet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py index 5dd69bff7..7b8c76821 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cnet.py @@ -29,14 +29,15 @@ class CNETIE(InfoExtractor): 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', 'info_dict': { 'id': '56527b93-d25d-44e3-b738-f989ce2e49ba', - 'ext': 'flv', + 'ext': 'm3u8', 'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. 
#TDPothole', 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', 'uploader': 'Ashley Esqueda', 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', }, 'params': { - 'skip_download': True, # requires rtmpdump + # m3u8 download + 'skip_download': True, }, }] From 25dfcade60b651409ed8b07125a73bead5c0b5c8 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 21:55:48 +0100 Subject: [PATCH 11/18] [sbs] change f4m test to m3u8 --- youtube_dl/extractor/sbs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index d6ee2d9e2..41b43ce21 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -15,12 +15,16 @@ class SBSIE(InfoExtractor): 'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'info_dict': { 'id': '320403011771', - 'ext': 'mp4', + 'ext': 'm3u8', 'title': 'Dingo Conservation (The Feed)', 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', 'thumbnail': 're:http://.*\.jpg', 'duration': 308, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', 'only_matching': True, From 2c65dc5503c9e7b61345b67a19439df4a5143acc Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 20 Jul 2015 22:02:46 +0100 Subject: [PATCH 12/18] [nbc] change f4m test to m3u8 --- youtube_dl/extractor/nbc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index dc2091be0..dc1113949 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -24,10 +24,14 @@ class NBCIE(InfoExtractor): # md5 checksum is not stable 'info_dict': { 'id': 'c9xnCo0YPOPH', - 'ext': 'flv', + 'ext': 'm3u8', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben 
& Jerry\'s scoop shop.', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.nbc.com/the-tonight-show/episodes/176', From cf7a91ee501311cdef54ac6eb9fd24c766dfe282 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 00:15:17 +0100 Subject: [PATCH 13/18] [nbc] change f4m test for NBCSportsVPlayer and NBCSports --- youtube_dl/extractor/nbc.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index dc1113949..f09abb286 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -81,14 +81,19 @@ class NBCSportsVPlayerIE(InfoExtractor): 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', 'info_dict': { 'id': '9CsDKds0kvHI', - 'ext': 'flv', + 'ext': 'm3u8', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', 'only_matching': True, - }] + }, + ] @staticmethod def _extract_url(webpage): @@ -112,10 +117,14 @@ class NBCSportsIE(InfoExtractor): 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', 'info_dict': { 'id': 'PHJSaFWbrTY9', - 'ext': 'flv', + 'ext': 'm3u8', 'title': 'Tom Izzo, Michigan St. 
has \'so much respect\' for Duke', 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): From ca673139009a67ca0848b2eb83790cacd1626884 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 08:51:56 +0100 Subject: [PATCH 14/18] [cbs] fix downloading and change rtmp test to f4m --- youtube_dl/extractor/cbs.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 75fffb156..96147a8da 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor - +from ..utils import smuggle_url class CBSIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P[^/]+)' @@ -10,31 +10,21 @@ class CBSIE(InfoExtractor): 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'info_dict': { 'id': '4JUVEwq3wUT7', - 'display_id': 'connect-chat-feat-garth-brooks', 'ext': 'flv', 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', 'duration': 1495, }, - 'params': { - # rtmp download - 'skip_download': True, - }, '_skip': 'Blocked outside the US', }, { 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', 'info_dict': { 'id': 'WWF_5KqY3PK1', - 'display_id': 'st-vincent', 'ext': 'flv', 'title': 'Live on Letterman - St. Vincent', 'description': 'Live On Letterman: St. 
Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', 'duration': 3221, }, - 'params': { - # rtmp download - 'skip_download': True, - }, '_skip': 'Blocked outside the US', }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', @@ -50,9 +40,4 @@ class CBSIE(InfoExtractor): real_id = self._search_regex( [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"], webpage, 'real video ID') - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': 'theplatform:%s' % real_id, - 'display_id': display_id, - } + return self.url_result(smuggle_url('http://link.theplatform.com/s/dJ5BDC/%s/meta.smil?format=smil&mbr=true&manifest=f4m' % real_id, {'force_smil_url': True})) From 83d10c9ec8b5aa99503844661685bb2f2dd98bd2 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 10:27:41 +0100 Subject: [PATCH 15/18] [generic] change NBC Sports vplayer embed test to m3u8 --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6d2efb22e..670204f5c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -814,10 +814,14 @@ class GenericIE(InfoExtractor): 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a', 'info_dict': { 'id': 'ln7x1qSThw4k', - 'ext': 'flv', + 'ext': 'm3u8', 'title': "PFT Live: New leader in the 'new-look' defense", 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', }, + 'params': { + # m3u8 downloads + 'skip_download': True, + } }, # UDN embed { From ce4dd93a5ebe58f30e8921c66375d8f391b68f12 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 12:12:52 +0100 Subject: [PATCH 16/18] [syfy] fix the extraction logic and make the test pass --- youtube_dl/extractor/syfy.py | 47 ++++++++++++------------------------ 1 file changed, 16 
insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 5ca079f88..21c3bfcf2 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -1,46 +1,31 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor class SyfyIE(InfoExtractor): - _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))' + _VALID_URL = r'https?://www\.syfy\.com/[^/]+/videos/(?:\d+-)?(?P<id>[A-Za-z0-9-]+)' _TESTS = [{ - 'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', + 'url': 'http://www.syfy.com/sharknado3/videos/sharknado-3-trailer', 'info_dict': { - 'id': 'NmqMrGnXvmO1', - 'ext': 'flv', - 'title': 'George Lucas has Advice for his Daughter', - 'description': 'Listen to what insights George Lucas give his daughter Amanda.', - }, - 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.syfy.com/wilwheaton', - 'md5': '94dfa54ee3ccb63295b276da08c415f6', - 'info_dict': { - 'id': '4yoffOOXC767', - 'ext': 'flv', - 'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.', - 'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.', + 'id': 'Sueh0V5Eh6L6', + 'ext': 'm3u8', + 'title': 'Sharknado 3: Trailer', + 'description': 'This time, the entire east coast isn\'t safe. 
Sharknado 3 premieres July 22 at 9/8c on Syfy.', }, 'add_ie': ['ThePlatform'], 'skip': 'Blocked outside the US', + 'params': { + # rtmp download + 'skip_download': True, + } }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_name = mobj.group('video_name') - if video_name: - generic_webpage = self._download_webpage(url, video_name) - video_id = self._search_regex( - r'', - generic_webpage, 'video ID') - url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( - video_name, video_name, video_id) - else: - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage)) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + releaseURL = self._parse_json(self._html_search_regex( + r'"syfy_mpx"\s*:\s*({[^}]+?})', + webpage, 'syfy_mpx'), display_id)['releaseURL'] + return self.url_result(releaseURL) From 3c8ae5fbce8c1960ba5a84e3d6f10f1842cd7ea6 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 12:41:08 +0100 Subject: [PATCH 17/18] [cbssports] change the test --- youtube_dl/extractor/cbssports.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index ae47e74cc..7913bd1bd 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -9,12 +9,16 @@ class CBSSportsIE(InfoExtractor): _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P
[^/]+)/(?P[^/]+)' _TEST = { - 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', + 'url': 'http://www.cbssports.com/video/player/tennis/482755139719/0/serena-williams-wins-2015-wimbledon-championship', 'info_dict': { - 'id': '_d5_GbO8p1sT', + 'id': 'GQGZp_4tBqW6', 'ext': 'flv', - 'title': 'US Open flashbacks: 1990s', - 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', + 'title': 'Serena Williams wins 2015 Wimbledon Championship', + 'description': 'Serena Williams completed the Serena Slam on Saturday and now holds all four major titles. Jamie Erdahl has the latest on what the win means for Serena.', + }, + 'params': { + # rtmp download + 'skip_download': True, }, } From c8c847d4478484e51a2f5294682711741b854c2f Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 21 Jul 2015 16:59:43 +0100 Subject: [PATCH 18/18] [hls] pass all headers to ffmpeg --- youtube_dl/downloader/hls.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 0a9eea717..eeb321aa0 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -28,12 +28,12 @@ class HlsFD(FileDownloader): return False ffpp.check_version() - cookies = '' - for cookie in self.ydl.cookiejar: - cookies += '%s=%s; path=%s; domain=%s;\r\n' % (cookie.name, cookie.value, cookie.path, cookie.domain) + headers = '' + for key, val in info_dict['http_headers'].items(): + headers += '%s: %s\r\n' % (key, val) args = [ encodeArgument(opt) - for opt in (ffpp.executable, '-y', '-user-agent', info_dict['http_headers']['User-Agent'], '-cookies', cookies,'-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] + for opt in (ffpp.executable, '-y', '-headers', headers, '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] args.append(encodeFilename(tmpfilename, True)) retval = subprocess.call(args)