From 055f0d3d0636e343354a19cd558a3aac3cf31399 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 17 May 2016 15:38:57 +0800 Subject: [PATCH 01/10] [abcnews] Added a new extractor (closes #3992) Related: #6108, #8664, #9459 --- youtube_dl/extractor/abcnews.py | 135 +++++++++++++++++++++++++++++ youtube_dl/extractor/amp.py | 4 +- youtube_dl/extractor/extractors.py | 4 + 3 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/extractor/abcnews.py diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py new file mode 100644 index 000000000..b61a6327c --- /dev/null +++ b/youtube_dl/extractor/abcnews.py @@ -0,0 +1,135 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import re +import time + +from .amp import AMPIE +from .common import InfoExtractor +from ..compat import compat_urlparse + + +class AbcNewsVideoIE(AMPIE): + IE_NAME = 'abcnews:video' + _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P[0-9a-z-]+)-(?P\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', + 'info_dict': { + 'id': '20411932', + 'ext': 'mp4', + 'display_id': 'week-exclusive-irans-foreign-minister-zarif', + 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', + 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', + 'duration': 180, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + info_dict = self._extract_feed_info( + 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + info_dict.update({ + 'id': video_id, + 'display_id': display_id, + }) + return info_dict + + +class AbcNewsIE(InfoExtractor): + IE_NAME = 'abcnews' + _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P[0-9a-z-]+)/story\?id=(?P\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', + 'info_dict': { + 'id': '10498713', + 'ext': 'flv', + 'display_id': 'dramatic-video-rare-death-job-america', + 'title': 'Occupational Hazards', + 'description': 'Nightline investigates the dangers that lurk at various jobs.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20100428', + 'timestamp': 1272412800, + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', + 'info_dict': { + 'id': '39125818', + 'ext': 'mp4', + 'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', + 'title': 'Justin Timberlake Drops Hints For Secret Single', + 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', + 'upload_date': '20160515', + 'timestamp': 1463329500, + }, + 'params': { + # m3u8 download + 'skip_download': True, + # The embedded YouTube video is blocked due to copyright issues + 'playlist_items': '1', + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex( + r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') + full_video_url = compat_urlparse.urljoin(url, video_url) + + youtube_url = self._html_search_regex( + r']+src="(https://www\.youtube\.com/embed/[^"]+)"', + webpage, 'YouTube URL', default=None) + + timestamp = None + date_str = self._html_search_regex( + r']+class="timestamp">([^<]+)', + webpage, 'timestamp', fatal=False) + if date_str: + tz_offset = 0 + if date_str.endswith(' ET'): # Eastern Time + tz_offset = -5 + date_str = date_str[:-3] + date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] + for date_format in date_formats: + try: + timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format)) + except ValueError: + continue + if timestamp is not None: + timestamp -= tz_offset * 3600 + + entry = { + '_type': 'url_transparent', + 'ie_key': AbcNewsVideoIE.ie_key(), + 'url': full_video_url, + 'id': video_id, + 'display_id': display_id, + 'timestamp': timestamp, + } + + if youtube_url: + entries = [entry, self.url_result(youtube_url, 'Youtube')] + return self.playlist_result(entries) + + return entry diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 138fa0808..8545681be 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -52,7 +52,7 @@ class AMPIE(InfoExtractor): for media_data in media_content: media = media_data['@attributes'] media_type = media['type'] - if media_type == 'video/f4m': + if media_type in ('video/f4m', 'application/f4m+xml'): formats.extend(self._extract_f4m_formats( media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id, f4m_id='hds', fatal=False)) @@ -61,7 +61,7 @@ class AMPIE(InfoExtractor): media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.append({ - 'format_id': media_data['media-category']['@attributes']['label'], + 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), 'url': media['url'], 'tbr': int_or_none(media.get('bitrate')), 'filesize': int_or_none(media.get('fileSize')), diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ca9d85e33..861701f4c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from .abc import ABCIE from .abc7news import Abc7NewsIE +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastIE, From 15cda1ef774e9dbc538765f59dff5b10a492eca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 May 2016 23:46:47 +0600 Subject: [PATCH 02/10] [nfb] Fix uploader extraction --- youtube_dl/extractor/nfb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 51e4a34f7..234e49047 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -37,8 +37,7 @@ class NFBIE(InfoExtractor): uploader_id = self._html_search_regex(r'([^<]+)', - page, 'director name', fatal=False) + uploader = self._og_search_property('video:director', page, 'director name') request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, From 11e6a0b64130f9b4aea1a6115a3ebaad73f2f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 May 2016 00:25:15 +0600 Subject: [PATCH 03/10] [nfb] Modernize and extract subtitles --- youtube_dl/extractor/nfb.py | 110 +++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 234e49047..adcc636bc 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -2,8 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - sanitized_Request, + clean_html, + determine_ext, + int_or_none, + qualities, urlencode_postdata, + xpath_text, ) @@ -16,12 +20,12 @@ class NFBIE(InfoExtractor): 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 'info_dict': { 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:836d8aff55e087d04d9f6df554d4e038', + 'description': 'md5:6b8e32dde3abf91e58857b174916620c', 'duration': 3128, + 'creator': 'Mark Sandiford', 'uploader': 'Mark Sandiford', - 'uploader_id': 'mark-sandiford', }, 'params': { # rtmp download @@ -31,64 +35,78 @@ class NFBIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( - 'https://www.nfb.ca/film/%s' % video_id, video_id, - 'Downloading film page') - uploader_id = self._html_search_regex(r' Date: Wed, 18 May 2016 22:24:46 +0100 Subject: [PATCH 04/10] [formula1] Add new extractor(closes #3617) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/formula1.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 youtube_dl/extractor/formula1.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 861701f4c..efbe970fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -242,6 +242,7 @@ from .fktv import FKTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE +from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py new file mode 100644 index 000000000..726393fcc --- /dev/null +++ b/youtube_dl/extractor/formula1.py @@ -0,0 +1,25 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class Formula1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P.+?)\.html' + _TEST = { + 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', + 'md5': '8c79e54be72078b26b89e0e111c0502b', + 'info_dict': { + 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', + 'ext': 'flv', + 'title': 'Race highlights - Spain 2016', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + ooyala_embed_code = self._search_regex( + r'data-videoid="([^"]+)"', webpage, 'ooyala embed code') + return self.url_result( + 'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) From 46bc9b7d7cea2e161670e65abe42ef01d39e8957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 May 2016 04:31:30 +0600 Subject: [PATCH 05/10] [utils] Allow None in remove_{start,end} --- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 8 ++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 520d32ff5..a697232a8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -50,6 +50,8 @@ from youtube_dl.utils import ( sanitize_path, prepend_extension, replace_extension, + remove_start, + remove_end, remove_quotes, shell_quote, smuggle_url, @@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_remove_start(self): + self.assertEqual(remove_start(None, 'A - '), None) + self.assertEqual(remove_start('A - B', 'A - '), 'B') + self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + + def test_remove_end(self): + self.assertEqual(remove_end(None, ' - B'), None) + self.assertEqual(remove_end('A - B', ' - B'), 'A') + self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + def test_remove_quotes(self): self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes('"'), '"') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ac60ba18c..5301d0740 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1549,15 +1549,11 @@ def setproctitle(title): def remove_start(s, start): - if s.startswith(start): - return s[len(start):] - return s + return s[len(start):] if s is not None and s.startswith(start) else s def remove_end(s, end): - if s.endswith(end): - return s[:-len(end)] - return s + return s[:-len(end)] if s is not None and s.endswith(end) else s def remove_quotes(s): From dd81769c62661d168fb87b896ffb8a80dacbe45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 May 2016 04:34:19 +0600 Subject: [PATCH 06/10] [ndtv] Fix extraction --- youtube_dl/extractor/ndtv.py | 40 ++++++++++-------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index 2a1ca80df..96528f649 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -1,19 +1,18 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - month_by_name, int_or_none, + remove_end, + unified_strdate, ) class NDTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P\d+)' _TEST = { - 'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', + 'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710', 'md5': '39f992dbe5fb531c395d8bbedb1e5e88', 'info_dict': { 'id': '300710', @@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor): 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', 'upload_date': '20131208', 'duration': 1327, - 'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg', + 'thumbnail': 're:https?://.*\.jpg', }, } @@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = remove_end(self._og_search_title(webpage), ' - NDTV') + filename = self._search_regex( r"__filename='([^']+)'", webpage, 'video filename') - video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % - filename) + video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename duration = int_or_none(self._search_regex( r"__duration='([^']+)'", webpage, 'duration', fatal=False)) - date_m = re.search(r'''(?x) - \s* - Published\s+On:\s* - (?P[A-Za-z]+)\s+(?P[0-9]+),\s*(?P[0-9]+) - ''', webpage) - upload_date = None + upload_date = unified_strdate(self._html_search_meta( + 'publish-date', webpage, 'upload date', fatal=False)) - if date_m is not None: - month = month_by_name(date_m.group('monthname')) - if month is not None: - upload_date = '%s%02d%02d' % ( - date_m.group('year'), month, int(date_m.group('day'))) - - description = self._og_search_description(webpage) - READ_MORE = ' (Read more)' - if description.endswith(READ_MORE): - description = description[:-len(READ_MORE)] - - title = self._og_search_title(webpage) - TITLE_SUFFIX = ' - NDTV' - if title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] + description = remove_end(self._og_search_description(webpage), ' (Read more)') return { 'id': video_id, From 8585dc4cdc735eb8a077dffb68affa81e1a98693 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 19 May 2016 01:18:01 +0200 Subject: [PATCH 07/10] [Makefile] delete thumbnails --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5d7cd5a7e..d760e4576 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From a00129670390c241d097afd873b4ee226ca7d550 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 19 May 2016 18:18:03 +0100 Subject: [PATCH 08/10] [learnr] Add new extractor(closes #4284) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/learnr.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/learnr.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index efbe970fe..74aba2d5c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -370,6 +370,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .learnr import LearnrIE from .lecture2go import Lecture2GoIE from .lemonde import LemondeIE from .leeco import ( diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py new file mode 100644 index 000000000..1435e090e --- /dev/null +++ b/youtube_dl/extractor/learnr.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LearnrIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', + 'md5': '3719fdf0a68397f49899e82c308a89de', + 'info_dict': { + 'id': '51624', + 'ext': 'mp4', + 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', + 'description': 'md5:b36dbfa92350176cdf12b4d388485503', + 'uploader': 'LearnCode.academy', + 'uploader_id': 'learncodeacademy', + 'upload_date': '20131021', + }, + 'add_ie': ['Youtube'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + '_type': 'url_transparent', + 'url': self._search_regex( + r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), + 'id': video_id, + } From f6e588afc0b12ebec2bc65551e882e6d99467499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 08:53:04 +0600 Subject: [PATCH 09/10] [24video] Fix description extraction --- youtube_dl/extractor/twentyfourvideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index e03e2dbaa..4025edf02 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor): title = self._og_search_title(webpage) description = self._html_search_regex( - r'([^<]+)', webpage, 'description', fatal=False) + r'<(p|span)[^>]+itemprop="description"[^>]*>(?P[^<]+)', + webpage, 'description', fatal=False, group='description') thumbnail = self._og_search_thumbnail(webpage) duration = int_or_none(self._og_search_property( 'duration', webpage, 'duration', fatal=False)) From 52f7c75cff3d7f7923deda469f9d2a551742c193 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 20 May 2016 06:53:14 +0100 Subject: [PATCH 10/10] [cbc] extract http formats and update tests --- youtube_dl/extractor/cbc.py | 63 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 68a0633b6..581928f7d 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + js_to_json, + smuggle_url, +) class CBCIE(InfoExtractor): @@ -12,57 +15,54 @@ class CBCIE(InfoExtractor): _TESTS = [{ # with mediaId 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', + 'md5': '97e24d09672fc4cf56256d6faa6c25bc', 'info_dict': { 'id': '2682904050', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Don Cherry – All-Stars', 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', - 'timestamp': 1454475540, + 'timestamp': 1454463000, 'upload_date': '20160203', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 'info_dict': { 'id': '2487345465', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Robin Williams freestyles on 90 Minutes Live', 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', - 'upload_date': '19700101', + 'upload_date': '19780210', 'uploader': 'CBCC-NEW', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'timestamp': 255977160, }, }, { # multiple iframes 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', 'playlist': [{ + 'md5': '377572d0b49c4ce0c9ad77470e0b96b4', 'info_dict': { 'id': '2680832926', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', - 'upload_date': '19700101', + 'upload_date': '20160201', + 'timestamp': 1454342820, + 'uploader': 'CBCC-NEW', }, }, { + 'md5': '415a0e3f586113894174dfb31aa5bb1a', 'info_dict': { 'id': '2658915080', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Fly like an eagle!', 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', - 'upload_date': '19700101', + 'upload_date': '20150315', + 'timestamp': 1426443984, + 'uploader': 'CBCC-NEW', }, }], - 'params': { - # rtmp download - 'skip_download': True, - }, }] @classmethod @@ -95,20 +95,23 @@ class CBCPlayerIE(InfoExtractor): 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Gerry Runs a Sweat Shop', 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', - 'timestamp': 1455067800, + 'timestamp': 1455071400, 'upload_date': '20160210', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, } def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result( - 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id, - 'ThePlatformFeed', video_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'force_smil_url': True + }), + 'id': video_id, + }