From b1eee324b37d6c7331cbd89db46060fabe19774d Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 19 Jun 2017 16:02:53 +0200 Subject: [PATCH 1/4] [theplatform] Fix feed extractor --- youtube_dl/extractor/theplatform.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index de236bbba..c4619a95d 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -343,6 +343,16 @@ class ThePlatformFeedIE(ThePlatformBaseIE): if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) + if item.get('plfile$assetTypes') is None: + query = { + 'mbr': 'true', + 'formats': item['plfile$format'], + } + cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( + main_smil_url or smil_url, query), video_id, 'Downloading SMIL data') + formats.extend(cur_formats) + subtitles = self._merge_subtitles(subtitles, cur_subtitles) + continue for asset_type in item['plfile$assetTypes']: if asset_type in asset_types: continue From 411324c1ab57ddfffbcf61ff9dfa262fe69a7fe8 Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 19 Jun 2017 16:08:16 +0200 Subject: [PATCH 2/4] [globalnews] Add new extractor (close #13430) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/globalnews.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/globalnews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index edbb4bdde..0c831bdd9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -389,6 +389,7 @@ from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE +from .globalnews import GlobalNewsIE from .globo import ( GloboIE, GloboArticleIE, diff --git a/youtube_dl/extractor/globalnews.py 
b/youtube_dl/extractor/globalnews.py new file mode 100644 index 000000000..f8dc934cf --- /dev/null +++ b/youtube_dl/extractor/globalnews.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class GlobalNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?globalnews\.ca/video/(?P<id>\d+)' + _TEST = { + 'url': "http://globalnews.ca/video/2066998/focus-montreal-doulia-hamad-and-nebras-m-warsi", + 'info_dict': { + 'title': "Focus Montreal: Doulia Hamad and Nebras M. Warsi", + 'id': '469088323881', + 'ext': 'mp4', + 'upload_date': '20150621', + 'description': 'md5:2998a348701a91ddf70fbb773b016a7f', + 'timestamp': 1434908705, + 'uploader': 'SHWM-NEW', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage( + url, + display_id + ) + + account_id = self._search_regex(( + r'svp\.platformAccount\s*=\s*(["\']+)(?P<account>.+?)\1', + r'svp\.(?:videoSMILUrl|metadataUrl)\s*=\s*(["\']+).*?theplatform[^/]+/(?:s|f)/(?P<account>[^/]+?/).*?\1'), + webpage, + 'account id', + group='account' + )[:-1] + feed_id = self._search_regex(( + r'svp\.feedId\s*=\s*(["\']+)(?P<feed>.+?)\1', + r'svp\.metadataUrl\s*=\s*(["\']+).*?theplatform[^/]+/f/[^/]+?/(?P<feed>[^?/&#]+).*?\1'), + webpage, + 'feed id', + group='feed' + ) + platform_id = self._search_regex(( + r'<div\s+?class=(["\'])[^\'"]+?the_platform_id_(?P<platformId>\d+)\1\s+?data-v_count_id=\1\2\1', + r'svp\.setContentId\(\s*([\'"])(?P<platformId>\d+)\1.*?loadCallback'), + webpage, + 'platform id', + group='platformId' + ) + + return { + 'display_id': display_id, + '_type': 'url_transparent', + 'url': 'http://feed.theplatform.com/f/%s/%s?byId=%s' % (account_id, feed_id, platform_id), + 'ie_key': 'ThePlatformFeed' + } From 2139db52c7faf211e9327581681e10dffb2d432b Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 19 Jun 2017 17:34:07 +0200 Subject: [PATCH 3/4] [globalnews] Valid url regex updated --- youtube_dl/extractor/globalnews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/globalnews.py b/youtube_dl/extractor/globalnews.py index f8dc934cf..d3e61eebf 100644 --- a/youtube_dl/extractor/globalnews.py +++ b/youtube_dl/extractor/globalnews.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class GlobalNewsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?globalnews\.ca/video/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?globalnews\.ca/video/(?:embed/)?(?P<id>\d+)' _TEST = { 'url': "http://globalnews.ca/video/2066998/focus-montreal-doulia-hamad-and-nebras-m-warsi", 'info_dict': { From 137d609a51cb67cfc8aa6a29ecaf039e08c3a318 Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 19 Jun 2017 20:06:38 +0200 Subject: [PATCH 4/4] Fix regex --- youtube_dl/extractor/globalnews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globalnews.py b/youtube_dl/extractor/globalnews.py index d3e61eebf..2c19fb145 100644 --- a/youtube_dl/extractor/globalnews.py +++ b/youtube_dl/extractor/globalnews.py @@ -41,7 +41,7 @@ class GlobalNewsIE(InfoExtractor): group='feed' ) platform_id = self._search_regex(( - r'<div\s+?class=(["\'])[^\'"]+?the_platform_id_(?P<platformId>\d+)\1\s+?data-v_count_id=\1\2\1', + r'<div[^>]+class=(["\'])[^\'"]+?the_platform_id_(?P<platformId>\d+)\1\s+?data-v_count_id=\1\2\1', r'svp\.setContentId\(\s*([\'"])(?P<platformId>\d+)\1.*?loadCallback'), webpage, 'platform id',