From db5fe4b1c9091fcb45cb4ab6cc7d039a903a1c90 Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 30 Jul 2018 18:15:20 +0200 Subject: [PATCH 1/2] [cnbc] fix extraction --- youtube_dl/extractor/cnbc.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index d354d9f95..77400a180 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -1,13 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import smuggle_url class CNBCIE(InfoExtractor): - _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www|video)?\.cnbc\.com/(?:gallery|video)/(?:\?video=(?P<id>[0-9]+)|.*/(?P<display_id>[^.]+))' + _TESTS = [{ 'url': 'http://video.cnbc.com/gallery/?video=3000503714', 'info_dict': { 'id': '3000503714', @@ -22,10 +24,33 @@ class CNBCIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000033068', + 'ext': 'mp4', + 'title': 'Full interview with Brian Belski and Tobias Levkovich', + 'description': 'md5:958012776b16f68bad3008587dd0a03a', + 'timestamp': 1532908800, + 'upload_date': '20180730', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + if not video_id: + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r']+?data-VideoID=[\'"]\s*([0-9]+)\s*', + webpage, display_id + ) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', From a25fa57d344cf358bfbafd4451d059cee96bf232 Mon Sep 17 00:00:00 2001 From: gfabiano Date: Mon, 30
Jul 2018 20:19:45 +0200 Subject: [PATCH 2/2] [cnbc] added separate extractor --- youtube_dl/extractor/cnbc.py | 72 ++++++++++++++++++------------ youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 77400a180..35c0b6124 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -1,15 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import re from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + js_to_json, + smuggle_url, +) class CNBCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|video)?\.cnbc\.com/(?:gallery|video)/(?:\?video=(?P<id>[0-9]+)|.*/(?P<display_id>[^.]+))' - _TESTS = [{ + _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' + _TEST = { 'url': 'http://video.cnbc.com/gallery/?video=3000503714', 'info_dict': { 'id': '3000503714', @@ -24,33 +26,10 @@ class CNBCIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - }, { - 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', - 'info_dict': { - 'id': '7000033068', - 'ext': 'mp4', - 'title': 'Full interview with Brian Belski and Tobias Levkovich', - 'description': 'md5:958012776b16f68bad3008587dd0a03a', - 'timestamp': 1532908800, - 'upload_date': '20180730', - 'uploader': 'NBCU-CNBC', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] + } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if not video_id: - display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex( - r']+?data-VideoID=[\'"]\s*([0-9]+)\s*', - webpage, display_id - ) + video_id = self._match_id(url) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', @@ -59,3 +38,38 @@ class CNBCIE(InfoExtractor): {'force_smil_url': True}), 'id': video_id, } +
+ +class CNBCNewIE(InfoExtractor): + IE_NAME = 'CNBC:new' + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._parse_json( + self._search_regex( + r'(?s).*]*>.*?({.+?content_id.+?}).*?', + webpage, display_id), + display_id, transform_source=js_to_json + )['content_id'] + + return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 29fab5b9a..a54201af1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -204,7 +204,10 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import CNBCIE +from .cnbc import ( + CNBCIE, + CNBCNewIE, +) from .cnn import ( CNNIE, CNNBlogsIE,