From db5fe4b1c9091fcb45cb4ab6cc7d039a903a1c90 Mon Sep 17 00:00:00 2001
From: gfabiano
Date: Mon, 30 Jul 2018 18:15:20 +0200
Subject: [PATCH] [cbnc] fix extraction

---
 youtube_dl/extractor/cnbc.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
index d354d9f95..77400a180 100644
--- a/youtube_dl/extractor/cnbc.py
+++ b/youtube_dl/extractor/cnbc.py
@@ -1,13 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import smuggle_url
 
 
 class CNBCIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www|video)?\.cnbc\.com/(?:gallery|video)/(?:\?video=(?P<id>[0-9]+)|.*/(?P<display_id>[^.]+))'
+    _TESTS = [{
         'url': 'http://video.cnbc.com/gallery/?video=3000503714',
         'info_dict': {
             'id': '3000503714',
@@ -22,10 +24,33 @@ class CNBCIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+        'info_dict': {
+            'id': '7000033068',
+            'ext': 'mp4',
+            'title': 'Full interview with Brian Belski and Tobias Levkovich',
+            'description': 'md5:958012776b16f68bad3008587dd0a03a',
+            'timestamp': 1532908800,
+            'upload_date': '20180730',
+            'uploader': 'NBCU-CNBC',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        if not video_id:
+            display_id = mobj.group('display_id')
+            webpage = self._download_webpage(url, display_id)
+            video_id = self._html_search_regex(
+                r'<[^>]+?data-VideoID=[\'"]\s*([0-9]+)\s*',
+                webpage, display_id
+            )
         return {
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',