From cc1c263c95987df80a37a9446ab907a6df1ef49f Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Mon, 12 Sep 2016 11:53:37 -0700 Subject: [PATCH] Fix Bloomberg extractor. (closes #10630) The JSON being sent from Bloomberg uses single quotes, which is invalid JSON, and contains function calls. Regex both of those out. Additionally, the API endpoint requires an additional parameter, and must be called over HTTPS now. Change that as well. Signed-off-by: Adam Buchbinder --- youtube_dl/extractor/bloomberg.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2a8cd64b9..9b07ca2d4 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -48,13 +48,20 @@ class BloombergIE(InfoExtractor): r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', webpage, 'id', group='url', default=None) if not video_id: - bplayer_data = self._parse_json(self._search_regex( - r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) + bplayer_json = self._search_regex(r'BPlayer\(null,\s*({[^;]+})\);', + webpage, 'id') + # It's not good JSON; it uses single quotes and contains function + # calls. Sweep that under the rug. + bplayer_json = bplayer_json.replace("\'", '"') + bplayer_json = re.sub("\w+\(([^)]+)\)", '"FUNCTION"', bplayer_json) + bplayer_data = self._parse_json(bplayer_json, name) video_id = bplayer_data['id'] + video_id_type = bplayer_data['idType'] title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) + 'https://www.bloomberg.com/api/embed?id=%s&idType=%s' % + (video_id, video_id_type), video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url')