From 037e1d83e0f0247ffa43baf33c9a8873aa0710e4 Mon Sep 17 00:00:00 2001
From: Adam Buchbinder
Date: Mon, 12 Sep 2016 11:53:37 -0700
Subject: [PATCH] Fix Bloomberg extractor. (closes #10630)

The JSON being sent from Bloomberg uses single quotes, which is invalid
JSON, and contains function calls. Regex both of those out.

Additionally, the API endpoint requires an additional parameter, and
must be called over HTTPS now. Change that as well.

Signed-off-by: Adam Buchbinder
---
 youtube_dl/extractor/bloomberg.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 2a8cd64b9..9b07ca2d4 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -48,13 +48,20 @@ class BloombergIE(InfoExtractor):
             r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
             webpage, 'id', group='url', default=None)
         if not video_id:
-            bplayer_data = self._parse_json(self._search_regex(
-                r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
+            bplayer_json = self._search_regex(r'BPlayer\(null,\s*({[^;]+})\);',
+                                              webpage, 'id')
+            # It's not good JSON; it uses single quotes and contains function
+            # calls. Sweep that under the rug.
+            bplayer_json = bplayer_json.replace("\'", '"')
+            bplayer_json = re.sub("\w+\(([^)]+)\)", '"FUNCTION"', bplayer_json)
+            bplayer_data = self._parse_json(bplayer_json, name)
             video_id = bplayer_data['id']
+            video_id_type = bplayer_data['idType']
         title = re.sub(': Video$', '', self._og_search_title(webpage))
 
         embed_info = self._download_json(
-            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+            'https://www.bloomberg.com/api/embed?id=%s&idType=%s' %
+            (video_id, video_id_type), video_id)
         formats = []
         for stream in embed_info['streams']:
             stream_url = stream.get('url')