Fix Bloomberg extractor. (closes #10630)

The player data embedded in Bloomberg pages uses single quotes and contains
function calls, so it is not valid JSON. Replace the single quotes with double
quotes and regex the function calls out before parsing.

Additionally, the embed API endpoint now requires an `idType` parameter and
must be called over HTTPS. Change that as well.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
This commit is contained in:
Adam Buchbinder 2016-09-12 11:53:37 -07:00
parent 7a7309219c
commit 037e1d83e0

View File

@@ -48,13 +48,20 @@ class BloombergIE(InfoExtractor):
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
webpage, 'id', group='url', default=None) webpage, 'id', group='url', default=None)
if not video_id: if not video_id:
bplayer_data = self._parse_json(self._search_regex( bplayer_json = self._search_regex(r'BPlayer\(null,\s*({[^;]+})\);',
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) webpage, 'id')
# It's not good JSON; it uses single quotes and contains function
# calls. Sweep that under the rug.
bplayer_json = bplayer_json.replace("\'", '"')
bplayer_json = re.sub("\w+\(([^)]+)\)", '"FUNCTION"', bplayer_json)
bplayer_data = self._parse_json(bplayer_json, name)
video_id = bplayer_data['id'] video_id = bplayer_data['id']
video_id_type = bplayer_data['idType']
title = re.sub(': Video$', '', self._og_search_title(webpage)) title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json( embed_info = self._download_json(
'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) 'https://www.bloomberg.com/api/embed?id=%s&idType=%s' %
(video_id, video_id_type), video_id)
formats = [] formats = []
for stream in embed_info['streams']: for stream in embed_info['streams']:
stream_url = stream.get('url') stream_url = stream.get('url')