2016-07-31 14:46:54 +08:00
# coding: utf-8
2014-03-29 11:55:12 +01:00
from __future__ import unicode_literals
2013-09-16 19:39:39 +02:00
import re
from . common import InfoExtractor
class BloombergIE(InfoExtractor):
    """Extractor for videos embedded in bloomberg.com pages.

    The page is scanned for a video ID, the ID is resolved through the
    ``/api/embed`` JSON endpoint, and each listed stream is expanded into
    HLS (``muxing_format == 'TS'``) or HDS formats.
    """

    # The last non-empty path component of the URL is captured as ``id``; it
    # is only used as a display name while the page is being downloaded.
    _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # video ID in BPlayer(...)
        'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
        'info_dict': {
            'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
            'ext': 'flv',
            'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
            'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        'url': 'https://www.bloomberg.com/news/videos/2017-12-04/humes-cryptocurrency-an-option-for-venezuela-video',
        'info_dict': {
            'id': '~EiNJNLwSCKD~FuCPpgGzA',
            'ext': 'mp4',
            'title': 'Greylock\'s Humes: Cryptocurrency an Option for Venezuela',
            'description': "Hans Humes, chairman and chief executive officer at Greylock Capital Management, explains how cryptocurrencies can help Venezuela work around sanctions. He speaks with Bloomberg's Francine Lacqua on \"Bloomberg Surveillance.\" (Source: Bloomberg)"
        }
    }, {
        # data-bmmrid=
        'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        The result carries ``id``, ``title``, ``formats``, ``description``
        and ``thumbnail``. Title, description and thumbnail are read from
        the page's Open Graph metadata.
        """
        name = self._match_id(url)
        webpage = self._download_webpage(url, name)

        # Known places where the page exposes the video ID. The first three
        # patterns capture the opening quote in group 1 and use the ``\1``
        # back-reference so the value ends at the matching closing quote.
        # ``default=None`` lets us fall through to the BPlayer(...) lookup
        # below instead of failing here.
        video_id = self._search_regex(
            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'"video"\s*:\s*{\s*"compressedIds"\s*:"\s*(?P<id>.+?)"'),
            webpage, 'id', group='id', default=None)

        if not video_id:
            # Last resort: the ID lives in the JSON object passed as the
            # second argument of a ``BPlayer(null, {...});`` call.
            bplayer_data = self._parse_json(self._search_regex(
                r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
            video_id = bplayer_data['id']

        # Drop the ": Video" suffix from the Open Graph title, if present.
        title = re.sub(r': Video$', '', self._og_search_title(webpage))

        embed_info = self._download_json(
            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)

        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            # Read 'muxing_format' with .get(), like 'url' above, so a stream
            # entry without that key no longer raises KeyError; it is handled
            # by the HDS branch, whose extraction is non-fatal.
            if stream.get('muxing_format') == 'TS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_f4m_formats(
                    stream_url, video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }