2017-02-13 23:04:28 +01:00
# -*- coding: utf-8 -*-
2014-06-13 23:44:44 +07:00
from __future__ import unicode_literals
2015-02-18 20:14:42 +01:00
from . common import InfoExtractor
2016-07-03 15:39:24 +08:00
from . . compat import compat_urlparse
2014-06-13 23:44:44 +07:00
from . . utils import (
2015-12-25 15:38:12 +01:00
determine_ext ,
2016-07-03 15:39:24 +08:00
ExtractorError ,
find_xpath_attr ,
fix_xml_ampersands ,
int_or_none ,
2014-06-13 23:44:44 +07:00
parse_duration ,
unified_strdate ,
2016-07-03 15:39:24 +08:00
update_url_query ,
2015-12-25 15:38:12 +01:00
xpath_text ,
2014-06-13 23:44:44 +07:00
)
2016-07-03 19:22:48 +08:00
class RaiBaseIE(InfoExtractor):
    """Common extraction helpers shared by the Rai extractors."""

    def _extract_relinker_formats(self, relinker_url, video_id):
        """Query the relinker once per platform and collect the formats.

        The relinker XML is downloaded for each of the ``mon``, ``flash``
        and ``native`` platforms.  HLS manifests are only accepted from the
        ``mon`` answer and HDS manifests only from the ``flash`` answer; any
        other media URL is added as a direct HTTP format.

        Returns a (possibly empty, unsorted) list of format dicts.  Raises a
        geo restriction error when the relinker answers with the
        "video not available" placeholder clip.
        """
        formats = []

        for platform in ('mon', 'flash', 'native'):
            relinker = self._download_xml(
                relinker_url, video_id,
                note='Downloading XML metadata for platform %s' % platform,
                transform_source=fix_xml_ampersands,
                query={'output': 45, 'pl': platform},
                headers=self.geo_verification_headers())

            # The answer may lack a content URL for this platform; skip it
            # instead of failing on ``None.text``.
            url_el = find_xpath_attr(relinker, './url', 'type', 'content')
            if url_el is None or not url_el.text:
                continue
            media_url = url_el.text

            if media_url == 'http://download.rai.it/video_no_available.mp4':
                self.raise_geo_restricted()

            ext = determine_ext(media_url)
            # Each manifest kind is taken from a single platform only, so
            # the same manifest is not processed more than once.
            if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
                continue

            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                manifest_url = update_url_query(
                    media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
                    {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
                formats.extend(self._extract_f4m_formats(
                    manifest_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
                # int_or_none() yields None for a missing/invalid bitrate;
                # comparing None with an int raises TypeError on Python 3.
                has_bitrate = bitrate is not None and bitrate > 0
                formats.append({
                    'url': media_url,
                    'tbr': bitrate if has_bitrate else None,
                    'format_id': 'http-%d' % bitrate if has_bitrate else 'http',
                })

        return formats

    def _extract_from_content_id(self, content_id, base_url):
        """Build the info dict for a content item from its JSON description.

        content_id -- identifier interpolated into the ContentItem JSON URL
        base_url   -- URL used to resolve relative thumbnail URLs

        Raises ExtractorError when the item type is neither audio nor video.
        """
        media = self._download_json(
            'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
            content_id, 'Downloading video JSON')

        # Keep every image variant present in the JSON, in a fixed order.
        thumbnails = [
            {'url': compat_urlparse.urljoin(base_url, media[image_type])}
            for image_type in ('image', 'image_medium', 'image_300')
            if media.get(image_type)]

        formats = []
        media_type = media['type']
        if 'Audio' in media_type:
            formats.append({
                'format_id': media.get('formatoAudio'),
                'url': media['audioUrl'],
                'ext': media.get('formatoAudio'),
            })
        elif 'Video' in media_type:
            formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
            self._sort_formats(formats)
        else:
            raise ExtractorError('not a media file')

        subtitles = {}
        captions = media.get('subtitlesUrl')
        if captions:
            STL_EXT = '.stl'
            SRT_EXT = '.srt'
            # An .stl captions URL is rewritten to its .srt counterpart.
            if captions.endswith(STL_EXT):
                captions = captions[:-len(STL_EXT)] + SRT_EXT
            subtitles['it'] = [{
                'ext': 'srt',
                'url': captions,
            }]

        return {
            'id': content_id,
            'title': media['name'],
            'description': media.get('desc'),
            'thumbnails': thumbnails,
            'uploader': media.get('author'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(media.get('length')),
            'formats': formats,
            'subtitles': subtitles,
        }
2016-07-03 19:22:48 +08:00
class RaiTVIE(RaiBaseIE):
    """Extractor for /dl/.../media/ and /dl/.../ondemand/ pages on rai.it, rai.tv and rainews.it."""

    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
    _TESTS = [
        {
            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
            'md5': '8970abf8caf8aef4696e7b1f2adfc696',
            'info_dict': {
                'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
                'ext': 'mp4',
                'title': 'Report del 07/04/2014',
                'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
                'upload_date': '20140407',
                'duration': 6160,
                'thumbnail': r're:^https?://.*\.jpg$',
            }
        },
        {
            # no m3u8 stream
            'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
            # HDS download, MD5 is unstable
            'info_dict': {
                'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
                'ext': 'flv',
                'title': 'TG PRIMO TEMPO',
                'upload_date': '20140612',
                'duration': 1758,
                'thumbnail': r're:^https?://.*\.jpg$',
            },
            'skip': 'Geo-restricted to Italy',
        },
        {
            'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
            'md5': '35cf7c229f22eeef43e48b5cf923bef0',
            'info_dict': {
                'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
                'ext': 'mp4',
                'title': 'State of the Net, Antonella La Carpia: regole virali',
                'description': 'md5:b0ba04a324126903e3da7763272ae63c',
                'upload_date': '20140613',
            },
            'skip': 'Error 404',
        },
        {
            'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
            'info_dict': {
                'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
                'ext': 'mp4',
                'title': 'Alluvione in Sardegna e dissesto idrogeologico',
                'description': 'Edizione delle ore 20:30',
            },
            'skip': 'invalid urls',
        },
        {
            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
            'md5': 'e57493e1cb8bc7c564663f363b171847',
            'info_dict': {
                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
                'ext': 'mp4',
                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
                'description': 'md5:364b604f7db50594678f483353164fb8',
                'upload_date': '20140923',
                'duration': 386,
                'thumbnail': r're:^https?://.*\.jpg$',
            }
        },
    ]

    def _real_extract(self, url):
        """Resolve the content id embedded in *url* and extract it."""
        # The id captured by _VALID_URL is used directly as the content id.
        return self._extract_from_content_id(self._match_id(url), url)
2014-06-13 23:44:44 +07:00
2015-12-25 15:38:12 +01:00
2016-07-03 19:22:48 +08:00
class RaiIE(RaiBaseIE):
    """Extractor for the remaining /dl/ pages on rai.it, rai.tv and rainews.it.

    URLs that RaiTVIE accepts are rejected here (see ``suitable``).
    """

    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
    _TESTS = [
        {
            'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
            'md5': '2dd727e61114e1ee9c47f0da6914e178',
            'info_dict': {
                'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
                'ext': 'mp4',
                'title': 'Il pacco',
                'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
                'upload_date': '20141221',
            },
        },
        {
            # Direct relinker URL
            'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
            # HDS live stream, MD5 is unstable
            'info_dict': {
                'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
                'ext': 'flv',
                'title': 'EuroNews',
            },
            'skip': 'Geo-restricted to Italy',
        },
        {
            # Embedded content item ID
            'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
            'md5': '84c1135ce960e8822ae63cec34441d63',
            'info_dict': {
                'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
                'ext': 'mp4',
                'title': 'TG1 ore 20:00 del 02/07/2016',
                'upload_date': '20160702',
            },
        },
        {
            'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
            # HDS live stream, MD5 is unstable
            'info_dict': {
                'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
                'ext': 'flv',
                'title': 'La diretta di Rainews24',
            },
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when RaiTVIE does not already handle it."""
        if RaiTVIE.suitable(url):
            return False
        return super(RaiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Extract from a page by trying, in order: an embedded iframe or
        ``drawMediaRaiTV`` URL, an ``initEdizione`` content item id, and
        finally a relinker URL found in the page scripts."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # 1. The page points at another page: delegate to whichever
        #    extractor matches that URL.
        iframe_patterns = [
            r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
            r'drawMediaRaiTV\(["\'](.+?)["\']',
        ]
        iframe_url = self._search_regex(
            iframe_patterns, webpage, 'iframe', default=None)
        if iframe_url:
            if not iframe_url.startswith('http'):
                iframe_url = compat_urlparse.urljoin(url, iframe_url)
            return self.url_result(iframe_url)

        # 2. The page embeds a content item id.
        content_item_id = self._search_regex(
            r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
            webpage, 'content item ID', group='content_id', default=None)
        if content_item_id:
            return self._extract_from_content_id(content_item_id, url)

        # 3. Fall back to a relinker URL; this lookup has no default, so a
        #    page without one fails here.
        relinker_path = self._search_regex(
            r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
            webpage, 'relinker URL', group='url')
        relinker_url = compat_urlparse.urljoin(url, relinker_path)

        formats = self._extract_relinker_formats(relinker_url, video_id)
        self._sort_formats(formats)

        title = self._search_regex(
            r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
            webpage, 'title', group='title', default=None)
        if not title:
            title = self._og_search_title(webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
2017-02-13 23:04:28 +01:00
class RaiPlayIE(RaiBaseIE):
    """Extractor for raiplay.it video pages."""

    _VALID_URL = r'https?://(?:.+?\.)?raiplay\.it/raiplay/video/(?:[^/]+/)+.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
    _TEST = {
        'url': 'http://www.raiplay.it/raiplay/video/2016/06/Paolo-Fresu-Il-tempo-di-un-viaggio-5bdc6f5f-74b7-426e-8192-9a1189123522.html',
        'info_dict': {
            'id': '5bdc6f5f-74b7-426e-8192-9a1189123522',
            'ext': 'mp4',
            'title': '365 Paolo Fresu, il tempo di un viaggio',
            'description': 'Incontro con con Paolo Fresu, compositore e trombettista di straordinario e talento. Dall\'attaccamento alla terra della nativa Sardegna alle prime avventure musicali, dalla passione per Miles Davis alle collaborazioni con autori e musicisti di ogni nazionalità e estrazione.',
            'upload_date': '20160628',
            'duration': 6014.0,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Resolve the content id embedded in *url* and extract it."""
        # The id captured by _VALID_URL is used directly as the content id.
        return self._extract_from_content_id(self._match_id(url), url)