2015-10-09 20:08:37 +01:00
# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
2016-12-10 10:47:19 +01:00
from . . compat import compat_urllib_parse_urlparse
2015-10-09 20:08:37 +01:00
from . . utils import (
ExtractorError ,
parse_iso8601 ,
qualities ,
)
class SRGSSRIE(InfoExtractor):
    """Extract media described by the il.srgssr.ch integration layer.

    Accepts either a tp.srgssr.ch player URL carrying a ``urn=urn:...``
    query or the internal ``srgssr:<bu>:<type>:<id>`` form; both yield the
    business unit (``bu``), the media type (``video``/``audio``) and the id.
    """

    _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'

    # Maps the 'block' code found in the media JSON to the message that is
    # raised to the user by get_media_data().
    _ERRORS = {
        'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
        'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
        # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
        'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
        'LEGAL': 'The video cannot be transmitted for legal reasons.',
        'STARTDATE': 'This video is not yet available. Please try again later.',
    }

    def _get_tokenized_src(self, url, video_id, format_id):
        """Return ``url`` with the token service's auth parameters appended.

        The ACL sent to the token endpoint is built from the first two path
        segments of ``url``. The token download is non-fatal: when it fails
        or the response carries no ``authparams``, ``url`` is returned
        unchanged.
        """
        sp = compat_urllib_parse_urlparse(url).path.split('/')
        token = self._download_json(
            'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
            video_id, 'Downloading %s token' % format_id, fatal=False) or {}
        auth_params = token.get('token', {}).get('authparams')
        if auth_params:
            url += '?' + auth_params
        return url

    def get_media_data(self, bu, media_type, media_id):
        """Download the play JSON and return its ``Video``/``Audio`` object.

        The returned object is the one stored under the capitalized
        ``media_type`` key of the response.

        Raises ExtractorError (expected=True) when the object's ``block``
        code is one of the keys of ``_ERRORS``.
        """
        media_data = self._download_json(
            'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
            media_id)[media_type.capitalize()]

        if media_data.get('block') and media_data['block'] in self._ERRORS:
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, self._ERRORS[media_data['block']]), expected=True)

        return media_data

    def _get_subtitles(self, bu, media_data):
        """Collect the subtitle tracks listed under ``media_data['Subtitles']``.

        Returns a dict mapping one language code to a list of
        ``{'ext': ..., 'url': ...}`` entries (TTML first, then VTT). The dict
        is empty when no subtitle URL is present.
        """
        langs = {
            'srf': 'deu',
            # RTS has its own InfoExtractor in rts.py
            # 'rts': 'fra',
            'rsi': 'ita',
            'rtr': 'roh',
            # _VALID_URL only admits 'swi' and SRGSSRPlayIE passes bu[:3],
            # so this is the key that is actually looked up for swissinfo.
            'swi': 'eng',
            'swissinfo': 'eng',  # assume english for swissinfo
        }
        # A business unit without a mapping (e.g. 'rts') used to raise
        # KeyError as soon as a subtitle URL was present; file such tracks
        # under 'und' (ISO 639-2 "undetermined") instead of aborting.
        lang = langs.get(bu, 'und')

        subtitle_data = media_data.get('Subtitles') or {}
        subtitles = {}
        for ext, url_key in (('ttml', 'TTMLUrl'), ('vtt', 'VTTUrl')):
            sub_url = subtitle_data.get(url_key)
            if sub_url:
                subtitles.setdefault(lang, []).append({'ext': ext, 'url': sub_url})
        return subtitles

    def _real_extract(self, url):
        """Build the info dict (metadata, thumbnails, subtitles, formats)."""
        bu, media_type, media_id = re.match(self._VALID_URL, url).groups()

        media_data = self.get_media_data(bu, media_type, media_id)

        metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
        title = metadata['title']
        description = metadata.get('description')
        created_date = media_data.get('createdDate') or metadata.get('createdDate')
        timestamp = parse_iso8601(created_date)

        thumbnails = [{
            'id': image.get('id'),
            'url': image['url'],
        } for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])]

        subtitles = self._get_subtitles(bu, media_data)

        preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD'])
        formats = []
        for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []):
            protocol = source.get('@protocol')
            # '@protocol' is read with .get(), so it may be missing; without
            # this guard startswith() would raise AttributeError on None.
            # Such sources fall through to the plain-URL branch below.
            proto = protocol or ''
            is_hds = proto.startswith('HTTP-HDS')
            is_hls = proto.startswith('HTTP-HLS')
            for asset in source['url']:
                asset_url = asset['text']
                quality = asset['@quality']
                format_id = '%s-%s' % (protocol, quality)
                if is_hds or is_hls:
                    # Both manifest protocols go through the token service first.
                    asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
                    if is_hds:
                        formats.extend(self._extract_f4m_formats(
                            asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
                            media_id, f4m_id=format_id, fatal=False))
                    else:
                        formats.extend(self._extract_m3u8_formats(
                            asset_url, media_id, 'mp4', 'm3u8_native',
                            m3u8_id=format_id, fatal=False))
                else:
                    formats.append({
                        'format_id': format_id,
                        'url': asset_url,
                        'preference': preference(quality),
                        'ext': 'flv' if protocol == 'RTMP' else None,
                    })
        self._sort_formats(formats)

        return {
            'id': media_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }
class SRGSSRPlayIE(InfoExtractor):
    """Resolve ``/play/`` page URLs to the internal ``srgssr:`` form.

    No page is downloaded here: the business unit, media type and id are
    taken from the URL and the result is delegated to the 'SRGSSR' extractor.
    """

    IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
    _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)'

    _TESTS = [{
        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
        'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
        'info_dict': {
            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
            'ext': 'mp4',
            'upload_date': '20130701',
            'title': 'Snowden beantragt Asyl in Russland',
            'timestamp': 1372713995,
            'subtitles': {},
        },
    }, {
        # No Speichern (Save) button
        'url': 'http://www.srf.ch/play/tv/wort-zum-sonntag/video/vorsaetzlich-gross-denken?id=a2d82e8a-1916-4c29-aade-eec0930ceeeb',
        'md5': '592bef4eb1e7db6418720c8275231ce0',
        'info_dict': {
            'id': 'a2d82e8a-1916-4c29-aade-eec0930ceeeb',
            'ext': 'mp4',
            'upload_date': '20170107',
            'title': 'Vorsätzlich gross denken',
            'description': 'Das Wort zum Sonntag spricht der römisch-katholische Theologe Arnold Landtwing.',
            'timestamp': 1483815898,
            'subtitles': {
                'deu': [
                    {'ext': 'ttml', 'url': 'https://ws.srf.ch/subtitles/urn:srf:ais:video:a2d82e8a-1916-4c29-aade-eec0930ceeeb/subtitle.ttml'},
                    {'ext': 'vtt', 'url': 'https://ws.srf.ch/subtitles/urn:srf:ais:video:a2d82e8a-1916-4c29-aade-eec0930ceeeb/subtitle.vtt'},
                ],
            },
        },
    }, {
        'url': 'http://www.srf.ch/play/tv/rundschau/video/wundermittel-olympia-jon-pult-pistole-im-anschlag-eu-zittert-vor-le-pen?id=b664a25c-8ec1-4904-885d-dd9e140ca245',
        'md5': '734dafee62eb8c03ad7e7969799f55fc',
        'info_dict': {
            'id': 'b664a25c-8ec1-4904-885d-dd9e140ca245',
            'ext': 'mp4',
            'upload_date': '20170104',
            'title': 'Wundermittel Olympia, Jon Pult, Pistole im Anschlag, EU zittert vor Le Pen',
            'description': 'Wundermittel Olympia soll Winter-Tourismus retten/ Jon Pult / Pistole im Anschlag: Schweizer decken sich mit Waffen ein / Europa zittert: Frankreich steht vor radikalem Neuanfang',
            'timestamp': 1483562845,
            'subtitles': {
                'deu': [
                    {'ext': 'ttml', 'url': 'https://ws.srf.ch/subtitles/urn:srf:ais:video:b664a25c-8ec1-4904-885d-dd9e140ca245/subtitle.ttml'},
                    {'ext': 'vtt', 'url': 'https://ws.srf.ch/subtitles/urn:srf:ais:video:b664a25c-8ec1-4904-885d-dd9e140ca245/subtitle.vtt'},
                ],
            },
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rsi.ch/play/tv/telegiornale/video/telegiornale?id=8500627',
        'md5': 'cb6c9b6bd3ce667e826ca20c0b8f0390',
        'info_dict': {
            'id': '8500627',
            'ext': 'mp4',
            'upload_date': '20170108',
            'title': 'Telegiornale',
            'description': '',
            'timestamp': 1483902000,
            'subtitles': {
                'ita': [
                    {'ext': 'ttml', 'url': 'https://cdn.rsi.ch/subtitles/subt_web/rsi/production/2017/ts_20170108_i_8550811.xml'},
                ],
            },
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
        'info_dict': {
            'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
            'ext': 'mp3',
            'upload_date': '20151013',
            'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
            'timestamp': 1444750398,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
        'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
        'info_dict': {
            'id': '6348260',
            'display_id': '6348260',
            'ext': 'mp4',
            'duration': 1796,
            'title': 'Le 19h30',
            'description': '',
            'uploader': '19h30',
            'upload_date': '20141201',
            'timestamp': 1417458600,
            'thumbnail': r're:^https?://.*\.image',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Hand the URL's (bu, type, id) triple over to the 'SRGSSR' extractor."""
        mobj = re.match(self._VALID_URL, url)
        bu, media_type, media_id = mobj.group('bu', 'type', 'id')
        # other info can be extracted from url + '&layout=json'
        # The business unit is cut to three characters ('swissinfo' -> 'swi').
        target = 'srgssr:%s:%s:%s' % (bu[:3], media_type, media_id)
        return self.url_result(target, 'SRGSSR')