2019-07-13 22:44:10 +02:00
# coding: utf-8
from __future__ import unicode_literals
import json
2019-07-18 11:43:41 +02:00
import re
2019-07-13 22:44:10 +02:00
from . common import InfoExtractor
2019-07-17 21:30:14 +02:00
from . . utils import urljoin
2019-07-13 22:44:10 +02:00
class TelevizeSeznamIE(InfoExtractor):
    """Extractor for episode pages on televizeseznam.cz.

    Extraction is a two step process: a GraphQL request resolves the numeric
    episode id from the page URL into episode metadata (including the ``spl``
    playlist base URL), and a second JSON request against that base URL
    returns the playlist holding the stream variants and subtitle tracks.
    """

    # Captures ``display_id`` (the slug) and ``id`` (the trailing digits).
    _VALID_URL = r'https?://(?:www\.)?televizeseznam\.cz/.+/(?P<display_id>.+)-(?P<id>[0-9]+)'

    _GRAPHQL_URL = 'https://www.televizeseznam.cz/api/graphql'
    # Only the fields read by _real_extract (spl, name, perex) plus the
    # identifying ones are requested.
    _GRAPHQL_QUERY = '''query LoadEpisode($urlName : String) { episode(urlName: $urlName) { ...VideoDetailFragmentOnEpisode } }
        fragment VideoDetailFragmentOnEpisode on Episode {
            id
            spl
            urlName
            name
            perex
        }
    '''

    _TEST = {
        'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
        'md5': '40c41ade1464a390a0b447e333df4239',
        'info_dict': {
            'id': '57953890',
            'display_id': 'buh',
            'title': 'Bůh',
            'description': 'Trenér Hrouzek je plný rozporů. Na pomoc si povolá i toho nejvyššího. Kdo to ale je? Pomůže mu vyřešit několik dilemat, která se mu v poslední době v životě nahromadila?',
            'ext': 'mp4',
        }
    }

    def extract_subtitles(self, spl_url, play_list):
        """Build the subtitles mapping from the playlist's subtitle section.

        :param spl_url: URL the playlist was downloaded from; relative
            subtitle paths are resolved against it with ``urljoin``.
        :param play_list: mapping of subtitle tracks, each a dict that may
            carry a ``language`` string and a ``urls`` dict of
            ``{extension: path}``. May be ``None`` or empty.
        :returns: ``None`` when ``play_list`` is falsy, otherwise a dict of
            ``{language: [{'ext': ..., 'url': ...}, ...]}``.
        """
        if not play_list:
            return None

        subtitles = {}
        for track in play_list.values():
            language = track.get('language')
            if not language:
                # Tracks without a language cannot be keyed; skip them.
                continue
            # ``urls`` may be missing entirely; treat that as "no variants"
            # instead of failing on iteration over None.
            for ext, path in (track.get('urls') or {}).items():
                if not path:
                    continue
                subtitles.setdefault(language, []).append({
                    'ext': ext,
                    'url': urljoin(spl_url, path),
                })
        return subtitles

    def _extract(self, ext, spl_url, play_list):
        """Convert one group of playlist variants into format dicts.

        :param ext: container extension to record on every format, or
            ``None`` when it is not known up front.
        :param spl_url: URL the playlist was downloaded from; relative
            stream paths are resolved against it with ``urljoin``.
        :param play_list: dict of ``{format_id: variant}`` where each variant
            is a dict with a ``url`` and optionally a ``resolution``
            sequence read as ``(width, height)``.
        :returns: list of format dicts; variants without a ``url`` are
            omitted.
        """
        formats = []
        for format_id, variant in play_list.items():
            path = variant.get('url')
            if not path:
                # A variant without a path cannot yield a usable format URL.
                continue
            fmt = {
                'format_id': format_id,
                'url': urljoin(spl_url, path),
                'protocol': 'https',
                'ext': ext,
            }
            resolution = variant.get('resolution')
            # Both elements are read, so require at least two of them.
            if resolution and len(resolution) >= 2:
                fmt.update({'width': resolution[0], 'height': resolution[1]})
            formats.append(fmt)
        return formats

    def extract_formats(self, spl_url, play_list):
        """Collect and sort every format offered by the playlist.

        Reads the ``http_stream`` -> ``qualities`` group (extension unknown)
        and the ``mp4`` group, then passes the combined list through
        ``self._sort_formats``.

        :param spl_url: URL the playlist was downloaded from.
        :param play_list: the playlist's ``data`` dict.
        :returns: the list of format dicts after sorting.
        """
        formats = []

        qualities = (play_list.get('http_stream') or {}).get('qualities')
        if qualities:
            formats.extend(self._extract(None, spl_url, qualities))

        mp4_variants = play_list.get('mp4')
        if mp4_variants:
            formats.extend(self._extract('mp4', spl_url, mp4_variants))

        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Resolve ``url`` into an info dict with formats and subtitles.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``display_id``, ``title``,
            ``description``, ``subtitles`` and ``formats``.
        """
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        data = self._download_json(
            self._GRAPHQL_URL, video_id, 'Downloading GraphQL result',
            data=json.dumps({
                'query': self._GRAPHQL_QUERY,
                'variables': {'urlName': video_id}
            }).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=UTF-8'}
        )['data']

        # NOTE(review): if the API answers with a null episode the subscript
        # below raises TypeError - confirm whether that case needs a
        # dedicated error message.
        episode = data['episode']

        spl_url = episode['spl'] + 'spl2,3'
        metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
        if 'Location' in metadata and 'data' not in metadata:
            # The response sometimes carries a redirect target instead of the
            # playlist itself; follow it once and use the new URL as the
            # base for relative paths.
            spl_url = metadata['Location']
            metadata = self._download_json(spl_url, video_id, 'Redirected -> Downloading playlist')
        play_list = metadata['data']

        return {
            'id': video_id,
            'display_id': display_id,
            'title': episode.get('name'),
            'description': episode.get('perex'),
            'subtitles': self.extract_subtitles(spl_url, play_list.get('subtitles')),
            'formats': self.extract_formats(spl_url, play_list)
        }