2018-09-01 19:43:34 +02:00
# coding: utf-8
from __future__ import unicode_literals
2020-04-06 02:05:06 +07:00
import re
2018-09-01 19:43:34 +02:00
from . common import InfoExtractor
2020-04-06 02:05:06 +07:00
from . jwplatform import JWPlatformIE
2018-09-01 19:43:34 +02:00
from . nexx import NexxIE
2020-04-06 02:05:06 +07:00
from . . compat import (
compat_str ,
compat_urlparse ,
)
from . . utils import (
NO_DEFAULT ,
2020-04-12 13:22:20 +02:00
smuggle_url ,
2020-04-06 02:05:06 +07:00
try_get ,
2020-04-12 13:22:20 +02:00
unsmuggle_url ,
2020-04-06 02:05:06 +07:00
)
2018-09-01 19:43:34 +02:00
class Tele5IE(InfoExtractor):
    """Extractor for tele5.de pages.

    A page is resolved to one of:
      * a Nexx video (numeric id taken from the URL query or the page),
      * a JW Platform video (8 character id taken from the URL query or
        the page),
      * a playlist of further tele5.de pages, when the page links to
        several videos and playlists are not disabled.
    """

    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.tele5.de/filme/schlefaz-der-polyp-die-bestie-mit-den-todesarmen-ab-13042018/',
        'info_dict': {
            'id': 'XSWj0xbO',
            'ext': 'mp4',
            # fun fact: upload_date is not visible on the web page for this video
            'upload_date': '20200326',  # this is a re-upload
            'timestamp': 1585190811,
            'duration': 8701.0,
            'title': 'SchleFaZ: Der Polyp - Die Bestie mit den Todesarmen (ab 13.04.2018)',
            'description': 'SchleFaZ: Der Polyp - Die Bestie mit den Todesarmen (ab 13.04.2018)'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
        'info_dict': {
            'id': '1F8PHGxn',
            'ext': 'mp4',
            'upload_date': '20190509',
            'timestamp': 1557441600,
            'duration': 8181.0,
            'title': 'SchleFaZ: Dragon Crusaders',
            'description': 'Drachenzähmen schlecht gemacht! Oliver Kalkofe und Peter Rütten knöpfen sich mit "SchleFaZ: Dragon Crusaders" eine wahrhaft verhext-verflixte Drachen-Sause vor. Statt großer Kampf, großer Krampf. Nicht nur in den Füßen, die einem bei dem müden Fantasy-Abenteuer garantiert einschlafen!'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/timeless/',
        'info_dict': {
            'title': 'Timeless',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://www.tele5.de/kalkofes-welt/best-of-clips/worst-of-internet/?ve_id=dm2hJgJp',
        'info_dict': {
            'id': 'dm2hJgJp',
            'ext': 'mp4',
            'title': 'Freshtorge - Sandra trifft Frau Merkel',
            'upload_date': '20200326',
            'description': 'Freshtorge - Sandra trifft Frau Merkel',
            'timestamp': 1585185161,
            'duration': 170.0,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
        'only_matching': True,
    }, {
        'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a tele5.de URL to a Nexx/JW Platform video or a playlist.

        The URL may carry smuggled data; a truthy 'force_singlevideo' entry
        disables playlist detection (it is set on the entries this method
        itself emits, so that they are not expanded into playlists again).
        """
        url, smuggled_data = unsmuggle_url(url, {})

        # An explicit video id may be passed as ?vid=... or ?ve_id=...
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]

        NEXX_ID_RE = r'\d{6,}'
        JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'

        def nexx_url(nexx_id):
            return 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id

        def nexx_result(nexx_id):
            return self.url_result(
                nexx_url(nexx_id), ie=NexxIE.ie_key(), video_id=nexx_id)

        def is_id(pattern, candidate):
            # Require the whole candidate to match: a bare re.match() would
            # accept any string that merely starts with a valid id (e.g. it
            # would treat '123456ab' as a Nexx id).
            return re.match(r'(?:%s)\Z' % pattern, candidate) is not None

        jwplatform_id = None
        if video_id:
            if is_id(NEXX_ID_RE, video_id):
                return nexx_result(video_id)
            if is_id(JWPLATFORM_ID_RE, video_id):
                jwplatform_id = video_id

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # TODO: user now has to specify --no-playlist every time a page with
        # video links should be treated as a single video (annoying and not
        # expected)
        if (not smuggled_data.get('force_singlevideo', False)
                and not self._downloader.params.get('noplaylist')):
            # TODO: use something other than a regex?
            hrefs = re.findall(
                r'href="([^"]+)"\s+class="special-video__link(?: video-teaser__link)?"',
                webpage)
            # TODO: use something other than a regex?
            title = re.search('<h1>([^<]+)</h1>', webpage)
            # Only build a playlist when the page actually links to videos;
            # otherwise fall through to single video extraction instead of
            # returning an empty playlist.
            if hrefs and title:
                entries = [{
                    '_type': 'url_transparent',
                    'ie_key': 'Tele5',
                    # urljoin keeps root-relative links on https://tele5.de
                    # and leaves absolute links intact
                    'url': smuggle_url(
                        compat_urlparse.urljoin('https://tele5.de', href),
                        {'force_singlevideo': True}),
                } for href in hrefs]
                return self.playlist_result(
                    entries, playlist_title=title.group(1))

        def extract_id(pattern, name, default=NO_DEFAULT):
            # Look for the id in the player markup of the downloaded page,
            # trying the most specific pattern first.
            return self._html_search_regex(
                (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
                 r'\s+id\s*=\s*["\']player_(%s)' % pattern,
                 r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
                default=default)

        nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
        if nexx_id:
            return nexx_result(nexx_id)

        if not jwplatform_id:
            # No default is passed here, so extract_id falls back to
            # NO_DEFAULT for this final lookup.
            jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')

        # NOTE: this lookup belongs to the Nexx hand-off that is disabled
        # below; its result is currently unused.
        media = self._download_json(
            'https://cdn.jwplayer.com/v2/media/' + jwplatform_id,
            display_id)
        nexx_id = try_get(
            media, lambda x: x['playlist'][0]['nexx_id'], compat_str)

        # TODO: nexx offers more formats, but fails (404) on some videos
        # if nexx_id:
        #     return nexx_result(nexx_id)

        return self.url_result(
            'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
            video_id=jwplatform_id)