2014-01-28 21:57:38 +01:00
from __future__ import unicode_literals
2014-01-30 04:52:50 +01:00
import json
2013-06-23 22:23:19 +02:00
import re
from . common import InfoExtractor
2014-02-19 01:27:12 +01:00
from . . utils import int_or_none
2013-06-23 22:23:19 +02:00
class LiveLeakIE(InfoExtractor):
    """Extractor for LiveLeak ``view?i=<id>`` pages.

    A page may yield several entries: one built from a ``sources: [...]``
    player block, one per ``jwplayer("file_...")`` block, and one per
    embedded prochan.com iframe.  A single entry is returned as-is; several
    entries are wrapped in a ``multi_video`` result.
    """

    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '50f79e05ba149149c1b4ea961223d5b3',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'flv',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'b13a29626183c9d33944e6a04f41aafc',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        }
    }, {
        # Covers https://github.com/rg3/youtube-dl/pull/5983
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
        }
    }]

    # Number of id suffixes handed out so far for the page being extracted.
    # The class-level 0 is only a default: _video_count() rebinds the value
    # on the instance, and _real_extract() resets it for every page.
    video_count = 0

    def _video_count(self):
        """Return the id suffix for the next video found on the page.

        The first call yields '' (the bare page id is used); every later
        call yields '-1', '-2', ... in order.
        """
        self.video_count += 1
        if self.video_count == 1:
            return ''
        return '-' + str(self.video_count - 1)

    # Removing '.h264_*.mp4' gives the raw video, which is essentially
    # the same video without the LiveLeak logo at the top (see
    # https://github.com/rg3/youtube-dl/pull/4768)
    def _get_orig_video_url(self, url):
        """Return `url` with any '.h264_<...>.mp4' part stripped.

        The input is returned unchanged when it contains no such part, which
        callers use to detect whether a distinct original URL exists.
        """
        return re.sub(r'\.h264_.+?\.mp4', '', url)

    def _real_extract(self, url):
        """Extract every video found on the LiveLeak page at `url`.

        Returns a single info dict when exactly one video is found, or a
        ``multi_video`` dict holding all entries otherwise.

        Raises ExtractorError when the page contains no recognised video.
        """
        # Imported locally: the module header only imports int_or_none, so
        # the raise at the end of this method would otherwise fail with a
        # NameError instead of the intended ExtractorError.
        from ..utils import ExtractorError

        # Restart suffix numbering for this page; without the reset, a
        # second extraction on the same instance would continue at '-N'.
        self.video_count = 0

        entries = []  # collect all found videos
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
        video_description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
        age_limit = int_or_none(self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            webpage, 'age limit', default=None))

        # Metadata shared by every entry produced from this page.
        common_info = {
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader,
            'age_limit': age_limit,
        }

        # extracts native video #1 (single video, maybe multiple formats)
        sources_raw = self._search_regex(
            r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
        if sources_raw:
            # The block uses bare JavaScript keys; quote them so that it
            # parses as JSON.
            sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
            sources = json.loads(sources_json)

            formats = [{
                'format_id': '%s' % i,
                'format_note': s.get('label'),
                'url': s['file'],
            } for i, s in enumerate(sources)]
            for i, s in enumerate(sources):
                orig_url = self._get_orig_video_url(s['file'])
                if s['file'] != orig_url:
                    formats.append({
                        'format_id': 'original-%s' % i,
                        'format_note': s.get('label'),
                        'url': orig_url,
                        'preference': 1,
                    })
            self._sort_formats(formats)

            entry = {
                'id': page_id,
                'formats': formats,
            }
            entry.update(common_info)
            entries.append(entry)

        # extracts native videos #2 (maybe multiple videos, single format)
        # NOTE: the loop variable is deliberately not called `url`, so the
        # page URL passed to this method is not overwritten.
        video_urls = re.findall(
            r'(?s)jwplayer\("file_[0-9a-f]+"\).+?file: "(.*?)"', webpage)
        for video_url in video_urls:
            formats = [{
                'format_id': '0',
                'format_note': 'standard quality (with logo)',
                'url': video_url,
            }]
            orig_url = self._get_orig_video_url(video_url)
            if orig_url != video_url:
                formats.append({
                    'format_id': '1',
                    'format_note': 'high quality (no logo)',
                    'url': orig_url,
                    'preference': 1,
                })

            entry = {
                'id': page_id + self._video_count(),
                'formats': formats,
            }
            entry.update(common_info)
            entries.append(entry)

        # collect embedded videos:
        embed_urls = []
        # prochan.com:
        embed_urls.extend(re.findall(
            r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
            webpage))

        # add all collected embed urls
        for embed_url in embed_urls:
            entry = {
                '_type': 'url_transparent',
                'id': page_id + self._video_count(),
                'url': embed_url,
            }
            entry.update(common_info)
            entries.append(entry)

        if not entries:
            raise ExtractorError('No videos found')

        if len(entries) == 1:
            return entries[0]
        return {
            '_type': 'multi_video',
            'id': page_id,
            'entries': entries,
        }