2014-01-17 03:09:07 +01:00
# coding: utf-8
2014-01-15 16:48:55 +05:30
from __future__ import unicode_literals
2018-02-25 09:36:42 -05:00
import re
2014-01-15 11:49:50 +05:30
from . common import InfoExtractor
2016-09-14 23:59:13 +07:00
from . . utils import month_by_name
2014-01-17 03:09:07 +01:00
2014-01-15 11:49:50 +05:30
class FranceInterIE(InfoExtractor):
    """Extractor for franceinter.fr programme ("emissions") pages.

    Every matched page is expected to expose an audio stream. When the page
    also embeds a video (the second test case covers a Dailymotion embed),
    the result is a two-entry playlist: the audio first, then the video.
    """

    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
            'md5': '9e54d7bdb6fdc02a841007f8a975c094',
            'info_dict': {
                'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
                'ext': 'mp3',
                'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
                'description': 'md5:401969c5d318c061f86bda1fa359292b',
                'upload_date': '20160907',
            },
        },
        {
            'note': 'Audio + video (Dailymotion embed)',
            'url': 'https://www.franceinter.fr/emissions/l-instant-m/l-instant-m-13-fevrier-2018',
            'info_dict': {
                'id': 'l-instant-m/l-instant-m-13-fevrier-2018',
                'title': 'Propagande, stéréotypes, spectaculaire : les jeux vidéo font-ils du mal à l\'Histoire ?',
                'description': 'Le youtubeur Nota Bene pour \\\"History’s Creed\\\", la nouvelle websérie d\'ARTE Creative qui explore la relation Jeux vidéo et Histoire',
                'upload_date': '20180213',
            },
            'playlist_count': 2,
        },
    ]

    @staticmethod
    def _format_upload_date(date_str):
        """Turn a 'D <French month name> YYYY' string into 'YYYYMMDD'.

        Returns None when ``date_str`` is empty/None or when the month name
        is not recognised, so that an invalid date with month ``00`` is never
        reported.
        """
        if not date_str:
            return None
        # The regex that captures date_str guarantees exactly three
        # whitespace-separated tokens: 1-2 digits, a word, 4 digits.
        day, month_name, year = date_str.split()
        month = month_by_name(month_name, lang='fr')
        if not month:
            return None
        return '%s%02d%02d' % (year, month, int(day))

    def _real_extract(self, url):
        """Return the info dict (audio only) or playlist (audio + video) for ``url``.

        The audio URL, title and description lookups are fatal when missing;
        the upload date and the embedded video are optional.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        audio_url = self._search_regex(
            r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'audio url', group='url')
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        upload_date = self._format_upload_date(self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
            webpage, 'upload date', fatal=False))

        audio = {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'formats': [{
                'url': audio_url,
                'vcodec': 'none',
            }],
        }

        # If there is a video, return playlist of audio + video, else just audio
        video_anchor = re.search(
            r'data-video-anchor-target=["\']([^"\']+)', webpage)
        if not video_anchor:
            return audio

        # The anchor value comes straight from the page, so escape it before
        # splicing it into the pattern; re.escape also protects whitespace
        # and '#', which the verbose (?x) flag would otherwise swallow.
        video_url = self._search_regex(
            r'(?sx)data-uuid=["\']%s.*?<iframe[^>]*src=["\']([^"\']+)' % re.escape(video_anchor.group(1)),
            webpage, 'video url', fatal=False, group=1)
        if not video_url:
            return audio

        return {
            '_type': 'playlist',
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'entries': [audio, {'_type': 'url', 'url': video_url}],
        }