2017-06-29 13:10:45 -06:00
# coding: utf-8
from __future__ import unicode_literals
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
)
class CJSWIE(InfoExtractor):
    """Extractor for single episode pages under ``cjsw.com/program/``.

    The episode id is the trailing run of digits in the URL. The title and
    the audio location are read from the ``data-showname`` and
    ``data-audio-src`` attributes of ``<button>`` tags in the page, and the
    description from the first ``<p>...</p>`` pair the pattern matches.
    """

    _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/\S+/(?P<id>[0-9]+)'
    IE_NAME = 'cjsw'
    _TEST = {
        'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
        'md5': 'cee14d40f1e9433632c56e3d14977120',
        'info_dict': {
            'id': '20170620',
            'ext': 'mp3',
            'title': 'Freshly Squeezed',
            'description': 'Sled Island artists featured // Live session with Phi Pho, followed by a live session with Sinzere & The Late Nights! // Stay Fresh Y\'all!!',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the episode page at *url*.

        Returns a dict with ``id``, ``title``, ``description`` (may be
        ``None`` when no paragraph is found) and a one-element ``formats``
        list describing the audio-only MP3 stream.

        Raises ``ExtractorError`` (marked ``expected``) when the page has no
        ``episode-controls`` element, i.e. there is nothing to stream.
        """
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        # Presence probe only: the captured value is just the quote character.
        # ``default=None`` (rather than ``fatal=False``) keeps the lookup
        # silent, since a missing element is reported by the expected error
        # raised below and should not also trigger an "unable to extract"
        # warning.
        controls_marker = self._search_regex(
            r'<div[^>]+class=(["\'])episode-controls\1[^>]*>',
            webpage, 'episode_controls', default=None)
        if not controls_marker:
            raise ExtractorError(
                'No streamable podcast', video_id=episode_id, expected=True)

        title = self._html_search_regex(
            r'<button[^>]+data-showname=(["\'])(?P<title>.+?)\1[^>]*>',
            webpage, 'title', group='title')

        # The description is optional: a miss yields None instead of aborting.
        description = self._html_search_regex(
            r'<p>(?P<description>.+?)</p>',
            webpage, 'description', fatal=False)

        audio_url = self._html_search_regex(
            r'<button[^>]+data-audio-src=(["\'])(?P<audio_url>.+?)\1[^>]*>',
            webpage, 'audio_url', group='audio_url')

        formats = [{
            'url': audio_url,
            'ext': 'mp3',
            'vcodec': 'none',  # audio-only stream
        }]

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'formats': formats,
        }