[kaltura][azmedien-live] Add support for live streams

2017-02-20 18:40:12 +01:00 · 2017-02-20 18:40:12 +01:00 · 5324aece98
commit 5324aece98
parent 4d345bf17b
3 changed files with 128 additions and 47 deletions
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@ -7,13 +7,21 @@ from .common import InfoExtractor
 from .kaltura import KalturaIE
 from ..utils import (
    get_element_by_class,
    base_url,
    ExtractorError,
    get_element_by_id,
    NO_DEFAULT,
    strip_or_none,
    urljoin,
 )
 class AZMedienBaseIE(InfoExtractor):
    def _extract_partner_id(self, video_id, webpage, default=NO_DEFAULT):
        """Return the Kaltura partner id referenced by a script tag in webpage.

        The id is the run of digits following a ``/p/`` or ``/partner_id/``
        path segment in the ``src`` of a ``<script>`` tag whose host is
        kaltura.com or one of its subdomains.  ``default`` is handed through
        to ``_search_regex`` unchanged; ``video_id`` is accepted but not
        used by this method.
        """
        partner_id_re = r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)'
        return self._search_regex(
            partner_id_re, webpage, 'kaltura partner id', default=default)
    def _kaltura_video(self, partner_id, entry_id):
        return self.url_result(
            'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
@ -73,12 +81,8 @@ class AZMedienIE(AZMedienBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-
+        partner_id = self._extract_partner_id(video_id, webpage)
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
            webpage, 'kaltura partner id')
        entry_id = self._html_search_regex(
            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
            % re.escape(video_id), webpage, 'kaltura entry id', group='id')
@ -211,3 +215,48 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE):
        title = self._og_search_title(webpage, fatal=False)
        description = self._og_search_description(webpage)
        return self.playlist_result(entries, playlist_id, title, description)
 class AZMedienLiveIE(AZMedienBaseIE):
    """Extractor for the ``/live`` pages of telezueri.ch, telebaern.tv and telem1.ch.

    The Kaltura partner id is read from the page itself; the entry id of the
    live stream is searched for in the hash-named javascript files the page
    references.  The actual extraction is delegated to the Kaltura extractor
    through ``_kaltura_video``.
    """
    IE_DESC = 'AZ Medien Live TV'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?P<id>
                            (?:
                                telezueri\.ch|
                                telebaern\.tv|
                                telem1\.ch
                            )/
                            live
                        )
                    '''
    _TEST = {
        'url': 'http://www.telezueri.ch/live',
        'only_matching': True,
    }
    def _real_extract(self, url):
        """Resolve a live page URL to a ``kaltura:<partner>:<entry>`` result.

        Raises ExtractorError when none of the candidate scripts contains a
        ``kalturaLiveVideo(...)`` call (including when the page references no
        candidate script at all).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        partner_id = self._extract_partner_id(video_id, webpage)
        # Candidate scripts are those whose file name is a hex string.
        script_urls = [urljoin(
            base_url(url), m.group('url')) for m in re.finditer(
                r'<script[^>]+type=["\']text/javascript["\'][^>]+src=["\'](?P<url>.*/[0-9a-f]+\.js)["\']',
                webpage)]
        # A dedicated loop variable keeps the page URL in ``url`` intact.
        for script_url in script_urls:
            js = self._download_webpage(
                script_url, video_id,
                note='Downloading javascript file %s' % script_url)
            # default=None already makes a miss non-fatal, so a script
            # without the call is simply skipped.
            # NOTE(review): the leading [^/]{2} presumably skips calls that
            # are commented out with `//` - confirm against the site's js.
            entry_id = self._search_regex(
                r'[^/]{2}\s*kalturaLiveVideo\(\s*["\'](.+?)["\'].+\)',
                js, 'kaltura entry id', default=None)
            if entry_id:
                break
        else:
            # Reached only when no script yielded an entry id.
            raise ExtractorError('Cannot extract Kaltura entry id for live broadcast.')
        return self._kaltura_video(partner_id, entry_id)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -82,6 +82,7 @@ from .awaan import (
 )
 from .azmedien import (
    AZMedienIE,
    AZMedienLiveIE,
    AZMedienPlaylistIE,
    AZMedienShowPlaylistIE,
 )
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@ -102,6 +102,23 @@ class KalturaIE(InfoExtractor):
        {
            'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
            'only_matching': True,
        },
        {
            # Kaltura live stream
            'url': 'kaltura:1719221:1_hoislpiz',
            'info_dict': {
                'id': '1_hoislpiz',
                'ext': 'm3u8',
                'title': 'TeleZüri LIVE Stream',
                'upload_date': '20150624',
                'uploader_id': 'webit',
                'thumbnail': 're:^https?://.*/thumbnail/.*',
                'timestamp': 1435129674,
                'is_live': True,
            },
            'params': {
                'skip_download': True,
            }
        }
    ]
@ -263,11 +280,24 @@ class KalturaIE(InfoExtractor):
                unsigned_url += '?referrer=%s' % referrer
            return unsigned_url
        formats = []
        is_live = False
        if info.get('objectType') == 'KalturaLiveStreamEntry':
            is_live = True
            for f in info.get('liveStreamConfigurations'):
                if f.get('protocol') == 'hds':
                    formats.extend(self._extract_f4m_formats(f.get('url'), entry_id))
                elif f.get('protocol') == 'hls' or f.get('protocol') == 'applehttp':
                    formats.extend(self._extract_m3u8_formats(f.get('url'), entry_id))
                elif f.get('protocol') == 'sl':
                    formats.extend(self._extract_ism_formats(f.get('url'), entry_id))
                elif f.get('protocol') == 'mpegdash':
                    formats.extend(self._extract_mpd_formats(f.get('url'), entry_id))
        else:
            data_url = info['dataUrl']
            if '/flvclipper/' in data_url:
                data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
        formats = []
            for f in flavor_assets:
                # Continue if asset is not ready
                if f.get('status') != 2:
@ -334,4 +364,5 @@ class KalturaIE(InfoExtractor):
            'timestamp': info.get('createdAt'),
            'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
            'view_count': info.get('plays'),
            'is_live': is_live,
        }