[stolaf] new extractor

2016-04-02 11:43:17 +02:00 · 2016-04-02 11:43:17 +02:00 · a3158f41b1
commit a3158f41b1
parent e46295036c
2 changed files with 79 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1056,6 +1056,7 @@ from .srgssr import (
 from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .stolaf import StOlafIE
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
--- a/youtube_dl/extractor/stolaf.py
+++ b/youtube_dl/extractor/stolaf.py
@@ -0,0 +1,78 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import js_to_json
 class StOlafIE(InfoExtractor):
    """Extractor for St. Olaf multimedia event pages.

    Handles URLs of the form ``stolaf.edu/multimedia/play/?e=<id>``.
    Title and description are read from the page's OpenGraph metadata;
    the media sources come from the ``getPlayerPlaylist`` method of
    ``eventlib.cfc``.
    """

    _VALID_URL = r'^https?://(?:www\.)?stolaf\.edu/multimedia/play/\?e=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.stolaf.edu/multimedia/play/?e=573',
        'info_dict': {
            'id': '573',
            'ext': 'mp4',
            'title': 'Senior Soloists Concert',
            'description': 'St. Olaf Orchestra & Senior Soloists',
            'thumbnail': 'http://www.stolaf.edu/multimedia/components/poster/e573',
        },
        'params': {
            'skip_download': True,  # because m3u8
        },
    }]

    # Value sent as the ``html5stream`` query parameter, which selects the
    # quality of the m3u8 stream. Any floating-point constant is accepted,
    # but values above 3 are clamped; only 1, 2 and 3 seem to give actual
    # streams, though.
    # XXX: request all three? or transform the URL locally?
    _HTML5_STREAM_QUALITY = 3

    def _real_extract(self, url):
        """Build the info dict for the event addressed by ``url``.

        Returns a plain video result when the playlist holds exactly one
        item (its id is then the event id), otherwise a ``multi_video``
        result whose entries are identified as ``<event id>-<index>``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_property('title', webpage)
        # The description is optional metadata: a page without og:description
        # must not abort the whole extraction.
        description = self._og_search_property(
            'description', webpage, fatal=False)

        quality = self._HTML5_STREAM_QUALITY
        javascript = self._download_webpage(
            'http://www.stolaf.edu/multimedia/components/eventlib.cfc',
            playlist_id, 'Downloading playlist #%u' % quality,
            query={
                'method': 'getPlayerPlaylist',
                'eventtype': 'e',
                'eventid': playlist_id,
                'html5stream': quality,
            })

        # The response is JavaScript: the playlist is a JS array literal
        # assigned to ``thePlaylist`` and has to be converted to JSON first.
        playlist = self._parse_json(
            self._search_regex(
                r'(?s)thePlaylist\s*=\s*(\[.*?\]);', javascript, 'thePlaylist'),
            playlist_id, transform_source=js_to_json)
        # String assigned to the ``n7kIjJed73`` variable; handed on as the
        # RTMP secure token for the SMIL sources below.
        token = self._search_regex(
            r'n7kIjJed73\s*=\s*\'(.*?)\';', javascript, 'token')

        entries = []
        for index, item in enumerate(playlist):
            video_id = '%s-%u' % (playlist_id, index)
            formats = []
            for source in item['sources']:
                if source.get('type') == 'rtmp':
                    # NOTE(review): relies on _extract_smil_formats accepting
                    # an ``rtmp_securetoken`` keyword - confirm against
                    # common.py, which is not part of this change.
                    formats.extend(self._extract_smil_formats(
                        '//stolaf.edu' + source['file'], video_id,
                        rtmp_securetoken=token))
                else:
                    formats.extend(self._extract_m3u8_formats(
                        source['file'], video_id, 'mp4'))
            # Formats are collected from several sources in arbitrary order;
            # sort them so format selection sees a worst-to-best list.
            self._sort_formats(formats)
            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'formats': formats,
                'thumbnail': item.get('image'),
            })

        if len(entries) == 1:
            # A single-item playlist is reported as a plain video carrying
            # the event id rather than the "<id>-0" entry id.
            result = entries[0]
            result['id'] = playlist_id
            return result

        return {
            '_type': 'multi_video',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': entries,
        }