[stolaf] new extractor

This commit is contained in:
felix 2016-04-02 11:43:17 +02:00
parent e46295036c
commit a3158f41b1
2 changed files with 79 additions and 0 deletions

View File

@@ -1056,6 +1056,7 @@ from .srgssr import (
from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .stolaf import StOlafIE
from .streamable import StreamableIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE

View File

@@ -0,0 +1,78 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import js_to_json
class StOlafIE(InfoExtractor):
    """Extractor for St. Olaf College multimedia event pages.

    An event page (``/multimedia/play/?e=<id>``) is backed by a JavaScript
    playlist served by ``eventlib.cfc``. Each playlist item lists one or
    more sources, which are either RTMP (described by a SMIL document) or
    HLS (m3u8) streams. A playlist with a single item is returned as a plain
    video; anything else is returned as a ``multi_video`` result.
    """

    _VALID_URL = r'^https?://(?:www\.)?stolaf\.edu/multimedia/play/\?e=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.stolaf.edu/multimedia/play/?e=573',
        'info_dict': {
            'id': '573',
            'ext': 'mp4',
            'title': 'Senior Soloists Concert',
            'description': 'St. Olaf Orchestra & Senior Soloists',
            'thumbnail': 'http://www.stolaf.edu/multimedia/components/poster/e573',
        },
        'params': {
            'skip_download': True,  # because m3u8
        },
    }]

    # Value sent as the ``html5stream`` query parameter, which selects the
    # quality of the m3u8 stream. Any floating-point constant is accepted,
    # but values above 3 are clamped; only 1, 2 and 3 seem to give actual
    # streams, though.
    # XXX: request all three? or transform the URL locally?
    _STREAM_QUALITY = 3

    def _real_extract(self, url):
        """Build the info dict (or ``multi_video`` playlist) for *url*.

        Downloads the event page for its Open Graph metadata, then the
        player playlist script, and collects the formats of every playlist
        item.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_property('title', webpage)
        # The description is optional metadata: a page without it must not
        # abort the whole extraction.
        description = self._og_search_property(
            'description', webpage, fatal=False)

        javascript = self._download_webpage(
            'http://www.stolaf.edu/multimedia/components/eventlib.cfc',
            playlist_id, 'Downloading playlist #%u' % self._STREAM_QUALITY,
            query={
                'method': 'getPlayerPlaylist',
                'eventtype': 'e',
                'eventid': playlist_id,
                'html5stream': self._STREAM_QUALITY,
            })

        # The response is a script, not JSON: pull out the array literal
        # assigned to ``thePlaylist`` and convert it to JSON before parsing.
        playlist = self._parse_json(
            self._search_regex(
                r'(?s)thePlaylist\s*=\s*(\[.*?\]);', javascript, 'thePlaylist'),
            playlist_id, transform_source=js_to_json)
        # String assigned to ``n7kIjJed73`` in the same script; it is handed
        # to the SMIL extraction as the RTMP secure token.
        token = self._search_regex(
            r'n7kIjJed73\s*=\s*\'(.*?)\';', javascript, 'token')

        entries = []
        for index, item in enumerate(playlist):
            video_id = '%s-%u' % (playlist_id, index)
            formats = []
            # Tolerate items without sources and sources without a file
            # instead of failing with a bare KeyError.
            for source in item.get('sources') or []:
                file_url = source.get('file')
                if not file_url:
                    continue
                if source.get('type') == 'rtmp':
                    # RTMP sources carry a host-relative path to a SMIL
                    # document. Use an explicit scheme so that URLs inside
                    # the SMIL can be resolved against an absolute base.
                    # NOTE(review): relies on _extract_smil_formats
                    # accepting ``rtmp_securetoken`` - confirm against
                    # InfoExtractor in .common.
                    formats.extend(self._extract_smil_formats(
                        'http://stolaf.edu' + file_url, video_id,
                        rtmp_securetoken=token))
                else:
                    formats.extend(self._extract_m3u8_formats(
                        file_url, video_id, 'mp4'))
            self._sort_formats(formats)

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'formats': formats,
                'thumbnail': item.get('image'),
            })

        if len(entries) == 1:
            # A lone item is the event itself: expose it as a plain video
            # under the event id rather than as a one-element playlist.
            result = entries[0]
            result['id'] = playlist_id
            return result

        return {
            '_type': 'multi_video',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': entries,
        }