# Origin: commit a3158f41b12cdfc39c9fafe94a190a0740285193 by felix
# (Sat, 2 Apr 2016 11:43:17 +0200), subject "[stolaf] new extractor".
# The same patch creates this module as youtube_dl/extractor/stolaf.py and
# registers it by adding the following line to
# youtube_dl/extractor/extractors.py (between the `.steam` and `.streamable`
# imports):
#
#     from .stolaf import StOlafIE

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import js_to_json


class StOlafIE(InfoExtractor):
    """Extractor for stolaf.edu multimedia event pages (``/multimedia/play/?e=<id>``)."""

    _VALID_URL = r'^https?://(?:www\.)?stolaf\.edu/multimedia/play/\?e=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.stolaf.edu/multimedia/play/?e=573',
        'info_dict': {
            'id': '573',
            'ext': 'mp4',
            'title': 'Senior Soloists Concert',
            'description': 'St. Olaf Orchestra & Senior Soloists',
            'thumbnail': 'http://www.stolaf.edu/multimedia/components/poster/e573',
        },
        'params': {
            'skip_download': True,  # because m3u8
        },
    }]

    # Value sent as the `html5stream` query parameter, which selects the
    # quality of the m3u8 stream. Any floating-point constant is accepted, but
    # values above 3 are clamped; only 1, 2 and 3 seem to give actual streams.
    # XXX: request all three? or transform the URL locally?
    _HTML5_STREAM_QUALITY = 3

    def _extract_item_formats(self, item, video_id, token):
        """Collect the formats of every source listed in one playlist item.

        Sources of type ``rtmp`` are fetched as SMIL documents (passing
        ``token`` as the RTMP secure token); every other source is treated as
        an m3u8 manifest. Sources without a ``file`` value are skipped.
        """
        formats = []
        for source in item.get('sources') or []:
            source_file = source.get('file')
            if not source_file:
                continue
            if source.get('type') == 'rtmp':
                # NOTE(review): `file` is presumably a server-relative path
                # here, hence the protocol-relative host prefix - confirm.
                formats.extend(self._extract_smil_formats(
                    '//stolaf.edu' + source_file, video_id,
                    rtmp_securetoken=token))
            else:
                formats.extend(self._extract_m3u8_formats(
                    source_file, video_id, 'mp4'))
        # Orders formats by quality and reports items that yielded none.
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Extract the event at ``url``.

        Reads title/description from the page's Open Graph tags, then
        downloads the player playlist script and builds one entry per
        playlist item. Returns the single entry directly (with the event id)
        when there is exactly one item, otherwise a ``multi_video`` result.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # Description is optional metadata: its absence must not abort.
        description = self._og_search_description(webpage)

        javascript = self._download_webpage(
            'http://www.stolaf.edu/multimedia/components/eventlib.cfc',
            playlist_id,
            'Downloading playlist #%d' % self._HTML5_STREAM_QUALITY,
            query={
                'method': 'getPlayerPlaylist',
                'eventtype': 'e',
                'eventid': playlist_id,
                'html5stream': self._HTML5_STREAM_QUALITY,
            })

        # The response is JavaScript: `thePlaylist` holds the item list and
        # `n7kIjJed73` holds the token passed on as the RTMP secure token.
        playlist = self._parse_json(
            self._search_regex(
                r'(?s)thePlaylist\s*=\s*(\[.*?\]);', javascript, 'thePlaylist'),
            playlist_id, transform_source=js_to_json)
        token = self._search_regex(
            r'n7kIjJed73\s*=\s*\'(.*?)\';', javascript, 'token')

        entries = []
        for index, item in enumerate(playlist):
            video_id = '%s-%d' % (playlist_id, index)
            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'formats': self._extract_item_formats(item, video_id, token),
                'thumbnail': item.get('image'),
            })

        if len(entries) == 1:
            # A lone item is the event itself; expose it under the event id.
            result = entries[0]
            result['id'] = playlist_id
            return result

        return {
            '_type': 'multi_video',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': entries,
        }