[stolaf] new extractor

2016-04-02 11:43:17 +02:00 · 2016-04-02 11:43:17 +02:00 · a3158f41b1
commit a3158f41b1
parent e46295036c
2 changed files with 79 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1056,6 +1056,7 @@ from .srgssr import (
 from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
+from .stolaf import StOlafIE
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
--- a/youtube_dl/extractor/stolaf.py
+++ b/youtube_dl/extractor/stolaf.py
@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class StOlafIE(InfoExtractor):
+    """Extractor for stolaf.edu multimedia event pages (``/multimedia/play/?e=N``).
+
+    Title and description come from the event page's OpenGraph tags; the
+    stream sources come from a JavaScript playlist served by ``eventlib.cfc``.
+    """
+
+    _VALID_URL = r'^https?://(?:www\.)?stolaf\.edu/multimedia/play/\?e=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.stolaf.edu/multimedia/play/?e=573',
+        'info_dict': {
+            'id': '573',
+            'ext': 'mp4',
+            'title': 'Senior Soloists Concert',
+            'description': 'St. Olaf Orchestra & Senior Soloists',
+            'thumbnail': 'http://www.stolaf.edu/multimedia/components/poster/e573',
+        },
+        'params': {
+            # because m3u8
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Extract the video(s) of one event page.
+
+        Returns a single video info dict when the playlist holds exactly one
+        item (its id is then the event id), otherwise a ``multi_video``
+        result whose entries are identified as ``<event id>-<index>``.
+        """
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        title = self._og_search_property('title', webpage)
+        # The description is optional metadata: a page without it must not
+        # abort the whole extraction.
+        description = self._og_search_property(
+            'description', webpage, fatal=False)
+
+        javascript = self._download_webpage(
+            'http://www.stolaf.edu/multimedia/components/eventlib.cfc',
+            playlist_id, 'Downloading playlist #3',
+            query={
+                'method': 'getPlayerPlaylist',
+                'eventtype': 'e',
+                'eventid': playlist_id,
+                # param below selects quality of the m3u8 stream; any floating-point
+                # constant is accepted, but values above 3 are clamped. only
+                # 1, 2 and 3 seem to give actual streams, though.
+                # XXX: request all three? or transform the URL locally?
+                'html5stream': 3
+            })
+        # The response is JavaScript, not JSON: cut out the array literal
+        # assigned to thePlaylist and let js_to_json make it parseable.
+        playlist = self._parse_json(
+            self._search_regex(
+                r'(?s)thePlaylist\s*=\s*(\[.*?\]);', javascript, 'thePlaylist'),
+            playlist_id, transform_source=js_to_json)
+        # Value of the n7kIjJed73 variable in the same script; it is handed to
+        # the SMIL extraction as the RTMP secure token.
+        token = self._search_regex(
+            r'n7kIjJed73\s*=\s*\'(.*?)\';', javascript, 'token')
+
+        entries = []
+        for index, item in enumerate(playlist):
+            video_id = '%s-%u' % (playlist_id, index)
+            formats = []
+            for source in item['sources']:
+                if source.get('type') == 'rtmp':
+                    # Use an absolute URL: there is no base document here
+                    # against which a scheme-relative '//host/...' reference
+                    # could be resolved.
+                    # NOTE(review): rtmp_securetoken must be supported by
+                    # _extract_smil_formats in this tree - confirm.
+                    formats.extend(self._extract_smil_formats(
+                        'http://stolaf.edu' + source['file'], video_id,
+                        rtmp_securetoken=token))
+                else:
+                    formats.extend(self._extract_m3u8_formats(
+                        source['file'], video_id, 'mp4'))
+            # Formats have to be ordered from worst to best quality.
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'formats': formats,
+                'thumbnail': item.get('image'),
+            })
+
+        # A single-item playlist is reported as a plain video carrying the
+        # event id instead of the '<id>-0' entry id.
+        if len(entries) == 1:
+            result = entries[0]
+            result['id'] = playlist_id
+            return result
+
+        return {
+            '_type': 'multi_video',
+            'id': playlist_id,
+            'title': title,
+            'description': description,
+            'entries': entries,
+        }