From 6f2c248d1642d0835e94f078538aa463ad19d69b Mon Sep 17 00:00:00 2001
From: Dennis Scheiba
Date: Sun, 31 Jul 2016 03:39:48 +0200
Subject: [PATCH] Nobelprize Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/nobelprize.py | 52 ++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 youtube_dl/extractor/nobelprize.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 53fab1a31..dd71a3a96 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -546,6 +546,7 @@ from .ninecninemedia import NineCNineMediaIE
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
+from .nobelprize import NobelprizeIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
 from .nosvideo import NosVideoIE
diff --git a/youtube_dl/extractor/nobelprize.py b/youtube_dl/extractor/nobelprize.py
new file mode 100644
index 000000000..393be1dc7
--- /dev/null
+++ b/youtube_dl/extractor/nobelprize.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    clean_html,
+    get_element_by_class,
+)
+
+
+class NobelprizeIE(InfoExtractor):
+    """Extractor for videos served by the nobelprize.org media player."""
+
+    # The numeric ``id`` query parameter identifies the video; the named
+    # group is required by ``_match_id``.
+    _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer/.+?\bid=(?P<id>[0-9]+)'
+    IE_DESC = 'Nobelprize'
+
+    _TEST = {
+        'url': 'https://www.nobelprize.org/mediaplayer/index.php?id=2028',
+        'md5': '19bb7134879a6e8f0731235f3c076321',
+        'info_dict': {
+            'id': '2028',
+            'ext': 'mp4',
+            'title': 'Acceptance Speech by Elie Wiesel (18 minutes)'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, formats) for a media player URL."""
+        video_id = self._match_id(url)
+
+        # Pass the video id (not the ``id`` builtin) so progress and error
+        # messages are labelled with the video being processed.
+        webpage = self._download_webpage(url, video_id)
+
+        # The page embeds the URL of an Akamai HLS master playlist that
+        # lists all available streams.
+        m3u8_playlist = self._search_regex(
+            r'(https?://nobelvod-vh\.akamaihd\.net/i/flashcontent/.+?master\.m3u8)',
+            webpage,
+            'm3u8 url',
+        )
+
+        formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': clean_html(get_element_by_class('video-headline', webpage)),
+            'formats': formats,
+        }