Nobelprize Add new extractor

This commit is contained in:
Dennis Scheiba 2016-07-31 03:39:48 +02:00
parent cf03e34ad3
commit 6f2c248d16
2 changed files with 46 additions and 0 deletions

View File

@ -546,6 +546,7 @@ from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .nobelprize import NobelprizeIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE

View File

@ -0,0 +1,45 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
)
class NobelprizeIE(InfoExtractor):
    """Extractor for videos served by the nobelprize.org media player.

    Matches media player URLs that carry a four-digit ``id`` query value,
    locates the HLS master playlist referenced in the page and returns the
    formats listed in that playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer/.+?id=(?P<id>[0-9]{4})'
    IE_DESC = 'Nobelprize'
    _TEST = {
        'url': 'https://www.nobelprize.org/mediaplayer/index.php?id=2028',
        'md5': '19bb7134879a6e8f0731235f3c076321',
        'info_dict': {
            'id': '2028',
            'ext': 'mp4',
            'title': 'Acceptance Speech by Elie Wiesel (18 minutes)',
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for the video at *url*."""
        video_id = self._match_id(url)
        # Pass the extracted video id (not the ``id`` builtin) so that
        # progress and error messages are attributed to the right video.
        webpage = self._download_webpage(url, video_id)

        # The page embeds the URL of an HLS master playlist that lists all
        # available streams.  The host name dots are escaped so they only
        # match literal dots, and the match is non-greedy so it stops at the
        # first ``master.m3u8`` instead of spanning several URLs on one line.
        m3u8_playlist = self._search_regex(
            r'(https?://nobelvod-vh\.akamaihd\.net/i/flashcontent/.+?master\.m3u8)',
            webpage, 'm3u8 url')

        formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4')
        # Order the formats so the preferred one is selected by default.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': clean_html(get_element_by_class('video-headline', webpage)),
            'formats': formats,
        }