[nieuwsblad] Add new extractor (Closes #7662)
Fix the regexes used to extract the Kaltura URL fields (id, wid and uiconf_id). Update the unit test for the new Kaltura file format: changed from mp4 to mov.
This commit is contained in:
parent
9ce9d546d9
commit
9492d4aeeb
@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NieuwsbladIE(InfoExtractor):
|
class NieuwsbladIE(InfoExtractor):
|
||||||
|
""" Extractor for www.nieuwsblad.be """
|
||||||
_VALID_URL = r'https?://(?:www\.)?nieuwsblad\.be/.+?/dmf([0-9]+?)_(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?nieuwsblad\.be/.+?/dmf([0-9]+?)_(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Source: VMMA
|
# Source: VMMA
|
||||||
@ -37,10 +38,10 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
# Source: Mediahuis (using kaltura)
|
# Source: Mediahuis (using kaltura)
|
||||||
{
|
{
|
||||||
'url': 'http://www.nieuwsblad.be/cnt/dmf20151225_02037264',
|
'url': 'http://www.nieuwsblad.be/cnt/dmf20151225_02037264',
|
||||||
'md5': 'a9580438899f6355550fe1d44d4cddb9',
|
'md5': 'd4decdc7f105c26767b928c54c7d5184',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_z4jndqki',
|
'id': '1_z4jndqki',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'autobrand Peer',
|
'title': 'autobrand Peer',
|
||||||
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
||||||
'timestamp': int,
|
'timestamp': int,
|
||||||
@ -51,6 +52,7 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
""" Extract the video info from the given 'nieuwsblad' URL """
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -79,12 +81,13 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_kaltura(self, url, web_page):
|
def _extract_kaltura(self, url, web_page):
|
||||||
kaltura_id = self._search_regex(r'\'entry_id\': \'(.+?)\'', web_page, 'kaltura_id')
|
""" Delegate the video extraction to 'Kaltura' extractor """
|
||||||
kaltura_wid = self._search_regex(r'\'wid\': \'(.+?)\'', web_page, 'kaltura_wid')
|
kaltura_id = self._search_regex(r'entry_id\s*:\s*\"(.+?)\"', web_page, 'kaltura_id')
|
||||||
kaltura_uiconf_id = self._search_regex(r'\'uiconf_id\': \'(.+?)\'', web_page, 'kaltura_uiconf_id')
|
kaltura_wid = self._search_regex(r'wid\s*\:\s*\"(.+?)\"', web_page, 'kaltura_wid')
|
||||||
|
kaltura_uiconf_id = self._search_regex(r'uiconf_id\s*:\s*\"(.+?)\"', web_page, 'kaltura_uiconf_id')
|
||||||
kaltura_url = (
|
kaltura_url = (
|
||||||
'https://cdnapisec.kaltura.com/index.php/kwidget/wid/%s/uiconf_id/%s/entry_id/%s' %
|
'https://cdnapisec.kaltura.com/index.php/kwidget/wid/{0}/uiconf_id/{1}/entry_id/{2}'
|
||||||
(kaltura_wid, kaltura_uiconf_id, kaltura_id)
|
.format(kaltura_wid, kaltura_uiconf_id, kaltura_id)
|
||||||
)
|
)
|
||||||
url_with_source = smuggle_url(kaltura_url, {'source_url': url})
|
url_with_source = smuggle_url(kaltura_url, {'source_url': url})
|
||||||
return self.url_result(url_with_source, 'Kaltura')
|
return self.url_result(url_with_source, 'Kaltura')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user