[nieuwsblad] Add new extractor
This commit is contained in:
parent
16a09aefe3
commit
9ce9d546d9
90
youtube_dl/extractor/nieuwsblad.py
Normal file
90
youtube_dl/extractor/nieuwsblad.py
Normal file
@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url
|
||||
)
|
||||
|
||||
|
||||
class NieuwsbladIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nieuwsblad\.be/.+?/dmf([0-9]+?)_(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
# Source: VMMA
|
||||
{
|
||||
'url': 'http://www.nieuwsblad.be/cnt/dmf20151224_02036890',
|
||||
'md5': '3dcf2c3a140d8e54dd8376d4c4a609f4',
|
||||
'info_dict': {
|
||||
'id': '02036890',
|
||||
'ext': 'mp4',
|
||||
'title': 'Krijgt zieke Pauline (3) het mooiste kerstcadeau?',
|
||||
'thumbnail': 're:http.*jpg$',
|
||||
}
|
||||
},
|
||||
# Source: VRT
|
||||
{
|
||||
'url': 'http://www.nieuwsblad.be/cnt/dmf20151124_01986463',
|
||||
'md5': '8e46cb7ddfddeb64735fa39f105002c2',
|
||||
'info_dict': {
|
||||
'id': '01986463',
|
||||
'ext': 'mp4',
|
||||
'title': 'Angst voor terreur: fotograaf toont hoe hij de werkelijkheid kan manipuleren',
|
||||
'thumbnail': 're:http.*jpg$',
|
||||
}
|
||||
},
|
||||
# Source: Mediahuis (using kaltura)
|
||||
{
|
||||
'url': 'http://www.nieuwsblad.be/cnt/dmf20151225_02037264',
|
||||
'md5': 'a9580438899f6355550fe1d44d4cddb9',
|
||||
'info_dict': {
|
||||
'id': '1_z4jndqki',
|
||||
'ext': 'mp4',
|
||||
'title': 'autobrand Peer',
|
||||
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
||||
'timestamp': int,
|
||||
'upload_date': '20151225',
|
||||
'uploader_id': 'dcc-video-manager-hbvl@mediahuis.be'
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
iframe_m = re.search(r'<script[^>]+src="(.+?kaltura.com.*?)"', webpage)
|
||||
if iframe_m:
|
||||
return self._extract_kaltura(url, webpage)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
iframe_m = re.search(r'<iframe[^>]+src="(.+?vrt.be.*?)"', webpage)
|
||||
if iframe_m:
|
||||
webpage = self._download_webpage(iframe_m.group(1), "vrt-iframe")
|
||||
video_url = self._search_regex(r'sources.pdl = "(.*?)";', webpage, 'vrt-video')
|
||||
|
||||
iframe_m = re.search(r'<iframe[^>]+src="(.+?vmma.be.*?)"', webpage)
|
||||
if iframe_m:
|
||||
webpage = self._download_webpage(iframe_m.group(1), "vmma-iframe")
|
||||
video_url = self._search_regex(r'<source src="(.*?)"', webpage, 'vmma-video')
|
||||
|
||||
return {
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail
|
||||
}
|
||||
|
||||
def _extract_kaltura(self, url, web_page):
|
||||
kaltura_id = self._search_regex(r'\'entry_id\': \'(.+?)\'', web_page, 'kaltura_id')
|
||||
kaltura_wid = self._search_regex(r'\'wid\': \'(.+?)\'', web_page, 'kaltura_wid')
|
||||
kaltura_uiconf_id = self._search_regex(r'\'uiconf_id\': \'(.+?)\'', web_page, 'kaltura_uiconf_id')
|
||||
kaltura_url = (
|
||||
'https://cdnapisec.kaltura.com/index.php/kwidget/wid/%s/uiconf_id/%s/entry_id/%s' %
|
||||
(kaltura_wid, kaltura_uiconf_id, kaltura_id)
|
||||
)
|
||||
url_with_source = smuggle_url(kaltura_url, {'source_url': url})
|
||||
return self.url_result(url_with_source, 'Kaltura')
|
Loading…
x
Reference in New Issue
Block a user