Address remarks from the review of #8008
Use the generic extraction method KalturaIE._extract_url(). Add support for embedded vier.be partner videos.
This commit is contained in:
parent
9492d4aeeb
commit
f880f2847b
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url
|
smuggle_url
|
||||||
)
|
)
|
||||||
@ -21,6 +22,8 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
'id': '02036890',
|
'id': '02036890',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Krijgt zieke Pauline (3) het mooiste kerstcadeau?',
|
'title': 'Krijgt zieke Pauline (3) het mooiste kerstcadeau?',
|
||||||
|
'description': 'Er is misschien toch goed nieuws voor de zieke Pauline (3). Het Riziv buigt zich'
|
||||||
|
' namelijk over de vraag om de peperdure behandeling van 15.000 euro terug t...',
|
||||||
'thumbnail': 're:http.*jpg$',
|
'thumbnail': 're:http.*jpg$',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -32,6 +35,8 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
'id': '01986463',
|
'id': '01986463',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Angst voor terreur: fotograaf toont hoe hij de werkelijkheid kan manipuleren',
|
'title': 'Angst voor terreur: fotograaf toont hoe hij de werkelijkheid kan manipuleren',
|
||||||
|
'description': 'De metro rijdt niet, de scholen en crèches zijn dicht, vele winkels zijn gesloten. '
|
||||||
|
'Fotograaf Jimmy Kets brengt Brussel vandaag in beeld. Maar hij toont ook...',
|
||||||
'thumbnail': 're:http.*jpg$',
|
'thumbnail': 're:http.*jpg$',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -49,6 +54,19 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
'uploader_id': 'dcc-video-manager-hbvl@mediahuis.be'
|
'uploader_id': 'dcc-video-manager-hbvl@mediahuis.be'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Source: Vier.be
|
||||||
|
{
|
||||||
|
'url': 'http://www.nieuwsblad.be/cnt/dmf20170411_02829396',
|
||||||
|
'md5': '35cb487bfd8c61fe38c9838420fd0de6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '02829396',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dit is het nieuwste speeltje van Michel Van den Brande',
|
||||||
|
'description': 'In de jongste aflevering van \'The Sky is the Limit\' pronkt Michel Van den Brande'
|
||||||
|
' met zijn nieuwste speeltje: een glanzende BMW. Een van zijn medewerkers ma...',
|
||||||
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -58,10 +76,19 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
|
|
||||||
iframe_m = re.search(r'<script[^>]+src="(.+?kaltura.com.*?)"', webpage)
|
iframe_m = re.search(r'<script[^>]+src="(.+?kaltura.com.*?)"', webpage)
|
||||||
if iframe_m:
|
if iframe_m:
|
||||||
return self._extract_kaltura(url, webpage)
|
kaltura_url = KalturaIE._extract_url(webpage)
|
||||||
|
url_with_source = smuggle_url(kaltura_url, {'source_url': url})
|
||||||
|
return self.url_result(url_with_source, 'Kaltura')
|
||||||
|
|
||||||
|
iframe_m = re.search(r'<iframe[^>]+src="(.+?vier.be.*?)"', webpage)
|
||||||
|
if iframe_m:
|
||||||
|
vier_url = iframe_m.group(1)
|
||||||
|
url_with_source = smuggle_url(vier_url, {'source_url': url, 'video_id': video_id})
|
||||||
|
return self.url_result(url_with_source, 'Vier')
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
|
||||||
iframe_m = re.search(r'<iframe[^>]+src="(.+?vrt.be.*?)"', webpage)
|
iframe_m = re.search(r'<iframe[^>]+src="(.+?vrt.be.*?)"', webpage)
|
||||||
if iframe_m:
|
if iframe_m:
|
||||||
@ -77,17 +104,6 @@ class NieuwsbladIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'thumbnail': thumbnail
|
'thumbnail': thumbnail
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_kaltura(self, url, web_page):
|
|
||||||
""" Delegate the video extraction to 'Kaltura' extractor """
|
|
||||||
kaltura_id = self._search_regex(r'entry_id\s*:\s*\"(.+?)\"', web_page, 'kaltura_id')
|
|
||||||
kaltura_wid = self._search_regex(r'wid\s*\:\s*\"(.+?)\"', web_page, 'kaltura_wid')
|
|
||||||
kaltura_uiconf_id = self._search_regex(r'uiconf_id\s*:\s*\"(.+?)\"', web_page, 'kaltura_uiconf_id')
|
|
||||||
kaltura_url = (
|
|
||||||
'https://cdnapisec.kaltura.com/index.php/kwidget/wid/{0}/uiconf_id/{1}/entry_id/{2}'
|
|
||||||
.format(kaltura_wid, kaltura_uiconf_id, kaltura_id)
|
|
||||||
)
|
|
||||||
url_with_source = smuggle_url(kaltura_url, {'source_url': url})
|
|
||||||
return self.url_result(url_with_source, 'Kaltura')
|
|
||||||
|
@ -5,12 +5,19 @@ import re
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unsmuggle_url
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class VierIE(InfoExtractor):
|
class VierIE(InfoExtractor):
|
||||||
IE_NAME = 'vier'
|
IE_NAME = 'vier'
|
||||||
IE_DESC = 'vier.be and vijf.be'
|
IE_DESC = 'vier.be and vijf.be'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(' \
|
||||||
|
r'?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))' \
|
||||||
|
r'?|video/partner/embed/v2/(?P<partner_embed_id>\d+)/' \
|
||||||
|
r'?|video/v3/embed/(?P<embed_id>\d+)' \
|
||||||
|
r')'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
|
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -30,7 +37,7 @@ class VierIE(InfoExtractor):
|
|||||||
'id': '2561614',
|
'id': '2561614',
|
||||||
'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
|
'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
|
'title': 'EXTRA: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
|
||||||
'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
|
'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -43,11 +50,15 @@ class VierIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.vier.be/video/v3/embed/16129',
|
'url': 'http://www.vier.be/video/v3/embed/16129',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.vier.be/video/partner/embed/v2/2658547/4b5a8c17b5358cb1d1b48e57966721bbef6df328/srnieuwsblad/asmh',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
embed_id = mobj.group('embed_id')
|
embed_id = mobj.group('embed_id') or mobj.group('partner_embed_id')
|
||||||
display_id = mobj.group('display_id') or embed_id
|
display_id = mobj.group('display_id') or embed_id
|
||||||
site = mobj.group('site')
|
site = mobj.group('site')
|
||||||
|
|
||||||
@ -67,6 +78,11 @@ class VierIE(InfoExtractor):
|
|||||||
formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
|
formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_id = smuggled_data.get('video_id') or video_id
|
||||||
|
source_url = smuggled_data.get('source_url')
|
||||||
|
if source_url:
|
||||||
|
webpage = self._download_webpage(source_url, display_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=display_id)
|
title = self._og_search_title(webpage, default=display_id)
|
||||||
description = self._og_search_description(webpage, default=None)
|
description = self._og_search_description(webpage, default=None)
|
||||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user