[wdr] Use old extractor method and added more formats
This commit is contained in:
parent
b4fd2653ea
commit
15db1db897
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -17,15 +16,16 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WDRIE(InfoExtractor):
|
class WDRIE(InfoExtractor):
|
||||||
_PLAYER_REGEX = 'https?://deviceids-medstdp.wdr.de/ondemand/.+?/.+?\.js'
|
_PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
|
||||||
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)\.html'
|
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/hier_und_heute/videostreetfoodpioniere100.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/hier_und_heute/videostreetfoodpioniere100.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-750693',
|
'id': 'mdb-750693',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Streetfood-Pioniere',
|
'title': 'HIER UND HEUTE: Streetfood-Pioniere',
|
||||||
'description': 'md5:bff1fdc6de7df044ac2bec13ab46e6a9',
|
'description': 'md5:bff1fdc6de7df044ac2bec13ab46e6a9',
|
||||||
'upload_date': '20150703',
|
'upload_date': '20150703',
|
||||||
'is_live': False
|
'is_live': False
|
||||||
@ -41,8 +41,8 @@ class WDRIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-726385',
|
'id': 'mdb-726385',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Weselsky | 1LIVE Bahnansage (04.06.2015)',
|
'title': '1LIVE Bahnansage',
|
||||||
'description': 'md5:8b9ef2af8c1bb01394ab98f3450ff04d',
|
'description': 'md5:36016b06288e1f1a5b2602c8fe947b8d',
|
||||||
'upload_date': '20150604',
|
'upload_date': '20150604',
|
||||||
'is_live': False
|
'is_live': False
|
||||||
},
|
},
|
||||||
@ -54,7 +54,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'id': 'mdb-752045',
|
'id': 'mdb-752045',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Roskilde Festival 2015',
|
'title': 'Roskilde Festival 2015',
|
||||||
'description': 'md5:48e7a0a884c0e841a9d9174e27c67df3',
|
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
||||||
'upload_date': '20150702',
|
'upload_date': '20150702',
|
||||||
'is_live': False
|
'is_live': False
|
||||||
},
|
},
|
||||||
@ -82,82 +82,99 @@ class WDRIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _overiew_page_extractor(self, page_url, page_id, webpage):
|
|
||||||
entries = []
|
|
||||||
for page_num in itertools.count(2):
|
|
||||||
hrefs = re.findall(
|
|
||||||
r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
|
|
||||||
webpage)
|
|
||||||
entries.extend(
|
|
||||||
self.url_result(page_url + href, 'WDR')
|
|
||||||
for href in hrefs)
|
|
||||||
next_url_m = re.search(
|
|
||||||
r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
|
|
||||||
if not next_url_m:
|
|
||||||
break
|
|
||||||
next_url = page_url + next_url_m.group(1)
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
next_url, page_id,
|
|
||||||
note='Downloading playlist page %d' % page_num)
|
|
||||||
return self.playlist_result(entries, page_id)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_url = mobj.group('url')
|
page_url = mobj.group('url')
|
||||||
page_id = mobj.group('id')
|
page_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
entries = re.search(r'%s' % self._PLAYER_REGEX, webpage)
|
|
||||||
|
|
||||||
if entries is None: # Overview page
|
if mobj.group('player') is None:
|
||||||
return self._overiew_page_extractor(page_url, page_id, webpage)
|
entries = [
|
||||||
|
self.url_result(page_url + href, 'WDR')
|
||||||
|
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
|
||||||
|
]
|
||||||
|
|
||||||
jsonpage = self._download_webpage(entries.group(0), entries.group(0))
|
if entries: # Playlist page
|
||||||
jsonvars = json.loads(jsonpage[38:-2])
|
return self.playlist_result(entries, page_id)
|
||||||
|
|
||||||
|
# Overview page
|
||||||
|
entries = []
|
||||||
|
for page_num in itertools.count(2):
|
||||||
|
hrefs = re.findall(
|
||||||
|
r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
|
||||||
|
webpage)
|
||||||
|
entries.extend(
|
||||||
|
self.url_result(page_url + href, 'WDR')
|
||||||
|
for href in hrefs)
|
||||||
|
next_url_m = re.search(
|
||||||
|
r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
|
||||||
|
if not next_url_m:
|
||||||
|
break
|
||||||
|
next_url = page_url + next_url_m.group(1)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
next_url, page_id,
|
||||||
|
note='Downloading playlist page %d' % page_num)
|
||||||
|
return self.playlist_result(entries, page_id)
|
||||||
|
|
||||||
page_id = jsonvars['trackerData']['trackerClipId']
|
|
||||||
title = jsonvars['trackerData']['trackerClipTitle']
|
|
||||||
formats = []
|
formats = []
|
||||||
for _id, video_field in jsonvars['mediaResource'].items():
|
flashvars = compat_parse_qs(
|
||||||
if 'videoURL' in video_field:
|
self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
|
||||||
video_url = video_field['videoURL']
|
|
||||||
elif 'audioURL' in video_field:
|
|
||||||
video_url = video_field['audioURL']
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
is_live = video_field.get('flashvarsExt', {'isLive': '0'})
|
|
||||||
is_live = is_live.get('isLive', '0') == '1'
|
|
||||||
|
|
||||||
if video_url.endswith('.f4m'):
|
page_id = flashvars['trackerClipId'][0]
|
||||||
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
video_url = flashvars['dslSrc'][0]
|
||||||
ext = 'flv'
|
title = flashvars['trackerClipTitle'][0]
|
||||||
elif video_url.endswith('.smil'):
|
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
||||||
fmt = self._extract_smil_formats(video_url, page_id)[0]
|
is_live = flashvars.get('isLive', ['0'])[0] == '1'
|
||||||
video_url = fmt['url']
|
|
||||||
sep = '&' if '?' in video_url else '?'
|
|
||||||
video_url += sep
|
|
||||||
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
|
|
||||||
ext = fmt['ext']
|
|
||||||
else:
|
|
||||||
ext = determine_ext(video_url)
|
|
||||||
|
|
||||||
formats.append({'url': video_url, 'ext': ext, 'format_id': _id})
|
|
||||||
|
|
||||||
thumbnail = re.search('<div class="illustrationCont w960">\n<div class="linkCont">\n<img src="(?P<thumbnail>.+?)"', webpage)
|
|
||||||
if thumbnail is not None:
|
|
||||||
thumbnail = page_url + thumbnail.group('thumbnail')
|
|
||||||
|
|
||||||
if is_live:
|
if is_live:
|
||||||
title = self._live_title(title)
|
title = self._live_title(title)
|
||||||
|
|
||||||
if 'trackerClipAirTime' in jsonvars['trackerData']:
|
if 'trackerClipAirTime' in flashvars:
|
||||||
upload_date = jsonvars['trackerData']['trackerClipAirTime']
|
upload_date = flashvars['trackerClipAirTime'][0]
|
||||||
else:
|
else:
|
||||||
upload_date = self._html_search_meta('DC.Date', webpage, 'content')
|
upload_date = self._html_search_meta('DC.Date', webpage, 'content')
|
||||||
|
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
if video_url.endswith('.f4m'):
|
||||||
|
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
||||||
|
ext = 'flv'
|
||||||
|
elif video_url.endswith('.smil'):
|
||||||
|
fmt = self._extract_smil_formats(video_url, page_id)[0]
|
||||||
|
video_url = fmt['url']
|
||||||
|
sep = '&' if '?' in video_url else '?'
|
||||||
|
video_url += sep
|
||||||
|
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
|
||||||
|
ext = fmt['ext']
|
||||||
|
else:
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
|
formats.append({'ext': ext, 'url': video_url})
|
||||||
|
|
||||||
|
m3u8_url = re.search(r'<li>\n<a rel="adaptiv" type="application/vnd\.apple\.mpegURL" href="(?P<link>.+?)"', webpage)
|
||||||
|
|
||||||
|
if m3u8_url is not None:
|
||||||
|
m3u8_url = m3u8_url.group('link')
|
||||||
|
formats.append({'ext': 'm3u8', 'url': m3u8_url})
|
||||||
|
|
||||||
|
webL_quality = -1
|
||||||
|
for video_vars in re.findall(r'<li>\n<a rel="(?P<format_id>web.?)" href=".+?/(?P<link>fsk.+?)"', webpage):
|
||||||
|
format_id = video_vars[0]
|
||||||
|
video_url = 'http://ondemand-ww.wdr.de/medstdp/' + video_vars[1]
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
if format_id == 'webL':
|
||||||
|
quality = webL_quality
|
||||||
|
webL_quality -= 1
|
||||||
|
if format_id == 'webM':
|
||||||
|
quality = -3
|
||||||
|
if format_id == 'webS':
|
||||||
|
quality = -4
|
||||||
|
formats.append({'format_id': format_id, 'ext': ext, 'url': video_url, 'source_preference': quality})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self._html_search_meta('Description', webpage, 'content')
|
description = self._html_search_meta('Description', webpage, 'content')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user