From 15db1db89788ac5370d34ded59476ba7b5911763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrn=20Brodersen?= Date: Sat, 4 Jul 2015 13:14:14 +0200 Subject: [PATCH] [wdr] Use old extractor method and added more formats --- youtube_dl/extractor/wdr.py | 141 ++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1e2900c02..23e26f8d3 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import itertools import re -import json from .common import InfoExtractor from ..compat import ( @@ -17,15 +16,16 @@ from ..utils import ( class WDRIE(InfoExtractor): - _PLAYER_REGEX = 'https?://deviceids-medstdp.wdr.de/ondemand/.+?/.+?\.js' - _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)\.html' + _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?' + _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX + _TESTS = [ { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/hier_und_heute/videostreetfoodpioniere100.html', 'info_dict': { 'id': 'mdb-750693', 'ext': 'mp4', - 'title': 'Streetfood-Pioniere', + 'title': 'HIER UND HEUTE: Streetfood-Pioniere', 'description': 'md5:bff1fdc6de7df044ac2bec13ab46e6a9', 'upload_date': '20150703', 'is_live': False @@ -41,8 +41,8 @@ class WDRIE(InfoExtractor): 'info_dict': { 'id': 'mdb-726385', 'ext': 'mp3', - 'title': 'Weselsky | 1LIVE Bahnansage (04.06.2015)', - 'description': 'md5:8b9ef2af8c1bb01394ab98f3450ff04d', + 'title': '1LIVE Bahnansage', + 'description': 'md5:36016b06288e1f1a5b2602c8fe947b8d', 'upload_date': '20150604', 'is_live': False }, @@ -54,7 +54,7 @@ class WDRIE(InfoExtractor): 'id': 'mdb-752045', 'ext': 'mp3', 'title': 'Roskilde Festival 2015', - 'description': 'md5:48e7a0a884c0e841a9d9174e27c67df3', + 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'upload_date': 
'20150702', 'is_live': False }, @@ -82,82 +82,99 @@ class WDRIE(InfoExtractor): } ] - def _overiew_page_extractor(self, page_url, page_id, webpage): - entries = [] - for page_num in itertools.count(2): - hrefs = re.findall( - r'
  • \s*]*>\s*\s*\s*]*>\s*\s*\n