diff --git a/youtube_dl/extractor/kanal2.py b/youtube_dl/extractor/kanal2.py index 7a405e561..03b4f964b 100644 --- a/youtube_dl/extractor/kanal2.py +++ b/youtube_dl/extractor/kanal2.py @@ -19,7 +19,7 @@ import re class Kanal2IE(InfoExtractor): - _VALID_URL = r'(?Phttps?:\/\/.+\.postimees\.ee)[a-zA-Z0-9\/._-]+\?[a-zA-Z0-9=&._-]*id=(?P[a-zA-Z0-9_-]+)[^ ]*' + _VALID_URL = r'(?Phttps?://.+\.postimees\.ee)[a-zA-Z0-9/._-]+\?[a-zA-Z0-9=&._-]*id=(?P[a-zA-Z0-9_-]+)[^ ]*' _TESTS = [{ # The most ordinary case 'url': 'http://kanal2.postimees.ee/pluss/video/?id=40792', @@ -85,8 +85,8 @@ class Kanal2IE(InfoExtractor): host = xmlfile.find('./playlist/video/streamItems').get('host') formats = [{ - 'protocol': re.compile('(?P.+):\/\/[^\0]*').match(host).group('protocol') or 'rtmp', - 'app': re.compile(((re.compile('(?P.+):\/\/[^\0]*').match(host).group('protocol') or 'rtmp') + ':\/\/[^\0]*\/(?P.+\/)')).match(host).group('app') or 'kanal2vod', + 'protocol': re.compile('(?P.+)://[^\0]*').match(host).group('protocol') or 'rtmp', + 'app': re.compile(((re.compile('(?P.+)://[^\0]*').match(host).group('protocol') or 'rtmp') + '://[^\0]*/(?P.+/)')).match(host).group('app') or 'kanal2vod', 'url': host + stream.get('streamName'), 'play_path': 'mp4:' + stream.get('streamName'), 'ext': 'flv', @@ -97,12 +97,12 @@ class Kanal2IE(InfoExtractor): self._sort_formats(formats) # Remove stacked urls(e.g. http://test.comhttp://test2.com, removes everything before second http(kanal12 fix)) - thumbnail = re.compile('[^\0]*(?Phttps?:\/\/[^"]+)[^\0]*').match(base + xpath_text(xmlfile, './playlist/video/thumbUrl')).group('realurl') + thumbnail = re.compile('[^\0]*(?Phttps?://[^"]+)[^\0]*').match(base + xpath_text(xmlfile, './playlist/video/thumbUrl')).group('realurl') average_rating = int_or_none(xpath_text(xmlfile, './playlist/video/rating/value')) webpage = self._download_webpage(url, video_id) if 'player-container' in webpage: - description = self._search_regex(r'[^\0]*

]*>([^<]*)<\/p>[^\0]*', webpage, 'description', default=None) + description = self._search_regex(r'[^\0]*

]*>([^<]*)

[^\0]*', webpage, 'description', default=None) if description is not None: description = description.strip() @@ -111,12 +111,12 @@ class Kanal2IE(InfoExtractor): episode = int_or_none(epandseasonregex.group('episode')) season = int_or_none(epandseasonregex.group('season')) - dateandtimeregex = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P