From e7a33a4eb30c3eaebffd1af6a34a19c429eef565 Mon Sep 17 00:00:00 2001
From: runningbits
Date: Mon, 3 Apr 2017 23:27:15 +0200
Subject: [PATCH] [spiegeltv] ability to extract correct title for embedded videos (/embed/ in path)

These URLs are used on the spiegel.de main site when embedding short content from spiegel.tv.
---
 youtube_dl/extractor/spiegeltv.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py
index e1cfb8698..8163619ba 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@@ -24,6 +24,19 @@ class SpiegeltvIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        'url': 'http://www.spiegel.tv/filme/putins-trollfabriken/embed/?autoplay=true',
+        'info_dict': {
+            'id': 'putins-trollfabriken',
+            'ext': 'm4v',
+            'title': 'Putins Trollfabriken',
+            'description': 'Propagandakrieg in den sozialen Medien',
+            'thumbnail': r're:http://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }, {
         'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
         'only_matching': True,
@@ -34,7 +47,10 @@ class SpiegeltvIE(InfoExtractor):
         url = url.replace('/#/', '/')
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'(.*?)', webpage, 'title')
+        if '/embed/' not in url:
+            title = self._html_search_regex(r'(.*?)', webpage, 'title')
+        else:
+            title = self._html_search_regex(r'(.*?)(?:\s*\-\s* Embed)?', webpage, 'title')
 
         apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
         version_json = self._download_json(