[spiegeltv] Extract the correct title for embedded videos (/embed/ in path)

These URLs are used on the spiegel.de main site when embedding short content from spiegel.tv.
2017-04-03 23:27:15 +02:00 · 2017-04-03 23:27:15 +02:00 · e7a33a4eb3
commit e7a33a4eb3
parent b68e00b08a
1 changed files with 17 additions and 1 deletions
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@ -24,6 +24,19 @@ class SpiegeltvIE(InfoExtractor):
            # m3u8 download
            'skip_download': True,
        }
+    }, {
+        'url': 'http://www.spiegel.tv/filme/putins-trollfabriken/embed/?autoplay=true',
+        'info_dict': {
+            'id': 'putins-trollfabriken',
+            'ext': 'm4v',
+            'title': 'Putins Trollfabriken',
+            'description': 'Propagandakrieg in den sozialen Medien',
+            'thumbnail': r're:http://.*\.jpg$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
    }, {
        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
        'only_matching': True,
@ -34,7 +47,10 @@ class SpiegeltvIE(InfoExtractor):
            url = url.replace('/#/', '/')
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
+        if '/embed/' not in url:
+            title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
+        else:
+            title = self._html_search_regex(r'<title.*?>(.*?)(?:\s*\-\s* Embed)?</title>', webpage, 'title')

        apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
        version_json = self._download_json(