DiscoveryIE: make playlist extraction tolerate entries with blank 'src' values

There are a lot of such entries here; I assume the videos are disabled. Example: http://www.discovery.com/tv-shows/other-shows/videos/other-shows-when-we-left-earth-videos/ Every video has an 'embedurl', but the URLs in question are not supported yet (TODO).
2015-07-13 07:52:03 -05:00 · 2015-07-13 07:52:03 -05:00 · d9dd8b5664
commit d9dd8b5664
parent 41c0d2f8cb
1 changed file with 22 additions and 14 deletions
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -41,19 +41,27 @@ class DiscoveryIE(InfoExtractor):
        info = self._download_json(url + '?flat=1', video_id)
        video_title = info.get('playlist_title') or info.get('video_title')
-
+        entries = []
-        entries = [{
+        collected = {}
-            'id': compat_str(video_info['id']),
+        for idx, video_info in enumerate(info['playlist']):
-            'formats': self._extract_m3u8_formats(
+            if collected.get( video_info.get('id') ): 
-                video_info['src'], video_id, ext='mp4',
+                 continue
-                note='Download m3u8 information for video %d' % (idx + 1)),
+            collected[video_info.get('id')] = True
-            'title': video_info['title'],
+            if video_info['src'] == '':
-            'description': video_info.get('description'),
+                 self.report_warning('video "%s" does not have a src url' % video_info.get('id','UNKNOWN'))
-            'duration': parse_duration(video_info.get('video_length')),
+                 continue
-            'webpage_url': video_info.get('href'),
+            entries.append({
-            'thumbnail': video_info.get('thumbnailURL'),
+                'id': compat_str(video_info['id']),
-            'alt_title': video_info.get('secondary_title'),
+                'formats': self._extract_m3u8_formats(
-            'timestamp': parse_iso8601(video_info.get('publishedDate')),
+                    video_info['src'], video_id, ext='mp4',
-        } for idx, video_info in enumerate(info['playlist'])]
+                    note='Download m3u8 information for video %d' % (idx + 1)),
                'title': video_info['title'],
                'description': video_info.get('description'),
                'duration': parse_duration(video_info.get('video_length')),
                'webpage_url': video_info.get('href'),
                'thumbnail': video_info.get('thumbnailURL'),
                'alt_title': video_info.get('secondary_title'),
                'timestamp': parse_iso8601(video_info.get('publishedDate')),
            })
        return self.playlist_result(entries, video_id, video_title)