From d9dd8b5664e5e737806e74d9044ee345e51a945f Mon Sep 17 00:00:00 2001 From: fnord Date: Mon, 13 Jul 2015 07:52:03 -0500 Subject: [PATCH 1/2] DiscoveryIE: make playlist extraction tolerate entries with blank 'src' values There are a lot of them here. I assume the videos are disabled. http://www.discovery.com/tv-shows/other-shows/videos/other-shows-when-we-left-earth-videos/ There is an 'embedurl' for all of the videos; however, there is no support for the URLs in question (TODO). --- youtube_dl/extractor/discovery.py | 36 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index d6723ecf2..5046a56bb 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -41,19 +41,27 @@ class DiscoveryIE(InfoExtractor): info = self._download_json(url + '?flat=1', video_id) video_title = info.get('playlist_title') or info.get('video_title') - - entries = [{ - 'id': compat_str(video_info['id']), - 'formats': self._extract_m3u8_formats( - video_info['src'], video_id, ext='mp4', - note='Download m3u8 information for video %d' % (idx + 1)), - 'title': video_info['title'], - 'description': video_info.get('description'), - 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href'), - 'thumbnail': video_info.get('thumbnailURL'), - 'alt_title': video_info.get('secondary_title'), - 'timestamp': parse_iso8601(video_info.get('publishedDate')), - } for idx, video_info in enumerate(info['playlist'])] + entries = [] + collected = {} + for idx, video_info in enumerate(info['playlist']): + if collected.get( video_info.get('id') ): + continue + collected[video_info.get('id')] = True + if video_info['src'] == '': + self.report_warning('video "%s" does not have a src url' % video_info.get('id','UNKNOWN')) + continue + entries.append({ + 'id': compat_str(video_info['id']), + 'formats': self._extract_m3u8_formats( 
video_info['src'], video_id, ext='mp4', + note='Download m3u8 information for video %d' % (idx + 1)), + 'title': video_info['title'], + 'description': video_info.get('description'), + 'duration': parse_duration(video_info.get('video_length')), + 'webpage_url': video_info.get('href'), + 'thumbnail': video_info.get('thumbnailURL'), + 'alt_title': video_info.get('secondary_title'), + 'timestamp': parse_iso8601(video_info.get('publishedDate')), + }) return self.playlist_result(entries, video_id, video_title) From eeb414652d21f762ccf863f703b353f699f21a69 Mon Sep 17 00:00:00 2001 From: fnord Date: Fri, 17 Jul 2015 03:31:57 -0500 Subject: [PATCH 2/2] discovery: fix syntax --- youtube_dl/extractor/discovery.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 5046a56bb..e8e1cca2e 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -44,12 +44,12 @@ class DiscoveryIE(InfoExtractor): entries = [] collected = {} for idx, video_info in enumerate(info['playlist']): - if collected.get( video_info.get('id') ): - continue + if collected.get(video_info.get('id')): + continue collected[video_info.get('id')] = True if video_info['src'] == '': - self.report_warning('video "%s" does not have a src url' % video_info.get('id','UNKNOWN')) - continue + self.report_warning('video "%s" does not have a src url' % video_info.get('id', 'UNKNOWN')) + continue entries.append({ 'id': compat_str(video_info['id']), 'formats': self._extract_m3u8_formats(