From 8fde7f7dd95fa0047930e1d156208f03e81f9ffb Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 20:38:52 -0400 Subject: [PATCH] [Generic] Check iframes for known file extensions --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 658533cf6..01b20fbdc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -21,6 +21,7 @@ from ..utils import ( HEADRequest, is_html, js_to_json, + KNOWN_EXTENSIONS, orderedSet, sanitized_Request, smuggle_url, @@ -1882,6 +1883,14 @@ class GenericIE(InfoExtractor): video_description = self._og_search_description(webpage, default=None) video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Maybe the video is actually in an iframe we don't have special knowledge of. + # Let's look for direct links in file extensions. + matches = re.findall( + r'<iframe[^>]+?src="([^"]+\.(?:%s)(?:\?[^"]*)?)"' % '|'.join(KNOWN_EXTENSIONS), + webpage) + if matches: + return self.playlist_from_matches(matches, video_id, video_title) + # Look for Brightcove Legacy Studio embeds bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) if bc_urls: