From 7cdeaca34b452531d3d8c0e30246c96a4ca6e5bb Mon Sep 17 00:00:00 2001
From: fnord <fnord@fnord.mobi>
Date: Mon, 13 Jul 2015 10:47:14 -0500
Subject: [PATCH] generic: dynamically find extractor for iframes/embeds/etc if
 static methods fail

---
 youtube_dl/extractor/generic.py | 62 +++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 392ad3648..4ec7d9c2a 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -970,6 +970,57 @@ class GenericIE(InfoExtractor):
             'title': title,
         }
 
+    def _extract_plugin_embeds(self, webpage, url):
+        """Find embedded URLs that another working extractor can handle.
+
+        Scans webpage for data-video-url, <meta content>, <embed src> and
+        <iframe src> URLs and returns a list of 'url' results, one per
+        distinct URL claimed by some extractor other than this one.  url
+        is the page's own URL and is never reported as an embed.
+        """
+        matches = re.findall(
+            r'<(?:[^>]+?data-video-url|meta[^>]+?content|(?:embed|iframe)[^>]+?src)\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage)
+
+        # In addition to 'generic', ignore matches from these plugins
+        # ..however _extract_plugin_embeds should run last
+        notbefore_blacklist = (
+            # test 37 (Wistia) http://thoughtworks.wistia.com/medias/uxjb0lwrcz
+            #  duplicate embed causes test failure
+            'Wistia',
+            # test 46 for rtl.nl (http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen)
+            #  has a broken youtube embed, download & test failure
+            'youtube',
+        )
+        elist = []
+        # eliminate duplicate checks; seeding with the page URL skips self-references
+        checked = set([url])
+        for _quote, embed_url in matches:
+            # Resolve '//host/path' the same way the static embed checks do,
+            # so extractors whose patterns require a scheme can match.
+            u = self._proto_relative_url(unescapeHTML(embed_url))
+            if u in checked:
+                continue
+            checked.add(u)
+
+            for ie in self._downloader._ies:
+                if ie.IE_NAME == self.IE_NAME or ie.IE_NAME in notbefore_blacklist:
+                    continue
+                if ie.working() and ie.suitable(u):
+                    self.to_screen(' EMBED [%s] %s' % (ie.IE_NAME, u))
+                    elist.append({
+                        '_type': 'url',
+                        'url': u,
+                        'ie_key': ie.ie_key(),
+                    })
+                    break
+            else:
+                # No extractor claimed this URL (or none are registered).
+                self.to_screen(' EMBED [?!] %s' % u)
+        if elist:
+            self.to_screen('%d embeds' % len(elist))
+        return elist
+
+
     def _real_extract(self, url):
         if url.startswith('//'):
             return {
@@ -1603,6 +1654,17 @@ class GenericIE(InfoExtractor):
                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
                 'AdobeTVVideo')
 
+        # Last-ditch attempt to find matching plugin for embeds
+        # (this can potentially replace a lot of code above)
+        elist = self._extract_plugin_embeds(webpage, url)
+        if elist:
+            return {
+                '_type': 'playlist',
+                'title': video_title,
+                'id': video_id,
+                'entries': elist,
+            }
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True