[geeksandsundry] Add new extractor

2015-06-24 17:55:27 -04:00 · 2015-06-24 17:55:27 -04:00 · 2c4b64e9f7
commit 2c4b64e9f7
parent 12e9e8445d
2 changed files with 39 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -196,6 +196,7 @@ from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
 from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
+from .geeksandsundry import GeeksAndSundryIE
 from .generic import GenericIE
 from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
--- a/youtube_dl/extractor/geeksandsundry.py
+++ b/youtube_dl/extractor/geeksandsundry.py
@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+
+
+class GeeksAndSundryIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?geekandsundry\.com/(?P<title>.+)'
+    _TEST = {
+        'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+        'md5': '02206df2e7a1805349a75af8df396222',
+        'info_dict': {
+            'id': 'tabletop-bonus-wils-final-thoughts-on-dread/',
+            'ext': 'mp4',
+            'title': 'TableTop Bonus! Wil\u2019s Final Thoughts on Dread | Geek and Sundry',
+        }
+    }
+
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        page = mobj.group('title')
+        webpage_url = "https://geekandsundry.com/" + page
+        webpage = self._download_webpage(webpage_url, page)
+
+        self.report_extraction(page)
+
+        video_id = self._html_search_regex(r'data-video-id=\"(\d+)\"', webpage, u'video id')
+        pub_id = self._html_search_regex(r'data-account=\"(\d+)\"', webpage, u'pub id')
+
+        video_url = "http://c.brightcove.com/services/mobile/streaming/index/master.m3u8?videoId=%s&pubId=%s" % (video_id, pub_id)
+
+        return {
+            'id': page,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
+        }