[msnbc] new extractor

2015-08-15 00:46:19 +02:00 · 2015-08-15 00:46:19 +02:00 · be6751d4ca
commit be6751d4ca
parent 6be5e46994
2 changed files with 47 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -335,6 +335,7 @@ from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
+from .msnbc import MSNBCIE
 from .mtv import (
    MTVIE,
    MTVServicesEmbeddedIE,
--- a/youtube_dl/extractor/msnbc.py
+++ b/youtube_dl/extractor/msnbc.py
@ -0,0 +1,46 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MSNBCIE(InfoExtractor):
+    _VALID_URL = r'http://www\.msnbc\.com/(?P<showname>[a-z0-9-]+)/watch/(?P<id>[a-z0-9-]+)'
+
+    _TESTS = [{
+        'url': 'http://www.msnbc.com/morning-joe/watch/american-trains-iraqis-in-fight-against-isis-465258051578',
+        'info_dict': {
+            'id': 'n_mj_vandyke_150616_647133',
+            'title': 'American trains Iraqis in fight against ISIS',
+            'description': 'md5:6432ea377a7f0bc6981d4c4fc48d4c4e',
+            'timestamp': 1434451583,
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        guid = self._html_search_meta('nv:videoId', webpage, 'guid')
+
+        playlist_json = self._download_json('http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&byGuid=%s' % (guid), guid)
+
+        entry = playlist_json['entries'][0]
+
+        thumbnails = [{
+            'url': thumb['plfile$url'],
+            'width': thumb['plfile$width'],
+            'height': thumb['plfile$height'],
+        } for thumb in entry['media$thumbnails']]
+
+        for content_item in entry['media$content']:
+            return {
+                '_type': 'url_transparent',
+                'ie_key': 'ThePlatform',
+                'id': guid,
+                'title': entry['title'],
+                'description': entry['description'],
+                'timestamp': entry['media$availableDate'] / 1000,
+                'thumbnails': thumbnails,
+                'url': content_item['plfile$url'],
+            }