Fix for the modified embed page served to Pakistan users

This is a crude/quick fix for Dailymotion visitors from Pakistan, or from any other country that is being served the new "v5Player" embed page. I had to add "fatal=False" in a few places, otherwise it won't run; this issues a couple of warnings, but otherwise it works.
2015-07-15 20:22:20 +05:00 · 2015-07-15 20:22:20 +05:00 · a742c00965
commit a742c00965
parent b062d94eef
1 changed files with 100 additions and 52 deletions
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -19,7 +19,6 @@ from ..utils import (
    unescapeHTML,
 )
 class DailymotionBaseInfoExtractor(InfoExtractor):
    @staticmethod
    def _build_request(url):
@ -34,7 +33,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = 'dailymotion'
    _FORMATS = [
        ('stream_h264_ld_url', 'ld'),
        ('stream_h264_url', 'standard'),
@ -42,7 +40,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
        ('stream_h264_hd_url', 'hd'),
        ('stream_h264_hd1080_url', 'hd180'),
    ]
-
+    """Formats for 'playerv5' pages"""
    _FORMATSv5 = [
        ('240', 'ld'),
        ('380', 'standard'),
        ('480', 'hq'),
        ('720', 'hd'),
        ('1080', 'hd1080'),
    ]
    _TESTS = [
        {
            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -117,56 +122,99 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
        embed_page = self._download_webpage(
            embed_request, video_id, 'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
-                                  'video info', flags=re.MULTILINE)
+                                  'video info', flags=re.MULTILINE, fatal=False)
-        info = json.loads(info)
+        """For normal embed pages with info JSON"""
-        if info.get('error') is not None:
+        if info is not None: 
-            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
+            info = json.loads(info)
-            raise ExtractorError(msg, expected=True)
+            if info.get('error') is not None:
-
+                msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
-        formats = []
+                raise ExtractorError(msg, expected=True)
-        for (key, format_id) in self._FORMATS:
+            formats = []
-            video_url = info.get(key)
+            for (key, format_id) in self._FORMATS:
-            if video_url is not None:
+                video_url = info.get(key)
-                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
+                if video_url is not None:
-                if m_size is not None:
+                    m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
-                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
+                    if m_size is not None:
-                else:
+                        width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
-                    width, height = None, None
+                    else:
-                formats.append({
+                        width, height = None, None
-                    'url': video_url,
+                    formats.append({
-                    'ext': 'mp4',
+                        'url': video_url,
-                    'format_id': format_id,
+                        'ext': 'mp4',
-                    'width': width,
+                        'format_id': format_id,
-                    'height': height,
+                        'width': width,
-                })
+                        'height': height,
-        if not formats:
+                    })
-            raise ExtractorError('Unable to extract video URL')
+            if not formats:
-
+                raise ExtractorError('Unable to extract video URL')
-        # subtitles
+            video_subtitles = self.extract_subtitles(video_id, webpage)
-        video_subtitles = self.extract_subtitles(video_id, webpage)
+            view_count = str_to_int(self._search_regex(
-
+                r'video_views_count[^>]+>\s+([\d\.,]+)',
-        view_count = str_to_int(self._search_regex(
+                webpage, 'view count', fatal=False))
-            r'video_views_count[^>]+>\s+([\d\.,]+)',
+            title = self._og_search_title(webpage, default=None)
-            webpage, 'view count', fatal=False))
+            if title is None:
-
+                title = self._html_search_regex(
        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
-                'title')
+                    'title')
-
+            return {
-        return {
+                'id': video_id,
-            'id': video_id,
+                'formats': formats,
-            'formats': formats,
+                'uploader': info['owner.screenname'],
-            'uploader': info['owner.screenname'],
+                'upload_date': video_upload_date,
-            'upload_date': video_upload_date,
+                'title': title,
-            'title': title,
+                'subtitles': video_subtitles,
-            'subtitles': video_subtitles,
+                'thumbnail': info['thumbnail_url'],
-            'thumbnail': info['thumbnail_url'],
+                'age_limit': age_limit,
-            'age_limit': age_limit,
+                'view_count': view_count,
-            'view_count': view_count,
+                'duration': info['duration']
-            'duration': info['duration']
+                }
-        }
+        else:
            formats = []
            for (key, format_id) in self._FORMATSv5:
                video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page,
                                               'video info', flags=re.MULTILINE, fatal=False)
                if video_url:
                    video_url = video_url.replace("\\", "")
                if video_url is not None:
                    m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                    if m_size is not None:
                        width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                    else:
                        width, height = None, None
                    formats.append({
                                    'url': video_url,
                                    'ext': 'mp4',
                                    'format_id': format_id,
                                    'width': width,
                                    'height': height,
                                    })
            if not formats:
                raise ExtractorError('Unable to extract video URL from playerv5 page')
            v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page,
            'video info', flags=re.MULTILINE)
            v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page,
            'video info', flags=re.MULTILINE) 
            video_subtitles = self.extract_subtitles(video_id, webpage)
            view_count = str_to_int(self._search_regex(
                r'video_views_count[^>]+>\s+([\d\.,]+)',
                webpage, 'view count', fatal=False))
            title = self._og_search_title(webpage, default=None)
            if title is None:
                title = self._html_search_regex(
                    r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
                    'title')       
            return  {
                'id':       video_id,
                'formats': formats,
                'uploader': v5screenname,
                'upload_date':  video_upload_date,
                'title':    self._og_search_title(webpage),
                'subtitles':    video_subtitles,
                'thumbnail': v5thumbnailurl,
                'age_limit': age_limit,
                'view_count': view_count,
            } 
    def _get_subtitles(self, video_id, webpage):
        try: