From a742c0096587c29851655d3f7ae7e44d496d6f0e Mon Sep 17 00:00:00 2001 From: Asadullah-Ahmad Date: Wed, 15 Jul 2015 20:22:20 +0500 Subject: [PATCH] Fix for modified embed page for Pakistan users This is a crude/quick fix for Dailymotion visitors from Pakistan, or any other country that is being served the new "v5Player" embed page. Had to add "fatal=False" in a few places, otherwise it won't run; this issues a couple of warnings but otherwise works. --- youtube_dl/extractor/dailymotion.py | 152 ++++++++++++++++++---------- 1 file changed, 100 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1a41c0db1..a2fc07dfe 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -19,7 +19,6 @@ from ..utils import ( unescapeHTML, ) - class DailymotionBaseInfoExtractor(InfoExtractor): @staticmethod def _build_request(url): @@ -34,7 +33,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P[^/?_]+)' IE_NAME = 'dailymotion' - _FORMATS = [ ('stream_h264_ld_url', 'ld'), ('stream_h264_url', 'standard'), @@ -42,7 +40,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): ('stream_h264_hd_url', 'hd'), ('stream_h264_hd1080_url', 'hd180'), ] - + """Formats for 'playerv5' pages""" + _FORMATSv5 = [ + ('240', 'ld'), + ('380', 'standard'), + ('480', 'hq'), + ('720', 'hd'), + ('1080', 'hd1080'), + ] _TESTS = [ { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', @@ -117,56 +122,99 @@ class DailymotionIE(DailymotionBaseInfoExtractor): embed_page = self._download_webpage( embed_request, video_id, 'Downloading embed page') info = self._search_regex(r'var info = ({.*?}),$', embed_page, - 'video info', flags=re.MULTILINE) - info = json.loads(info) - if info.get('error') is not None: - msg = 'Couldn\'t get video, 
Dailymotion says: %s' % info['error']['title'] - raise ExtractorError(msg, expected=True) - - formats = [] - for (key, format_id) in self._FORMATS: - video_url = info.get(key) - if video_url is not None: - m_size = re.search(r'H264-(\d+)x(\d+)', video_url) - if m_size is not None: - width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) - else: - width, height = None, None - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': format_id, - 'width': width, - 'height': height, - }) - if not formats: - raise ExtractorError('Unable to extract video URL') - - # subtitles - video_subtitles = self.extract_subtitles(video_id, webpage) - - view_count = str_to_int(self._search_regex( - r'video_views_count[^>]+>\s+([\d\.,]+)', - webpage, 'view count', fatal=False)) - - title = self._og_search_title(webpage, default=None) - if title is None: - title = self._html_search_regex( + 'video info', flags=re.MULTILINE, fatal=False) + """For normal embed pages with info JSON""" + if info is not None: + info = json.loads(info) + if info.get('error') is not None: + msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] + raise ExtractorError(msg, expected=True) + formats = [] + for (key, format_id) in self._FORMATS: + video_url = info.get(key) + if video_url is not None: + m_size = re.search(r'H264-(\d+)x(\d+)', video_url) + if m_size is not None: + width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) + else: + width, height = None, None + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': format_id, + 'width': width, + 'height': height, + }) + if not formats: + raise ExtractorError('Unable to extract video URL') + video_subtitles = self.extract_subtitles(video_id, webpage) + view_count = str_to_int(self._search_regex( + r'video_views_count[^>]+>\s+([\d\.,]+)', + webpage, 'view count', fatal=False)) + title = self._og_search_title(webpage, default=None) + if title is None: + title = self._html_search_regex( 
r'(?s)]*>(.*?)', webpage, - 'title') - - return { - 'id': video_id, - 'formats': formats, - 'uploader': info['owner.screenname'], - 'upload_date': video_upload_date, - 'title': title, - 'subtitles': video_subtitles, - 'thumbnail': info['thumbnail_url'], - 'age_limit': age_limit, - 'view_count': view_count, - 'duration': info['duration'] - } + 'title') + return { + 'id': video_id, + 'formats': formats, + 'uploader': info['owner.screenname'], + 'upload_date': video_upload_date, + 'title': title, + 'subtitles': video_subtitles, + 'thumbnail': info['thumbnail_url'], + 'age_limit': age_limit, + 'view_count': view_count, + 'duration': info['duration'] + } + else: + formats = [] + for (key, format_id) in self._FORMATSv5: + video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page, + 'video info', flags=re.MULTILINE, fatal=False) + if video_url: + video_url = video_url.replace("\\", "") + if video_url is not None: + m_size = re.search(r'H264-(\d+)x(\d+)', video_url) + if m_size is not None: + width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) + else: + width, height = None, None + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': format_id, + 'width': width, + 'height': height, + }) + if not formats: + raise ExtractorError('Unable to extract video URL from playerv5 page') + v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page, + 'video info', flags=re.MULTILINE) + v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page, + 'video info', flags=re.MULTILINE) + video_subtitles = self.extract_subtitles(video_id, webpage) + view_count = str_to_int(self._search_regex( + r'video_views_count[^>]+>\s+([\d\.,]+)', + webpage, 'view count', fatal=False)) + title = self._og_search_title(webpage, default=None) + if title is None: + title = self._html_search_regex( + r'(?s)]*>(.*?)', webpage, + 'title') + return { + 'id': video_id, + 'formats': formats, + 'uploader': v5screenname, + 'upload_date': 
video_upload_date, + 'title': self._og_search_title(webpage), + 'subtitles': video_subtitles, + 'thumbnail': v5thumbnailurl, + 'age_limit': age_limit, + 'view_count': view_count, + } + def _get_subtitles(self, video_id, webpage): try: