Fix for the modified embed page served to Pakistan users

This is a crude, quick fix for Dailymotion visitors from Pakistan, or from any other country that is being served the new "v5Player" embed page. I had to add "fatal=False" in a few places because otherwise it won't run; this issues a couple of warnings, but otherwise it works.
This commit is contained in:
Asadullah-Ahmad 2015-07-15 20:22:20 +05:00
parent b062d94eef
commit a742c00965

View File

@ -19,7 +19,6 @@ from ..utils import (
unescapeHTML, unescapeHTML,
) )
class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _build_request(url): def _build_request(url):
@ -34,7 +33,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_FORMATS = [ _FORMATS = [
('stream_h264_ld_url', 'ld'), ('stream_h264_ld_url', 'ld'),
('stream_h264_url', 'standard'), ('stream_h264_url', 'standard'),
@ -42,7 +40,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
('stream_h264_hd_url', 'hd'), ('stream_h264_hd_url', 'hd'),
('stream_h264_hd1080_url', 'hd180'), ('stream_h264_hd1080_url', 'hd180'),
] ]
"""Formats for 'playerv5' pages"""
_FORMATSv5 = [
('240', 'ld'),
('380', 'standard'),
('480', 'hq'),
('720', 'hd'),
('1080', 'hd1080'),
]
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -117,56 +122,99 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
embed_page = self._download_webpage( embed_page = self._download_webpage(
embed_request, video_id, 'Downloading embed page') embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page, info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE) 'video info', flags=re.MULTILINE, fatal=False)
info = json.loads(info) """For normal embed pages with info JSON"""
if info.get('error') is not None: if info is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] info = json.loads(info)
raise ExtractorError(msg, expected=True) if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
formats = [] raise ExtractorError(msg, expected=True)
for (key, format_id) in self._FORMATS: formats = []
video_url = info.get(key) for (key, format_id) in self._FORMATS:
if video_url is not None: video_url = info.get(key)
m_size = re.search(r'H264-(\d+)x(\d+)', video_url) if video_url is not None:
if m_size is not None: m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
width, height = map(int_or_none, (m_size.group(1), m_size.group(2))) if m_size is not None:
else: width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
width, height = None, None else:
formats.append({ width, height = None, None
'url': video_url, formats.append({
'ext': 'mp4', 'url': video_url,
'format_id': format_id, 'ext': 'mp4',
'width': width, 'format_id': format_id,
'height': height, 'width': width,
}) 'height': height,
if not formats: })
raise ExtractorError('Unable to extract video URL') if not formats:
raise ExtractorError('Unable to extract video URL')
# subtitles video_subtitles = self.extract_subtitles(video_id, webpage)
video_subtitles = self.extract_subtitles(video_id, webpage) view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
view_count = str_to_int(self._search_regex( webpage, 'view count', fatal=False))
r'video_views_count[^>]+>\s+([\d\.,]+)', title = self._og_search_title(webpage, default=None)
webpage, 'view count', fatal=False)) if title is None:
title = self._html_search_regex(
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage, r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
'title') 'title')
return {
return { 'id': video_id,
'id': video_id, 'formats': formats,
'formats': formats, 'uploader': info['owner.screenname'],
'uploader': info['owner.screenname'], 'upload_date': video_upload_date,
'upload_date': video_upload_date, 'title': title,
'title': title, 'subtitles': video_subtitles,
'subtitles': video_subtitles, 'thumbnail': info['thumbnail_url'],
'thumbnail': info['thumbnail_url'], 'age_limit': age_limit,
'age_limit': age_limit, 'view_count': view_count,
'view_count': view_count, 'duration': info['duration']
'duration': info['duration'] }
} else:
formats = []
for (key, format_id) in self._FORMATSv5:
video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page,
'video info', flags=re.MULTILINE, fatal=False)
if video_url:
video_url = video_url.replace("\\", "")
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError('Unable to extract video URL from playerv5 page')
v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page,
'video info', flags=re.MULTILINE)
v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page,
'video info', flags=re.MULTILINE)
video_subtitles = self.extract_subtitles(video_id, webpage)
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
webpage, 'view count', fatal=False))
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
'title')
return {
'id': video_id,
'formats': formats,
'uploader': v5screenname,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,
'thumbnail': v5thumbnailurl,
'age_limit': age_limit,
'view_count': view_count,
}
def _get_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, webpage):
try: try: