Fixed for modified embed page for Pakistan users

This is a crude/quick fix for Dailymotion visitors from Pakistan, or any other country which is being served a new "v5Player" embed page. Had to add "fatal=False" in a few places otherwise it won't run, which issues couple of Warnings but otherwise works.
This commit is contained in:
Asadullah-Ahmad 2015-07-15 20:22:20 +05:00
parent b062d94eef
commit a742c00965

View File

@ -19,7 +19,6 @@ from ..utils import (
unescapeHTML,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
@ -34,7 +33,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = 'dailymotion'
_FORMATS = [
('stream_h264_ld_url', 'ld'),
('stream_h264_url', 'standard'),
@ -42,7 +40,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
('stream_h264_hd_url', 'hd'),
('stream_h264_hd1080_url', 'hd180'),
]
"""Formats for 'playerv5' pages"""
_FORMATSv5 = [
('240', 'ld'),
('380', 'standard'),
('480', 'hq'),
('720', 'hd'),
('1080', 'hd1080'),
]
_TESTS = [
{
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -117,56 +122,99 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
embed_page = self._download_webpage(
embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
raise ExtractorError(msg, expected=True)
formats = []
for (key, format_id) in self._FORMATS:
video_url = info.get(key)
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError('Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
webpage, 'view count', fatal=False))
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
'video info', flags=re.MULTILINE, fatal=False)
"""For normal embed pages with info JSON"""
if info is not None:
info = json.loads(info)
if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
raise ExtractorError(msg, expected=True)
formats = []
for (key, format_id) in self._FORMATS:
video_url = info.get(key)
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError('Unable to extract video URL')
video_subtitles = self.extract_subtitles(video_id, webpage)
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
webpage, 'view count', fatal=False))
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
'title')
return {
'id': video_id,
'formats': formats,
'uploader': info['owner.screenname'],
'upload_date': video_upload_date,
'title': title,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
'duration': info['duration']
}
'title')
return {
'id': video_id,
'formats': formats,
'uploader': info['owner.screenname'],
'upload_date': video_upload_date,
'title': title,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
'view_count': view_count,
'duration': info['duration']
}
else:
formats = []
for (key, format_id) in self._FORMATSv5:
video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page,
'video info', flags=re.MULTILINE, fatal=False)
if video_url:
video_url = video_url.replace("\\", "")
if video_url is not None:
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if m_size is not None:
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
else:
width, height = None, None
formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError('Unable to extract video URL from playerv5 page')
v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page,
'video info', flags=re.MULTILINE)
v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page,
'video info', flags=re.MULTILINE)
video_subtitles = self.extract_subtitles(video_id, webpage)
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
webpage, 'view count', fatal=False))
title = self._og_search_title(webpage, default=None)
if title is None:
title = self._html_search_regex(
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
'title')
return {
'id': video_id,
'formats': formats,
'uploader': v5screenname,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,
'thumbnail': v5thumbnailurl,
'age_limit': age_limit,
'view_count': view_count,
}
def _get_subtitles(self, video_id, webpage):
try: