Fixed Dailymotion view_count extraction and added support for playerV5 embed pages
This commit is contained in:
parent
e901e6fa81
commit
26b26d207d
BIN
youtube-dl
BIN
youtube-dl
Binary file not shown.
@ -19,6 +19,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DailymotionBaseInfoExtractor(InfoExtractor):
|
class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_request(url):
|
def _build_request(url):
|
||||||
@ -33,6 +34,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
|
|
||||||
_FORMATS = [
|
_FORMATS = [
|
||||||
('stream_h264_ld_url', 'ld'),
|
('stream_h264_ld_url', 'ld'),
|
||||||
('stream_h264_url', 'standard'),
|
('stream_h264_url', 'standard'),
|
||||||
@ -121,10 +123,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
embed_request = self._build_request(embed_url)
|
embed_request = self._build_request(embed_url)
|
||||||
embed_page = self._download_webpage(
|
embed_page = self._download_webpage(
|
||||||
embed_request, video_id, 'Downloading embed page')
|
embed_request, video_id, 'Downloading embed page')
|
||||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
checkv5 = self._search_regex(r'playerV5(.)', embed_page,
|
||||||
'video info', flags=re.MULTILINE, fatal=False)
|
'checkv5', default=None, fatal=False)
|
||||||
"""For normal embed pages with info JSON"""
|
"""For normal embed pages with info variable"""
|
||||||
if info is not None:
|
if checkv5 is None:
|
||||||
|
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||||
|
'video info', flags=re.MULTILINE)
|
||||||
info = json.loads(info)
|
info = json.loads(info)
|
||||||
if info.get('error') is not None:
|
if info.get('error') is not None:
|
||||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||||
@ -148,9 +152,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Unable to extract video URL')
|
raise ExtractorError('Unable to extract video URL')
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = self._search_regex(
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
r'video_views_count[^>]+>\s+([\d\. ]+)\s+views',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False)
|
||||||
|
view_count = view_count.replace(" ", "")
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
title = self._og_search_title(webpage, default=None)
|
title = self._og_search_title(webpage, default=None)
|
||||||
if title is None:
|
if title is None:
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
@ -171,38 +177,44 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
formats = []
|
formats = []
|
||||||
for (key, format_id) in self._FORMATSv5:
|
for (key, format_id) in self._FORMATSv5:
|
||||||
video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page,
|
"""Verify format is available"""
|
||||||
'video info', flags=re.MULTILINE, fatal=False)
|
checkformat = self._search_regex(r'%s+":(.)' % key, embed_page,
|
||||||
if video_url:
|
'checkformat', default=None)
|
||||||
|
if checkformat is not None:
|
||||||
|
video_url = self._search_regex(r'%s+".{30}(.*?)"' % key, embed_page,
|
||||||
|
'video info', flags=re.MULTILINE)
|
||||||
video_url = video_url.replace("\\", "")
|
video_url = video_url.replace("\\", "")
|
||||||
if video_url is not None:
|
if video_url is not None:
|
||||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||||
if m_size is not None:
|
if m_size is not None:
|
||||||
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
||||||
else:
|
else:
|
||||||
width, height = None, None
|
width, height = None, None
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Unable to extract video URL from playerv5 page')
|
raise ExtractorError('Unable to extract video URL from playerv5 page')
|
||||||
v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page,
|
v5screenname = self._search_regex(r'screenname":"(.*?)"', embed_page,
|
||||||
'video info', flags=re.MULTILINE)
|
'video info-v5screenname', flags=re.MULTILINE, fatal=False)
|
||||||
v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page,
|
v5thumbnailurl = self._search_regex(r'poster_url":"(.*?)"', embed_page,
|
||||||
'video info', flags=re.MULTILINE)
|
'video info-v5thumbnailurl', flags=re.MULTILINE, fatal=False)
|
||||||
|
if v5thumbnailurl is not None:
|
||||||
|
v5thumbnailurl = v5thumbnailurl.replace("\\", "")
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = self._search_regex(r'video_views_count[^>]+>\s+([\d\. ]+)\s+views',
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
webpage, 'view count', fatal=False)
|
||||||
webpage, 'view count', fatal=False))
|
view_count = view_count.replace(" ", "")
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
title = self._og_search_title(webpage, default=None)
|
title = self._og_search_title(webpage, default=None)
|
||||||
if title is None:
|
if title is None:
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
|
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
|
||||||
'title')
|
'title')
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user