Merge branch 'fix.25.12.2018'
This commit is contained in:
commit
0317d16c78
@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=%s'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
@ -218,6 +218,25 @@ class FacebookIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
|
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
|
||||||
'uploader': 'ESL One Dota 2',
|
'uploader': 'ESL One Dota 2',
|
||||||
|
'timestamp': 1527084179,
|
||||||
|
'upload_date': '20180523',
|
||||||
|
'uploader_id': '234218833769558',
|
||||||
|
'is_live': False
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# no timestamp
|
||||||
|
'url': 'https://www.facebook.com/SuperNewsGames/videos/642255722780473/',
|
||||||
|
'info_dict': {
|
||||||
|
'timestamp': 1521221400,
|
||||||
|
'uploader': 'Super News Games',
|
||||||
|
'uploader_id': '229550157384367',
|
||||||
|
'id': '642255722780473',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20180316',
|
||||||
|
'title': 'The Voice of Nick is trying Fortnite after 100 hours of PLAYERUNKNOWN\'S BATTL...',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -339,6 +358,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
video_data = extract_from_jsmods_instances(server_js_data)
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
|
|
||||||
|
tahoe_data = FacebookTahoeData(self, webpage, video_id)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
return webpage, False
|
return webpage, False
|
||||||
@ -349,36 +369,33 @@ class FacebookIE(InfoExtractor):
|
|||||||
expected=True)
|
expected=True)
|
||||||
elif '>You must log in to continue' in webpage:
|
elif '>You must log in to continue' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
# Video info not in first request, do a secondary request using
|
# Video info not in first request, do a secondary request using
|
||||||
# tahoe player specific URL
|
# tahoe player specific URL
|
||||||
tahoe_data = self._download_webpage(
|
|
||||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'__a': 1,
|
|
||||||
'__pc': self._search_regex(
|
|
||||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
|
||||||
'pkg cohort', default='PHASED:DEFAULT'),
|
|
||||||
'__rev': self._search_regex(
|
|
||||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
|
||||||
'client revision', default='3944515'),
|
|
||||||
'fb_dtsg': self._search_regex(
|
|
||||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
|
||||||
webpage, 'dtsg token', default=''),
|
|
||||||
}),
|
|
||||||
headers={
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
})
|
|
||||||
tahoe_js_data = self._parse_json(
|
tahoe_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
|
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data.primary,
|
||||||
'tahoe js data', default='{}'),
|
'tahoe js data', default='{}'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
|
|
||||||
video_data = extract_from_jsmods_instances(tahoe_js_data)
|
video_data = extract_from_jsmods_instances(tahoe_js_data)
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
|
is_scheduled = '"isScheduledLive":true' in tahoe_data.secondary
|
||||||
|
is_live_stream = video_data[0].get('is_live_stream', False)
|
||||||
|
is_broadcast = video_data[0].get('is_broadcast', False)
|
||||||
|
|
||||||
|
live_status = 'not_live'
|
||||||
|
if is_broadcast:
|
||||||
|
live_status = 'completed'
|
||||||
|
if is_live_stream:
|
||||||
|
live_status = 'live'
|
||||||
|
if is_scheduled:
|
||||||
|
live_status = 'upcoming'
|
||||||
|
|
||||||
|
is_live = live_status == 'live'
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data:
|
for f in video_data:
|
||||||
format_id = f['stream_type']
|
format_id = f['stream_type']
|
||||||
@ -423,16 +440,35 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_title = 'Facebook video #%s' % video_id
|
video_title = 'Facebook video #%s' % video_id
|
||||||
uploader = clean_html(get_element_by_id(
|
uploader = clean_html(get_element_by_id(
|
||||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \
|
||||||
default=None) or self._og_search_title(webpage, fatal=False)
|
self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
|
r'\"ownerName\":"(.+?)"', tahoe_data.secondary,
|
||||||
|
'uploader_id', fatal=False)
|
||||||
|
|
||||||
|
|
||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
|
'timestamp', default=None) or self._search_regex(
|
||||||
|
r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
|
|
||||||
|
uploader_id = self._search_regex(
|
||||||
|
r'ownerid:"([\d]+)', webpage,
|
||||||
|
'uploader_id', default=None) or self._search_regex(
|
||||||
|
r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
|
||||||
|
'uploader_id', fatal=False)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
view_count = parse_count(self._search_regex(
|
view_count = parse_count(self._search_regex(
|
||||||
|
r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||||
|
default=None) or self._search_regex(
|
||||||
|
r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
|
||||||
|
default=None) or self._search_regex(
|
||||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||||
default=None))
|
default=None) or self._search_regex(
|
||||||
|
r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
|
||||||
|
default=None)
|
||||||
|
)
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -442,6 +478,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'is_live': is_live,
|
||||||
|
'live_status': live_status
|
||||||
}
|
}
|
||||||
|
|
||||||
return webpage, info_dict
|
return webpage, info_dict
|
||||||
@ -472,6 +511,54 @@ class FacebookIE(InfoExtractor):
|
|||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
|
class FacebookTahoeData(object):
    """Lazily download and cache Facebook "tahoe" async payloads for a video.

    The payload URL is built from the owning extractor's
    ``_VIDEO_PAGE_TAHOE_TEMPLATE``, which is filled with the video id and a
    payload type. Each payload type is requested at most once per instance;
    later reads are served from an in-memory cache.
    """

    def __init__(self, extractor, page, video_id):
        """Remember the request context; no network access happens here.

        extractor -- object providing ``_download_webpage``, ``_search_regex``
                     and ``_VIDEO_PAGE_TAHOE_TEMPLATE``
        page      -- already downloaded page text, searched for request tokens
        video_id  -- id substituted into the tahoe URL template
        """
        self._page = page
        self._video_id = video_id
        self._extractor = extractor
        # Payload text keyed by payload type ('primary', 'secondary').
        self._data = {}

    def _get_data(self, data_type):
        """Return the payload text for ``data_type``, downloading it on first use.

        A falsy download result is normalised to an empty string so callers
        can always run substring and regex searches on the returned value.
        """
        if data_type not in self._data:
            req_data, headers = self._get_request_data_and_headers()
            data = self._extractor._download_webpage(
                self._extractor._VIDEO_PAGE_TAHOE_TEMPLATE % (self._video_id, data_type),
                self._video_id,
                data=req_data,
                headers=headers
            )
            # Store the result: without this the membership check above never
            # succeeds and every property access triggers a new request.
            self._data[data_type] = data if data else ''
        return self._data[data_type]

    @property
    def primary(self):
        """Payload text requested with payload type 'primary'."""
        return self._get_data('primary')

    @property
    def secondary(self):
        """Payload text requested with payload type 'secondary'."""
        return self._get_data('secondary')

    def _get_request_data_and_headers(self):
        """Build the POST body and headers used for tahoe requests.

        The ``__pc``, ``__rev`` and ``fb_dtsg`` fields are searched for in the
        stored page text; each falls back to the default given below when the
        pattern does not match.

        Returns a ``(request_data, request_headers)`` tuple.
        """
        tahoe_request_data = urlencode_postdata(
            {
                '__a': 1,
                '__pc': self._extractor._search_regex(
                    r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page,
                    'pkg cohort', default='PHASED:DEFAULT'),
                '__rev': self._extractor._search_regex(
                    r'client_revision["\']\s*:\s*(\d+),', self._page,
                    'client revision', default='3944515'),
                'fb_dtsg': self._extractor._search_regex(
                    r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
                    self._page, 'dtsg token', default=''),
            })
        tahoe_request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        return tahoe_request_data, tahoe_request_headers
|
||||||
|
|
||||||
|
|
||||||
class FacebookPluginsVideoIE(InfoExtractor):
|
class FacebookPluginsVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
|
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
|
||||||
|
|
||||||
|
@ -94,6 +94,21 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Video has not been found',
|
'skip': 'Video has not been found',
|
||||||
|
}, {
|
||||||
|
# live video
|
||||||
|
'url': 'https://www.ok.ru/video/1050794925929',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1050794925929',
|
||||||
|
'title': 're:^Поиск репертуара [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': u'20190428',
|
||||||
|
'uploader': u'(((((КнЯзЬ ))))',
|
||||||
|
'uploader_id': u'557343776873',
|
||||||
|
'is_live': True
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -131,8 +146,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'http://ok.ru/video/%s' % video_id, video_id)
|
'http://ok.ru/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
error = self._search_regex(
|
error = self._search_regex(
|
||||||
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
|
r'<div class="vp_video_stub_txt">(?P<error>.*?)<\/div>',
|
||||||
webpage, 'error', default=None)
|
webpage, name='error', group='error', default=None)
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError(error, expected=True)
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
@ -176,6 +191,45 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
upload_date = unified_strdate(self._html_search_meta(
|
upload_date = unified_strdate(self._html_search_meta(
|
||||||
'ya:ovs:upload_date', webpage, 'upload date', default=None))
|
'ya:ovs:upload_date', webpage, 'upload date', default=None))
|
||||||
|
|
||||||
|
if upload_date is None:
|
||||||
|
upload_date_str = self._search_regex(
|
||||||
|
r'vp-layer-info_date">(?P<date>.*?)<\/span>',
|
||||||
|
webpage, 'upload date', group='date')
|
||||||
|
if upload_date_str:
|
||||||
|
upload_date_str = upload_date_str.replace('Sept', 'Sep')
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
upload_date_time = None
|
||||||
|
try:
|
||||||
|
upload_date_time = datetime.strptime(upload_date_str, '%d %b %Y')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
upload_date_time = datetime.strptime(upload_date_str, '%d %b')
|
||||||
|
upload_date_time = upload_date_time.replace(year=datetime.utcnow().year)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
upload_date_time = datetime.strptime(upload_date_str, '%d %B')
|
||||||
|
upload_date_time = upload_date_time.replace(year=datetime.utcnow().year)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
if upload_date_str.find(':') >=0:
|
||||||
|
hour_and_minutes = upload_date_str.split(' ')[-1]
|
||||||
|
else:
|
||||||
|
hour_and_minutes = upload_date_str
|
||||||
|
upload_date_time = datetime.strptime(hour_and_minutes, '%H:%M')
|
||||||
|
upload_date_time = upload_date_time.replace(year=datetime.utcnow().year)
|
||||||
|
upload_date_time = upload_date_time.replace(day=datetime.utcnow().day)
|
||||||
|
if upload_date_str.find('yesterday') ==0:
|
||||||
|
upload_date_time = upload_date_time - timedelta(days=1)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if upload_date_time:
|
||||||
|
upload_date = upload_date_time.strftime('%Y%m%d')
|
||||||
|
|
||||||
age_limit = None
|
age_limit = None
|
||||||
adult = self._html_search_meta(
|
adult = self._html_search_meta(
|
||||||
'ya:ovs:adult', webpage, 'age limit', default=None)
|
'ya:ovs:adult', webpage, 'age limit', default=None)
|
||||||
@ -207,6 +261,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
assert title
|
assert title
|
||||||
if provider == 'LIVE_TV_APP':
|
if provider == 'LIVE_TV_APP':
|
||||||
info['title'] = self._live_title(title)
|
info['title'] = self._live_title(title)
|
||||||
|
info['is_live'] = True
|
||||||
|
|
||||||
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
||||||
|
|
||||||
|
@ -397,6 +397,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'timestamp': 1324343742,
|
'timestamp': 1324343742,
|
||||||
'upload_date': '20111220',
|
'upload_date': '20111220',
|
||||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -693,12 +694,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'timestamp', default=None)
|
'timestamp', default=None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
|
# When userInteractionCount does not exist views is 0
|
||||||
|
view_count = int_or_none(
|
||||||
|
self._search_regex(
|
||||||
|
r'"interactionType":"http:\/\/schema\.org\/WatchAction","userInteractionCount":(.+?)}',
|
||||||
|
webpage, 'view count', default=0
|
||||||
|
)
|
||||||
|
)
|
||||||
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
|
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
|
||||||
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
|
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
# This info is only available in vimeo.com/{id} urls
|
# This info is only available in vimeo.com/{id} urls
|
||||||
view_count = None
|
|
||||||
like_count = None
|
like_count = None
|
||||||
comment_count = None
|
comment_count = None
|
||||||
|
|
||||||
|
@ -282,7 +282,13 @@ class VKIE(VKBaseIE):
|
|||||||
# The video is not available in your region.
|
# The video is not available in your region.
|
||||||
'url': 'https://vk.com/video-51812607_171445436',
|
'url': 'https://vk.com/video-51812607_171445436',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
},
|
||||||
|
{
|
||||||
|
# Video %s is not available.
|
||||||
|
'url': 'https://vk.com/video-173478245_456239188',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -345,6 +351,9 @@ class VKIE(VKBaseIE):
|
|||||||
|
|
||||||
r'<!>The video .+? is not available in your region.':
|
r'<!>The video .+? is not available in your region.':
|
||||||
'Video %s is not available in your region.',
|
'Video %s is not available in your region.',
|
||||||
|
|
||||||
|
r'<!>The video .+? is unavailable':
|
||||||
|
'Video %s is not available.',
|
||||||
}
|
}
|
||||||
|
|
||||||
for error_re, error_msg in ERRORS.items():
|
for error_re, error_msg in ERRORS.items():
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2019.06.08'
|
__version__ = 'vc.2019.06.08'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user