Merge pull request #211 from ytdl-org/master

[pull] master from ytdl-org:master
This commit is contained in:
pull[bot] 2019-12-31 16:39:23 +00:00 committed by GitHub
commit 5d2f416a86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 24 deletions

View File

@@ -588,11 +588,15 @@ class BrightcoveNewIE(AdobePassIE):
policy_key_id = '%s_%s' % (account_id, player_id) policy_key_id = '%s_%s' % (account_id, player_id)
policy_key = self._downloader.cache.load('brightcove', policy_key_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id)
if not policy_key: policy_key_extracted = False
def extract_policy_key():
webpage = self._download_webpage( webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js' 'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id) % (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex( catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None) r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog: if catalog:
@@ -605,28 +609,38 @@ class BrightcoveNewIE(AdobePassIE):
policy_key = self._search_regex( policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
self._downloader.cache.store('brightcove', policy_key_id, policy_key) self._downloader.cache.store('brightcove', policy_key_id, policy_key)
return policy_key
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
headers = { headers = {}
'Accept': 'application/json;pk=%s' % policy_key,
}
referrer = smuggled_data.get('referrer') referrer = smuggled_data.get('referrer')
if referrer: if referrer:
headers.update({ headers.update({
'Referer': referrer, 'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0), 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
}) })
try:
json_data = self._download_json(api_url, video_id, headers=headers) for _ in range(2):
except ExtractorError as e: if not policy_key:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: policy_key = extract_policy_key()
json_data = self._parse_json(e.cause.read().decode(), video_id)[0] policy_key_extracted = True
message = json_data.get('message') or json_data['error_code'] headers['Accept'] = 'application/json;pk=%s' % policy_key
if json_data.get('error_subcode') == 'CLIENT_GEO': try:
self.raise_geo_restricted(msg=message) json_data = self._download_json(api_url, video_id, headers=headers)
raise ExtractorError(message, expected=True) break
raise except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
policy_key = None
continue
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors') errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH': if errors and errors[0].get('error_subcode') == 'TVE_AUTH':

View File

@@ -227,12 +227,13 @@ class PornHubIE(PornHubBaseIE):
else: else:
thumbnail, duration = [None] * 2 thumbnail, duration = [None] * 2
if not video_urls: def extract_js_vars(webpage, pattern, fatal=True):
tv_webpage = dl_webpage('tv')
assignments = self._search_regex( assignments = self._search_regex(
r'(var.+?mediastring.+?)</script>', tv_webpage, pattern, webpage, 'encoded url', fatal=fatal)
'encoded url').split(';') if not assignments:
return {}
assignments = assignments.split(';')
js_vars = {} js_vars = {}
@@ -254,11 +255,31 @@ class PornHubIE(PornHubBaseIE):
assn = re.sub(r'var\s+', '', assn) assn = re.sub(r'var\s+', '', assn)
vname, value = assn.split('=', 1) vname, value = assn.split('=', 1)
js_vars[vname] = parse_js_value(value) js_vars[vname] = parse_js_value(value)
return js_vars
video_url = js_vars['mediastring'] def add_video_url(video_url):
if video_url not in video_urls_set: v_url = url_or_none(video_url)
video_urls.append((video_url, None)) if not v_url:
video_urls_set.add(video_url) return
if v_url in video_urls_set:
return
video_urls.append((v_url, None))
video_urls_set.add(v_url)
if not video_urls:
FORMAT_PREFIXES = ('media', 'quality')
js_vars = extract_js_vars(
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
fatal=False)
if js_vars:
for key, format_url in js_vars.items():
if any(key.startswith(p) for p in FORMAT_PREFIXES):
add_video_url(format_url)
if not video_urls:
js_vars = extract_js_vars(
dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
add_video_url(js_vars['mediastring'])
for mobj in re.finditer( for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
@@ -276,10 +297,16 @@ class PornHubIE(PornHubBaseIE):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date: if upload_date:
upload_date = upload_date.replace('/', '') upload_date = upload_date.replace('/', '')
if determine_ext(video_url) == 'mpd': ext = determine_ext(video_url)
if ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
continue continue
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
tbr = None tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj: if mobj: