diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 9553f82d6..5e0c4bc3e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -588,11 +588,15 @@ class BrightcoveNewIE(AdobePassIE): policy_key_id = '%s_%s' % (account_id, player_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id) - if not policy_key: + policy_key_extracted = False + + def extract_policy_key(): webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id) + policy_key = None + catalog = self._search_regex( r'catalog\(({.+?})\);', webpage, 'catalog', default=None) if catalog: @@ -605,28 +609,38 @@ class BrightcoveNewIE(AdobePassIE): policy_key = self._search_regex( r'policyKey\s*:\s*(["\'])(?P.+?)\1', webpage, 'policy key', group='pk') + self._downloader.cache.store('brightcove', policy_key_id, policy_key) + return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } + headers = {} referrer = smuggled_data.get('referrer') if referrer: headers.update({ 'Referer': referrer, 'Origin': re.search(r'https?://[^/]+', referrer).group(0), }) - try: - json_data = self._download_json(api_url, video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise + + for _ in range(2): + if not policy_key: + policy_key = extract_policy_key() + policy_key_extracted = True + headers['Accept'] = 'application/json;pk=%s' % policy_key + try: + json_data = self._download_json(api_url, video_id, headers=headers) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: + policy_key = None + continue + raise ExtractorError(message, expected=True) + raise errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH': diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ba0ad7da2..75ed69cde 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -227,12 +227,13 @@ class PornHubIE(PornHubBaseIE): else: thumbnail, duration = [None] * 2 - if not video_urls: - tv_webpage = dl_webpage('tv') - + def extract_js_vars(webpage, pattern, fatal=True): assignments = self._search_regex( - r'(var.+?mediastring.+?)', tv_webpage, - 'encoded url').split(';') + pattern, webpage, 'encoded url', fatal=fatal) + if not assignments: + return {} + + assignments = assignments.split(';') js_vars = {} @@ -254,11 +255,31 @@ class PornHubIE(PornHubBaseIE): assn = re.sub(r'var\s+', '', assn) vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) + return js_vars - video_url = js_vars['mediastring'] - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) + def add_video_url(video_url): + v_url = url_or_none(video_url) + if not v_url: + return + if v_url in video_urls_set: + return + video_urls.append((v_url, None)) + video_urls_set.add(v_url) + + if not video_urls: + FORMAT_PREFIXES = ('media', 'quality') + js_vars = extract_js_vars( + webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), + fatal=False) + if js_vars: + for key, format_url in js_vars.items(): + if any(key.startswith(p) for p in FORMAT_PREFIXES): + add_video_url(format_url) + + if not video_urls: + js_vars = extract_js_vars( + dl_webpage('tv'), r'(var.+?mediastring.+?)') + add_video_url(js_vars['mediastring']) for mobj in re.finditer( r']+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P(?:(?!\1).)+)\1', @@ -276,10 +297,16 @@ class PornHubIE(PornHubBaseIE): r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') - if determine_ext(video_url) == 'mpd': + ext = determine_ext(video_url) + if ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue tbr = None mobj = re.search(r'(?P\d+)[pP]?_(?P\d+)[kK]', video_url) if mobj: