[infoq] Probe if audio URL is valid
Make it possible to pass headers to _is_valid_url
This commit is contained in:
parent
11a8f1679b
commit
00d8d755fc
@ -1015,13 +1015,13 @@ class InfoExtractor(object):
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return True
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
|
@ -73,22 +73,28 @@ class InfoQIE(BokeCCBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage):
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
http_audio_url = fields['filename']
|
||||
if http_audio_url is None:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
# so probe URL to make sure
|
||||
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
|
||||
return []
|
||||
|
||||
return [{
|
||||
'format_id': 'http_audio',
|
||||
'url': http_audio_url,
|
||||
'vcodec': 'none',
|
||||
'http_headers': {
|
||||
'Cookie': self._extract_cookies(webpage)
|
||||
},
|
||||
'http_headers': cookies_header,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -105,7 +111,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage) +
|
||||
self._extract_http_video(webpage) +
|
||||
self._extract_http_audio(webpage))
|
||||
self._extract_http_audio(webpage, video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user