[infoq] Probe if audio URL is valid
Make it possible to pass headers to _is_valid_url
This commit is contained in:
parent
11a8f1679b
commit
00d8d755fc
@ -1015,13 +1015,13 @@ class InfoExtractor(object):
|
|||||||
unique_formats.append(f)
|
unique_formats.append(f)
|
||||||
formats[:] = unique_formats
|
formats[:] = unique_formats
|
||||||
|
|
||||||
def _is_valid_url(self, url, video_id, item='video'):
|
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||||
url = self._proto_relative_url(url, scheme='http:')
|
url = self._proto_relative_url(url, scheme='http:')
|
||||||
# For now assume non HTTP(S) URLs always valid
|
# For now assume non HTTP(S) URLs always valid
|
||||||
if not (url.startswith('http://') or url.startswith('https://')):
|
if not (url.startswith('http://') or url.startswith('https://')):
|
||||||
return True
|
return True
|
||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||||
|
@ -73,22 +73,28 @@ class InfoQIE(BokeCCBaseIE):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_http_audio(self, webpage):
|
def _extract_http_audio(self, webpage, video_id):
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
http_audio_url = fields['filename']
|
http_audio_url = fields['filename']
|
||||||
if http_audio_url is None:
|
if http_audio_url is None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||||
|
|
||||||
# base URL is found in the Location header in the response returned by
|
# base URL is found in the Location header in the response returned by
|
||||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||||
|
|
||||||
|
# the audio file sometimes seems to be missing even if there is a download link,
|
||||||
|
# so probe the URL to make sure
|
||||||
|
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
|
||||||
|
return []
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'format_id': 'http_audio',
|
'format_id': 'http_audio',
|
||||||
'url': http_audio_url,
|
'url': http_audio_url,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'http_headers': {
|
'http_headers': cookies_header,
|
||||||
'Cookie': self._extract_cookies(webpage)
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -105,7 +111,7 @@ class InfoQIE(BokeCCBaseIE):
|
|||||||
formats = (
|
formats = (
|
||||||
self._extract_rtmp_video(webpage) +
|
self._extract_rtmp_video(webpage) +
|
||||||
self._extract_http_video(webpage) +
|
self._extract_http_video(webpage) +
|
||||||
self._extract_http_audio(webpage))
|
self._extract_http_audio(webpage, video_id))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user