[infoq] Probe if audio URL is valid

Make it possible to pass headers to _is_valid_url
This commit is contained in:
Mattias Wadman 2017-01-23 20:43:42 +01:00
parent 11a8f1679b
commit 00d8d755fc
2 changed files with 13 additions and 7 deletions

View File

@ -1015,13 +1015,13 @@ class InfoExtractor(object):
unique_formats.append(f)
formats[:] = unique_formats
def _is_valid_url(self, url, video_id, item='video'):
def _is_valid_url(self, url, video_id, item='video', headers={}):
url = self._proto_relative_url(url, scheme='http:')
# For now assume non HTTP(S) URLs always valid
if not (url.startswith('http://') or url.startswith('https://')):
return True
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item)
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_urllib_error.URLError):

View File

@ -73,22 +73,28 @@ class InfoQIE(BokeCCBaseIE):
},
}]
def _extract_http_audio(self, webpage, video_id):
    """Return the HTTP audio format for the page, if one is downloadable.

    Looks up the ``filename`` entry in the fields returned by
    ``self._hidden_inputs(webpage)``, joins it onto the mp3 download base
    URL and probes the resulting URL via ``self._is_valid_url`` using the
    cookies returned by ``self._extract_cookies(webpage)``.

    Returns a list with a single format dict, or an empty list when the
    page has no ``filename`` field or the probe reports the URL as invalid.
    """
    fields = self._hidden_inputs(webpage)
    # Use .get(): indexing with ['filename'] raises KeyError when the field
    # is absent, which made the "no audio" guard below unreachable.
    http_audio_url = fields.get('filename')
    if not http_audio_url:
        return []

    # The same Cookie header is sent for the probe and for the download.
    cookies_header = {'Cookie': self._extract_cookies(webpage)}

    # base URL is found in the Location header in the response returned by
    # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
    http_audio_url = compat_urlparse.urljoin(
        'http://res.infoq.com/downloads/mp3downloads/', http_audio_url)

    # audio files seem to be missing sometimes even if there is a download
    # link, so probe the URL to make sure
    if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
        return []

    return [{
        'format_id': 'http_audio',
        'url': http_audio_url,
        'vcodec': 'none',
        'http_headers': cookies_header,
    }]
def _real_extract(self, url):
@ -105,7 +111,7 @@ class InfoQIE(BokeCCBaseIE):
formats = (
self._extract_rtmp_video(webpage) +
self._extract_http_video(webpage) +
self._extract_http_audio(webpage))
self._extract_http_audio(webpage, video_id))
self._sort_formats(formats)