Detect the Cloudflare challenge
and, if cfscrape
is available, try to solve it.
This commit is contained in:
parent
93bb6b1bae
commit
ba2623208c
@ -52,6 +52,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
GeoUtils,
|
GeoUtils,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
JSON_LD_RE,
|
JSON_LD_RE,
|
||||||
@ -66,6 +67,7 @@ from ..utils import (
|
|||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
|
std_headers,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -79,6 +81,11 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
import cfscrape
|
||||||
|
cfscrape_available = True
|
||||||
|
except ImportError:
|
||||||
|
cfscrape_available = False
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
@ -625,6 +632,26 @@ class InfoExtractor(object):
|
|||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
if isinstance(err, compat_urllib_error.HTTPError) and not isinstance(url_or_request, HEADRequest):
|
||||||
|
if err.code == 503 and err.headers.get('Server').startswith('cloudflare'):
|
||||||
|
if not cfscrape_available:
|
||||||
|
raise ExtractorError('Cloudflare challenge found. Provide cookies or install cfscrape.', expected=True)
|
||||||
|
else:
|
||||||
|
self.to_screen('Solving Cloudflare challenge (~7s)')
|
||||||
|
scraper = cfscrape.create_scraper()
|
||||||
|
cookies = dict((cookie.name, cookie.value) for cookie in self._downloader.cookiejar)
|
||||||
|
try:
|
||||||
|
tokens = scraper.get_tokens(err.geturl(), std_headers['User-Agent'], cookies=cookies)
|
||||||
|
except ValueError as e:
|
||||||
|
raise ExtractorError('cfscrape error: %s' % e, expected=True)
|
||||||
|
cookie = url_or_request.get_header('Cookie')
|
||||||
|
cookie += '; cf_clearance=' + tokens[0]['cf_clearance']
|
||||||
|
url_or_request = update_Request(url_or_request, headers={'Cookie': cookie})
|
||||||
|
self.to_screen('Redownload webpage')
|
||||||
|
try:
|
||||||
|
return self._downloader.urlopen(url_or_request)
|
||||||
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as new_err:
|
||||||
|
err = new_err
|
||||||
if isinstance(err, compat_urllib_error.HTTPError):
|
if isinstance(err, compat_urllib_error.HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
# Retain reference to error to prevent file object from
|
# Retain reference to error to prevent file object from
|
||||||
|
Loading…
x
Reference in New Issue
Block a user