Reverted commit 7f4b465 in favor of a much easier fix: using utf-8-sig for encoding
This commit is contained in:
parent
2ac405397e
commit
a72b74c6f9
@ -50,7 +50,8 @@ class CBAIE(InfoExtractor):
|
|||||||
description = ''
|
description = ''
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', 'unable to query posts api-endpoint')
|
posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint',
|
||||||
|
'unable to query posts api-endpoint', encoding='utf-8-sig')
|
||||||
try:
|
try:
|
||||||
title = posts_result['title']['raw']
|
title = posts_result['title']['raw']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@ -71,7 +72,7 @@ class CBAIE(InfoExtractor):
|
|||||||
api_media_url = update_url_query(api_media_url, {'c': self._API_KEY})
|
api_media_url = update_url_query(api_media_url, {'c': self._API_KEY})
|
||||||
|
|
||||||
media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str,
|
media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str,
|
||||||
'unable to qeury media api-endpoint%s' % api_key_str)
|
'unable to qeury media api-endpoint%s' % api_key_str, encoding='utf-8-sig')
|
||||||
for media in media_result:
|
for media in media_result:
|
||||||
try:
|
try:
|
||||||
url = media['source_url']
|
url = media['source_url']
|
||||||
|
@ -57,7 +57,6 @@ from ..utils import (
|
|||||||
parse_m3u8_attributes,
|
parse_m3u8_attributes,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
parse_strip_bom,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -439,10 +438,6 @@ class InfoExtractor(object):
|
|||||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
webpage_bytes, bom_enc = parse_strip_bom(webpage_bytes)
|
|
||||||
if not encoding:
|
|
||||||
encoding = bom_enc
|
|
||||||
|
|
||||||
if prefix is not None:
|
if prefix is not None:
|
||||||
webpage_bytes = prefix + webpage_bytes
|
webpage_bytes = prefix + webpage_bytes
|
||||||
if not encoding:
|
if not encoding:
|
||||||
|
@ -2235,8 +2235,8 @@ def age_restricted(content_limit, age_limit):
|
|||||||
return age_limit < content_limit
|
return age_limit < content_limit
|
||||||
|
|
||||||
|
|
||||||
def parse_strip_bom(data):
|
def is_html(first_bytes):
|
||||||
""" try to find Unicode BOM and strip it. """
|
""" Detect whether a file contains HTML by examining its first bytes. """
|
||||||
|
|
||||||
BOMS = [
|
BOMS = [
|
||||||
(b'\xef\xbb\xbf', 'utf-8'),
|
(b'\xef\xbb\xbf', 'utf-8'),
|
||||||
@ -2246,20 +2246,12 @@ def parse_strip_bom(data):
|
|||||||
(b'\xfe\xff', 'utf-16-be'),
|
(b'\xfe\xff', 'utf-16-be'),
|
||||||
]
|
]
|
||||||
for bom, enc in BOMS:
|
for bom, enc in BOMS:
|
||||||
if data.startswith(bom):
|
if first_bytes.startswith(bom):
|
||||||
return data[len(bom):], enc
|
s = first_bytes[len(bom):].decode(enc, 'replace')
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
return data, None
|
s = first_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
|
|
||||||
def is_html(first_bytes):
|
|
||||||
""" Detect whether a file contains HTML by examining its first bytes. """
|
|
||||||
|
|
||||||
first_bytes, enc = parse_strip_bom(first_bytes)
|
|
||||||
if enc == None:
|
|
||||||
enc = 'utf-8'
|
|
||||||
|
|
||||||
s = first_bytes.decode(enc, 'replace')
|
|
||||||
return re.match(r'^\s*<', s)
|
return re.match(r'^\s*<', s)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user