diff --git a/youtube_dl/extractor/cba.py b/youtube_dl/extractor/cba.py
index c87171e2b..f291b2da4 100644
--- a/youtube_dl/extractor/cba.py
+++ b/youtube_dl/extractor/cba.py
@@ -8,7 +8,6 @@ from .common import InfoExtractor
 from ..utils import (
     clean_html,
     ExtractorError,
-    strip_bom_utf8,
     RegexNotFoundError,
     UnavailableVideoError,
     update_url_query,
@@ -59,8 +58,7 @@ class CBAIE(InfoExtractor):
         description = ''
         formats = []
 
-        posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint',
-                                           'unable to query posts api-endpoint', transform_source=strip_bom_utf8)
+        posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint', 'unable to query posts api-endpoint')
         try:
             title = clean_html(posts_result['title']['rendered'])
             description = clean_html(posts_result['content']['rendered'])
@@ -73,7 +71,7 @@ class CBAIE(InfoExtractor):
             api_media_url = update_url_query(api_media_url, {'c': self._API_KEY})
 
         media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str,
-                                         'unable to qeury media api-endpoint%s' % api_key_str, transform_source=strip_bom_utf8)
+                                           'unable to qeury media api-endpoint%s' % api_key_str)
         for media in media_result:
             try:
                 url = media['source_url']
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index da0af29ec..f46857523 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -57,6 +57,7 @@ from ..utils import (
     parse_m3u8_attributes,
     extract_attributes,
     parse_codecs,
+    parse_strip_bom,
 )
 
 
@@ -438,6 +439,10 @@ class InfoExtractor(object):
     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
+        webpage_bytes, bom_enc = parse_strip_bom(webpage_bytes)
+        if not encoding:
+            encoding = bom_enc
+
         if prefix is not None:
             webpage_bytes = prefix + webpage_bytes
         if not encoding:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1d3d6600c..3dea36635 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2235,8 +2235,8 @@ def age_restricted(content_limit, age_limit):
     return age_limit < content_limit
 
 
-def is_html(first_bytes):
-    """ Detect whether a file contains HTML by examining its first bytes. """
+def parse_strip_bom(data):
+    """ try to find Unicode BOM and strip it. """
 
     BOMS = [
         (b'\xef\xbb\xbf', 'utf-8'),
@@ -2246,12 +2246,20 @@ def is_html(first_bytes):
         (b'\xfe\xff', 'utf-16-be'),
     ]
     for bom, enc in BOMS:
-        if first_bytes.startswith(bom):
-            s = first_bytes[len(bom):].decode(enc, 'replace')
-            break
+        if data.startswith(bom):
+            return data[len(bom):], enc
     else:
-        s = first_bytes.decode('utf-8', 'replace')
+        return data, None
 
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+    # Strip a leading Unicode BOM (if any) and learn which encoding it announces.
+    first_bytes, enc = parse_strip_bom(first_bytes)
+    if enc is None:
+        enc = 'utf-8'
+    # No BOM means no declared encoding: assume UTF-8; 'replace' tolerates undecodable bytes.
+    s = first_bytes.decode(enc, 'replace')
     return re.match(r'^\s*<', s)
 
 
@@ -3121,11 +3129,3 @@ def decode_png(png_data):
             current_row.append(color)
 
     return width, height, pixels
-
-
-def strip_bom_utf8(s):
-    BOM_UTF8 = u'\ufeff'
-    if s.startswith(BOM_UTF8):
-        return s[len(BOM_UTF8):]
-
-    return s