Fixed invalid control character error

This commit is contained in:
Enes 2019-05-06 23:43:08 +03:00
parent fbf0c21b33
commit 29e7569801

View File

@ -2,9 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
merge_dicts, merge_dicts,
try_get, try_get,
) )
@ -21,11 +23,14 @@ class KanaldBaseIE(InfoExtractor):
'id': video_id, 'id': video_id,
} }
"""FIXME: https://www.kanald.com.tr/kuzeyguney/80-bolum-izle/19364 -> Invalid control character at: line 5 column 146 (char 255)"""
search_json_ld = self._search_regex( search_json_ld = self._search_regex(
r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?:\s+)?(?P<json_ld>{[^<]+VideoObject[^<]+})(?:\s+)?</script>', webpage, 'JSON-LD', group='json_ld') r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?:\s+)?(?P<json_ld>{[^<]+VideoObject[^<]+})(?:\s+)?</script>', webpage, 'JSON-LD', group='json_ld')
json_ld = self._parse_json(search_json_ld, video_id)
# https://stackoverflow.com/questions/22394235/invalid-control-character-with-python-json-loads
try:
json_ld = json.loads(search_json_ld, strict=False)
except ValueError as ve:
raise ExtractorError('%s: Failed to parse JSON ' % video_id, cause=ve)
if not re.match(r'dogannet\.tv', json_ld['contentUrl']): if not re.match(r'dogannet\.tv', json_ld['contentUrl']):
json_ld.update({ json_ld.update({