Revert "Fixed invalid control character error"
This commit is contained in:
parent
399ffa8f29
commit
fcf5e0ac1c
@ -2,11 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
@ -23,14 +21,11 @@ class KanaldBaseIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
"""FIXME: https://www.kanald.com.tr/kuzeyguney/80-bolum-izle/19364 -> Invalid control character at: line 5 column 146 (char 255)"""
|
||||||
|
|
||||||
search_json_ld = self._search_regex(
|
search_json_ld = self._search_regex(
|
||||||
r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?:\s+)?(?P<json_ld>{[^<]+VideoObject[^<]+})(?:\s+)?</script>', webpage, 'JSON-LD', group='json_ld')
|
r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?:\s+)?(?P<json_ld>{[^<]+VideoObject[^<]+})(?:\s+)?</script>', webpage, 'JSON-LD', group='json_ld')
|
||||||
|
json_ld = self._parse_json(search_json_ld, video_id)
|
||||||
# https://stackoverflow.com/questions/22394235/invalid-control-character-with-python-json-loads
|
|
||||||
try:
|
|
||||||
json_ld = json.loads(search_json_ld, strict=False)
|
|
||||||
except ValueError as ve:
|
|
||||||
raise ExtractorError('%s: Failed to parse JSON ' % video_id, cause=ve)
|
|
||||||
|
|
||||||
if not re.match(r'dogannet\.tv', json_ld['contentUrl']):
|
if not re.match(r'dogannet\.tv', json_ld['contentUrl']):
|
||||||
json_ld.update({
|
json_ld.update({
|
||||||
@ -98,7 +93,7 @@ class KanaldEmbedIE(KanaldBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class KanaldSerieIE(InfoExtractor):
|
class KanaldSerieIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?kanald\.com\.tr/(?P<id>[a-zA-Z0-9-]+)/(?:bolum|bolumler)'
|
_VALID_URL = r'https?://(?:www\.)?kanald\.com\.tr/(?P<id>[a-zA-Z0-9-]+)/(?:bolum|bolumler)$'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.kanald.com.tr/kuzeyguney/bolum',
|
'url': 'https://www.kanald.com.tr/kuzeyguney/bolum',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user