From fef3a29f92bb1cda571d66c7953200a125a35b46 Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Mon, 10 Apr 2017 22:47:24 +0200 Subject: [PATCH 1/4] [GameStar] handle JSON containing control chars --- youtube_dl/extractor/common.py | 4 ++-- youtube_dl/extractor/gamestar.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ae8af61de..6b46db357 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -665,11 +665,11 @@ class InfoExtractor(object): return self._parse_json( json_string, video_id, transform_source=transform_source, fatal=fatal) - def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): + def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, strict=True): if transform_source: json_string = transform_source(json_string) try: - return json.loads(json_string) + return json.loads(json_string, strict=strict) except ValueError as ve: errmsg = '%s: Failed to parse JSON ' % video_id if fatal: diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index e607d6ab8..f9da04342 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -35,7 +35,7 @@ class GameStarIE(InfoExtractor): # while _search_json_ld finds only the first one json_ld = self._parse_json(self._search_regex( r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)', - webpage, 'JSON-LD', group='json_ld'), video_id) + webpage, 'JSON-LD', group='json_ld'), video_id, strict=False) info_dict = self._json_ld(json_ld, video_id) info_dict['title'] = remove_end(info_dict['title'], ' - GameStar') From e1e9624d1f973a0637c1fcece4532e5bbf1997c0 Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Mon, 10 Apr 2017 23:08:37 +0200 Subject: [PATCH 2/4] [GameStar] add test for JSON containing control chars --- youtube_dl/extractor/gamestar.py | 20 +++++++++++++++++--- 1 file changed, 17 
insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index f9da04342..5c412c4d8 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -10,7 +10,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' - _TEST = { + _TESTS = [{ 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', 'info_dict': { @@ -22,8 +22,22 @@ class GameStarIE(InfoExtractor): 'timestamp': 1406542020, 'upload_date': '20140728', 'duration': 17 - } - } + }, + }, { + # control characters in JSON-LD (description field) + 'url': 'http://www.gamestar.de/videos/rain-world-gameplay-trailer-stellt-das-sandbox-spiel-vor,92640.html', + 'md5': '97e530c2c4e3d0d666e039f675656071', + 'info_dict': { + 'id': '92640', + 'ext': 'mp4', + 'title': 'Rain World - Gameplay-Trailer stellt das Sandbox-Spiel vor', + 'description': 'Der Trailer zum Sandbox-Spiel\xa0Rain World stellt anhand von Gameplay-Szenen die wichtigsten Elemente etwas genauer vor.\nDer Spieler...', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1490697540, + 'upload_date': '20170328', + 'duration': 72 + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From 7bff1e658431a0a8ad7c2b748fe2b0a4a8b9a97e Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Mon, 10 Apr 2017 23:20:34 +0200 Subject: [PATCH 3/4] [GameStar] fix comment_count extraction --- youtube_dl/extractor/gamestar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 5c412c4d8..1c729850d 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -55,8 +55,8 @@ class GameStarIE(InfoExtractor): view_count = json_ld.get('interactionCount') comment_count = int_or_none(self._html_search_regex( - 
r'([0-9]+) Kommentare', webpage, 'comment_count', - fatal=False)) + r'([0-9]+)', + webpage, 'comment_count', fatal=False)) info_dict.update({ 'id': video_id, From 66c31c84860c513d4bffb516e48a1f8085428f92 Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Tue, 11 Apr 2017 00:15:58 +0200 Subject: [PATCH 4/4] [GameStar] make comment_count regular expression flexible --- youtube_dl/extractor/gamestar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 1c729850d..bf0c28a9f 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -55,8 +55,8 @@ class GameStarIE(InfoExtractor): view_count = json_ld.get('interactionCount') comment_count = int_or_none(self._html_search_regex( - r'([0-9]+)', - webpage, 'comment_count', fatal=False)) + r']+class=(["\'])[a-zA-Z0-9_\- ]*comment-text[a-zA-Z0-9_\- ]*\1[^>]*>]*>(?P<comment_count>[0-9]+)', + webpage, 'comment_count', group='comment_count', fatal=False)) info_dict.update({ 'id': video_id,