From 3fde270c23b78712bf99d5eba52ea9d9453f4ddc Mon Sep 17 00:00:00 2001 From: Mike Gering Date: Tue, 19 May 2020 09:51:22 -0400 Subject: [PATCH 1/3] Fix for issue #25311, redtube json error --- youtube_dl/extractor/redtube.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index b1bde1e81..ae63de70b 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor @@ -75,10 +76,15 @@ class RedTubeIE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id), }) + # find the mediaDefinitions string that is json-parsable + mobj = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage) + doc1 = webpage[mobj.start(1):] + try: + x = json.loads(doc1) + except json.JSONDecodeError as exc: + doc1 = doc1[0:exc.pos] medias = self._parse_json( - self._search_regex( - r'mediaDefinition\s*:\s*(\[.+?\])', webpage, - 'media definitions', default='{}'), + doc1, video_id, fatal=False) if medias and isinstance(medias, list): for media in medias: From 799a1bba9216b416826c905268262eb88aec9610 Mon Sep 17 00:00:00 2001 From: Mike Gering Date: Tue, 19 May 2020 10:09:02 -0400 Subject: [PATCH 2/3] flake8 compliance --- youtube_dl/extractor/redtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index ae63de70b..00bd95bdb 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -80,7 +80,7 @@ class RedTubeIE(InfoExtractor): mobj = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage) doc1 = webpage[mobj.start(1):] try: - x = json.loads(doc1) + json.loads(doc1) except json.JSONDecodeError as exc: doc1 = doc1[0:exc.pos] medias = self._parse_json( From e92cca27bf4b7b4e2bd2d9fed2dcc62e9134a97e Mon Sep 17 00:00:00 2001 From: Mike Gering Date: Tue, 19 May 2020 10:21:23 -0400 Subject: [PATCH 3/3] better docs --- youtube_dl/extractor/redtube.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 00bd95bdb..38816c98e 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -76,12 +76,22 @@ class RedTubeIE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id), }) - # find the mediaDefinitions string that is json-parsable + # Find the mediaDefinitions string that is json-parsable + # Note: This regex pattern does not necessarily match + # the complete json expression; the complete json may + # extend beyond the matching ']'. Use this regex pattern + # to find the start of the json expression. We don't yet + # know where the json expression ends. mobj = re.search(r'mediaDefinition\s*:\s*(\[.+?\])', webpage) - doc1 = webpage[mobj.start(1):] + doc1 = webpage[mobj.start(1):] # get json plus remaining html try: + # Use the json decoder to find the end of the json + # expression. The decoder will raise an exception when it + # goes past the valid part. json.loads(doc1) except json.JSONDecodeError as exc: + # Use the exception 'pos' attribute to get the complete + # and valid json expression doc1 = doc1[0:exc.pos] medias = self._parse_json( doc1,