From e6e9e1f45bc28c597ec43591fbb5bbd25b668720 Mon Sep 17 00:00:00 2001 From: sulyi Date: Mon, 21 Nov 2016 11:39:53 +0100 Subject: [PATCH 1/3] [utils] Fixing runaway comments in js_to_json --- youtube_dl/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bda59e627..bcea188a3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2109,14 +2109,11 @@ def js_to_json(code): return '"%s"' % v - # fixing , followed nothing, but comments - # fixing unnecessary ? in /\*.*?\*/ - # fixing greedy comment return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - ,(?=(\s|(/\*((?!\*/)\n|.)*\*/))*[\]}])| - /\*((?!\*/)\n|.)*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| + ,(?=(\s|(/\*((?!\*/)(\n|.))*\*/))*[\]}])| + /\*((?!\*/)(\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From 321045a3d56e08879076b3c5cdb0f86bd1e43237 Mon Sep 17 00:00:00 2001 From: sulyi Date: Mon, 21 Nov 2016 11:44:07 +0100 Subject: [PATCH 2/3] [utils] Using non capturing groups in js_to_json --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcea188a3..548ce12e2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2112,8 +2112,8 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - ,(?=(\s|(/\*((?!\*/)(\n|.))*\*/))*[\]}])| - /\*((?!\*/)(\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| + ,(?=(?:\s|(?:/\*(?:(?!\*/)(?:\n|.))*\*/))*[\]}])| + /\*(?:(?!\*/)(?:\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From d88d4535be1d7975346b54f4fffa138a51a98d5f Mon Sep 17 00:00:00 2001 From: sulyi Date: Mon, 21 Nov 2016 11:44:55 +0100 Subject: [PATCH 3/3] [utils] Adding test_js_to_json_landofoz --- test/test_utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 2e3cd0179..c9a3a851d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -743,6 +743,26 @@ class TestUtil(unittest.TestCase): inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + def test_js_to_json_landofoz(self): + inp = '''{ + character: { + name: "Dorothy", + pet: "Toto", + /* source: "Kansas", + destination: "Emerald City", + roll: "heroine" */ + }, + comment: /* over the rainbow */ "/*", + no_comment: "*/" + }''' + self.assertEqual(js_to_json(inp), '''{ + "character": { + "name": "Dorothy", + "pet": "Toto"\n \n }, + "comment": "/*", + "no_comment": "*/" + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})