From 3e4207a7bae918bea0b9b9edbea37588773b74fc Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:34:42 +0100 Subject: [PATCH 1/7] [utils] Fixing js_to_json *or / in comment --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 877879446..3a14048c9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2115,7 +2115,7 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*[^*]*\*/|,(?=(\s|(/\*[^*]*\*/))*[\]}])| + /\*((?!\*/)\n|.)*\*/|,(?=(\s|(/\*((?!\*/)\n|.)*\*/))*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) From 35c1b23be85394541342757307e95c43774aa51c Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:35:22 +0100 Subject: [PATCH 2/7] [utils] Rebalance of pattern in js_to_json --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3a14048c9..bda59e627 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2115,8 +2115,8 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*((?!\*/)\n|.)*\*/|,(?=(\s|(/\*((?!\*/)\n|.)*\*/))*[\]}])| - [a-zA-Z_][.a-zA-Z_0-9]*| + ,(?=(\s|(/\*((?!\*/)\n|.)*\*/))*[\]}])| + /\*((?!\*/)\n|.)*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From 200348e16d12ad3c9c9daedd6f1e068c4a7ed329 Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:36:03 +0100 Subject: [PATCH 3/7] [utils] Fixing runaway comments in js_to_json --- youtube_dl/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bda59e627..bcea188a3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2109,14 +2109,11 @@ def js_to_json(code): return '"%s"' % v - # fixing , followed nothing, but comments - # fixing unnecessary ? in /\*.*?\*/ - # fixing greedy comment return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - ,(?=(\s|(/\*((?!\*/)\n|.)*\*/))*[\]}])| - /\*((?!\*/)\n|.)*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| + ,(?=(\s|(/\*((?!\*/)(\n|.))*\*/))*[\]}])| + /\*((?!\*/)(\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From 5e3f8cea1bcb755424bc6f22d142e70d6c9eed1e Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:36:55 +0100 Subject: [PATCH 4/7] [utils] Using non capturing groups in js_to_json --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bcea188a3..548ce12e2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2112,8 +2112,8 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - ,(?=(\s|(/\*((?!\*/)(\n|.))*\*/))*[\]}])| - /\*((?!\*/)(\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| + ,(?=(?:\s|(?:/\*(?:(?!\*/)(?:\n|.))*\*/))*[\]}])| + /\*(?:(?!\*/)(?:\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From 2e9e076a115c3fc4e2e0f2b51a59814b7705c04c Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:37:50 +0100 Subject: [PATCH 5/7] [utils] Adding test_js_to_json_landofoz --- test/test_utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index cb75ca53e..d030f0b11 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -743,6 +743,26 @@ class TestUtil(unittest.TestCase): inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + def test_js_to_json_landofoz(self): + inp = '''{ + character: { + name: "Dorothy", + pet: "Toto", + /* source: "Kansas", + destination: "Emerald City", + roll: "heroine" */ + }, + comment: /* over the rainbow */ "/*", + no_comment: "*/" + }''' + self.assertEqual(js_to_json(inp), '''{ + "character": { + "name": "Dorothy", + "pet": "Toto"\n \n }, + "comment": "/*", + "no_comment": "*/" + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) From 579c49447ab9846fef0c682b8d3cdac38f4e297d Mon Sep 17 00:00:00 2001 From: sulyi Date: Tue, 22 Nov 2016 02:38:56 +0100 Subject: [PATCH 6/7] [utils] Fixing test_js_to_json_landofoz to test /*** rainbo ***/ --- test/test_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index d030f0b11..55babcc93 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -753,15 +753,19 @@ class TestUtil(unittest.TestCase): roll: "heroine" */ }, comment: /* over the rainbow */ "/*", - no_comment: "*/" + no_comment: "*/", + /*******/ + /***********/ + /*** ***/ + /*** ***/ }''' self.assertEqual(js_to_json(inp), '''{ "character": { "name": "Dorothy", - "pet": "Toto"\n \n }, + "pet": "Toto" + \n }, "comment": "/*", - "no_comment": "*/" - }''') + "no_comment": "*/"\n \n \n \n \n }''') def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") From 287fd7f0cf871348bb85b77d49e9b3e618e48dfc Mon Sep 17 00:00:00 2001 From: sulyi Date: Sat, 26 Nov 2016 01:16:58 +0100 Subject: [PATCH 7/7] [jsinterp] Simpler string regex --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 548ce12e2..34c16d95a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2110,8 +2110,8 @@ def js_to_json(code): return '"%s"' % v return re.sub(r'''(?sx) - "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + "(?:[^'\\]|\\['"nurtbfx/\\])*"| + '(?:[^"\\]|\\['"nurtbfx/\\])*'| ,(?=(?:\s|(?:/\*(?:(?!\*/)(?:\n|.))*\*/))*[\]}])| /\*(?:(?!\*/)(?:\n|.))*\*/|[a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|