From 89ac4a19e658203db85c6a1d4b267a2eeb47a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 May 2016 20:39:58 +0600 Subject: [PATCH] [utils] Process non-base 10 integers in js_to_json --- test/test_utils.py | 19 +++++++++++++++++++ youtube_dl/utils.py | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index ab2842f3bc..26f66bff64 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -617,6 +617,15 @@ def test_js_to_json_realworld(self): json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -652,6 +661,16 @@ def test_js_to_json_edgecases(self): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), {'42': 42}) + + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52a20632fa..25a9f33c02 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1925,6 +1925,17 @@ def fix_kv(m): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) + INTEGER_TABLE = ( + (r'^(0[xX][0-9a-fA-F]+)', 16), + (r'^(0+[0-7]+)', 8), + ) + + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i + return '"%s"' % v return re.sub(r'''(?sx) @@ -1932,6 +1943,7 @@ def fix_kv(m): '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| /\*.*?\*/|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| + (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code)