Skip to content

Commit

Permalink
Fix #86, in array context if an object has repeated keys it's not val…
Browse files Browse the repository at this point in the history
…id and can be split up. This will allow us to find cases in which the brace is missing but the form is still valid
  • Loading branch information
mangiucugna committed Dec 18, 2024
1 parent 5140581 commit 525b367
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "json_repair"
version = "0.31.0"
version = "0.32.0"
license = {file = "LICENSE"}
authors = [
{ name="Stefano Baccianella", email="[email protected]" },
Expand Down
10 changes: 10 additions & 0 deletions src/json_repair/json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def parse_object(self) -> Dict[str, JSONReturnType]:

self.skip_whitespaces_at()

# Save this index in case we find a duplicate key and need to roll back
rollback_index = self.index

# <member> starts with a <string>
key = ""
while self.get_char_at():
Expand All @@ -132,7 +135,14 @@ def parse_object(self) -> Dict[str, JSONReturnType]:
if key != "" or (key == "" and self.get_char_at() == ":"):
# If the string is empty but there is an object divider, we are done here
break
if ContextValues.ARRAY in self.context.context and key in obj:
self.log(
"While parsing an object we found a duplicate key, closing the object here and rolling back the index",
)
self.index = rollback_index - 1
break

# Skip filler whitespaces
self.skip_whitespaces_at()

# We reached the end here
Expand Down
1 change: 1 addition & 0 deletions tests/test_json_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def test_object_edge_cases():
assert repair_json('{text:words{words in brackets}m}') == '{"text": "words{words in brackets}m"}'
assert repair_json('{"key": "value, value2"```') == '{"key": "value, value2"}'
assert repair_json('{key:value,key2:value2}') == '{"key": "value", "key2": "value2"}'
assert repair_json('[{"lorem": {"ipsum": "sic"}, "lorem": {"ipsum": "sic"}]') == '[{"lorem": {"ipsum": "sic"}}, "lorem", {"ipsum": "sic"}]'

def test_number_edge_cases():
assert repair_json(' - { "test_key": ["test_value", "test_value2"] }') == '{"test_key": ["test_value", "test_value2"]}'
Expand Down
10 changes: 5 additions & 5 deletions tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_true_true_correct(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
max_time = 1.9 / 10 ** 3 # 1.9 millisecond

# Assert that the average time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand All @@ -31,7 +31,7 @@ def test_true_true_incorrect(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
max_time = 9 / 10 ** 3 # 9 milliseconds

# Assert that the average time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand All @@ -53,7 +53,7 @@ def test_true_false_incorrect(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
max_time = 1.9 / 10 ** 3 # 1.9 millisecond

# Assert that the average time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand All @@ -64,7 +64,7 @@ def test_false_true_correct(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
max_time = 1.9 / 10 ** 3 # 1.9 millisecond

# Assert that the average time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand All @@ -75,7 +75,7 @@ def test_false_true_incorrect(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds
max_time = 1.8 / 10 ** 3 # 1.8 millisecond
max_time = 1.9 / 10 ** 3 # 1.9 millisecond

# Assert that the average time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand Down

0 comments on commit 525b367

Please sign in to comment.