2 changes: 1 addition & 1 deletion .github/configs/feature.yaml
@@ -9,7 +9,7 @@ develop:

benchmark:
evm-type: benchmark
fill-params: --no-html --fork=Osaka --gas-benchmark-values 1,5,10,30,60,100,150 -m benchmark ./tests/benchmark
fill-params: --no-html --fork=Osaka --gas-benchmark-values 1,5,10,30,60,100,150 -m benchmark ./tests/benchmark --maxprocesses=30 --durations=50

benchmark_fast:
evm-type: benchmark
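A note on the two new fill-params (my reading of the change, not stated in the PR): `--maxprocesses=30` is the standard pytest-xdist option that caps the number of parallel workers, and `--durations=50` is the stock pytest option that reports the 50 slowest tests at the end of the run. Since each xdist worker writes its own partial fixture file (see collector.py below), the worker cap also bounds how many partials the session-end merge has to combine. A minimal sketch of an equivalent local invocation, assuming `fill` is on PATH and forwards these parameters to pytest/pytest-xdist unchanged:

```python
# Sketch only: reproduces the benchmark fill-params from feature.yaml locally.
# Assumption: `fill` is the console script used by this repo and passes the
# flags below straight through to pytest / pytest-xdist.
import subprocess

subprocess.run(
    [
        "fill",
        "--no-html",
        "--fork=Osaka",
        "--gas-benchmark-values", "1,5,10,30,60,100,150",
        "-m", "benchmark",
        "./tests/benchmark",
        "--maxprocesses=30",  # pytest-xdist: at most 30 parallel workers
        "--durations=50",     # pytest: print the 50 slowest test durations
    ],
    check=True,
)
```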
66 changes: 44 additions & 22 deletions packages/testing/src/execution_testing/fixtures/collector.py
@@ -3,6 +3,7 @@
of generated fixtures.
"""

import heapq
import json
import os
import re
@@ -13,6 +14,7 @@
IO,
ClassVar,
Dict,
Generator,
List,
Literal,
Optional,
@@ -26,12 +28,36 @@
from .file import Fixtures


def _sorted_entries_from_partial(
partial_path: Path,
) -> Generator[Tuple[str, str], None, None]:
"""
Generator yielding (key, value) pairs from a partial file, sorted by key.

Loads one partial file into memory at a time (not all partials together).
Each worker's partial file is typically small relative to the total.
"""
entries = []
with open(partial_path) as f:
for line in f:
line = line.strip()
if line:
entry = json.loads(line)
entries.append((entry["k"], entry["v"]))
entries.sort(key=lambda x: x[0])
yield from entries


def merge_partial_fixture_files(output_dir: Path) -> None:
"""
Merge all partial fixture JSONL files into final JSON fixture files.

Called at session end after all workers have written their partials.
Each partial file contains JSONL lines: {"k": fixture_id, "v": json_str}

Uses k-way merge: each partial file is sorted individually, then merged
using heapq.merge. This keeps memory usage proportional to the largest
single partial file, not the total of all partials.
"""
# Find all partial files
partial_files = list(output_dir.rglob("*.partial.*.jsonl"))
@@ -56,29 +82,25 @@ def merge_partial_fixture_files(output_dir: Path) -> None:

# Merge each group into its target file
for target_path, partials in partials_by_target.items():
entries: Dict[str, str] = {}

# Read all partial files
for partial in partials:
with open(partial) as f:
for line in f:
line = line.strip()
if not line:
continue
entry = json.loads(line)
entries[entry["k"]] = entry["v"]

# Write final JSON file
sorted_keys = sorted(entries.keys())
last_idx = len(sorted_keys) - 1
with open(target_path, "w") as f:
f.write("{\n")
for i, key in enumerate(sorted_keys):
# K-way merge: sort each partial individually, then merge streams
# Memory = O(largest single partial), not O(sum of all partials)
sorted_iterators = [_sorted_entries_from_partial(p) for p in partials]
merged = heapq.merge(*sorted_iterators, key=lambda x: x[0])

# Stream merged entries to output file
with open(target_path, "w") as out_f:
out_f.write("{\n")
first = True
for key, value in merged:
if not first:
out_f.write(",\n")
first = False
key_json = json.dumps(key)
value_indented = entries[key].replace("\n", "\n ")
f.write(f" {key_json}: {value_indented}")
f.write(",\n" if i < last_idx else "\n")
f.write("}")
value_indented = value.replace("\n", "\n ")
out_f.write(f" {key_json}: {value_indented}")
if not first:
out_f.write("\n")
out_f.write("}")

# Clean up partial files
for partial in partials:
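To make the new merge path concrete, below is a usage sketch that simulates two xdist workers by writing partial JSONL files by hand and then calls `merge_partial_fixture_files`. Only the `{"k": ..., "v": ...}` line format and the `*.partial.*.jsonl` glob come from the diff above; the `<target>.partial.<worker>.jsonl` naming, the import path, and the assumption that the merged file is the name with `.partial.<worker>.jsonl` stripped are guesses at the collector's conventions. One caveat, as I read the code: `_sorted_entries_from_partial` reads and sorts a whole partial before its first yield, and `heapq.merge` advances every input iterator once up front, so every partial's sorted list stays in memory for the duration of the merge; the clear win is streaming the output file, rather than strictly bounding memory to the largest single partial.

```python
# Usage sketch, not part of the PR; assumptions are flagged inline.
import json
from pathlib import Path
from tempfile import TemporaryDirectory

# Assumption: the import path mirrors
# packages/testing/src/execution_testing/fixtures/collector.py.
from execution_testing.fixtures.collector import merge_partial_fixture_files

with TemporaryDirectory() as tmp:
    out = Path(tmp)
    # Each simulated worker writes one JSONL partial:
    # one {"k": fixture_id, "v": serialized_fixture_json} object per line.
    (out / "fixtures.json.partial.gw0.jsonl").write_text(
        json.dumps({"k": "test_b", "v": json.dumps({"gas": 2})}) + "\n"
    )
    (out / "fixtures.json.partial.gw1.jsonl").write_text(
        json.dumps({"k": "test_a", "v": json.dumps({"gas": 1})}) + "\n"
    )

    merge_partial_fixture_files(out)

    # Assumption: the target file is the name with ".partial.<worker>.jsonl" stripped.
    merged = json.loads((out / "fixtures.json").read_text())
    assert list(merged) == ["test_a", "test_b"]  # keys arrive in sorted order
    # Per the "Clean up partial files" step, the partials should be gone afterwards.
    assert not list(out.rglob("*.partial.*.jsonl"))
```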