Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a2880c1
bug(test-benchmark): raise UsageError when config file missing
spencer-tb Jan 7, 2026
5252a3f
bug(test-benchmark): validate fixed-opcode-count input
spencer-tb Jan 7, 2026
df722a7
feat(test-benchmark): use longest-match for pattern specificity
spencer-tb Jan 7, 2026
4629459
feat(test-benchmark): add repricing marker to precompile tests
spencer-tb Jan 7, 2026
3df3750
test(test-benchmark): add unit tests for OpcodeCountsConfig
spencer-tb Jan 7, 2026
5c8f2d5
bug(test-benchmark): fix config file pattern matching for parametrize…
spencer-tb Jan 7, 2026
be5ebea
chore: fix tox ruff static fails
spencer-tb Jan 7, 2026
7b0d59f
chore(test-benchmark): remove repricing marker from modexp test
spencer-tb Jan 8, 2026
9e6ab20
chore(test-benchmark): remove repricing marker from bls12-381 and bn1…
spencer-tb Jan 8, 2026
3ff232b
chore(test-benchmark): small line refactor
spencer-tb Jan 8, 2026
b09c238
feat(test-benchmark): optimize and remove redundancies in parser
spencer-tb Jan 8, 2026
e61b479
feat(test-benchmark): support float values for fixed opcode counts
spencer-tb Jan 8, 2026
7c2331f
chore(test-benchmark): remove lower case pytest id for log tests
spencer-tb Jan 8, 2026
f27e0ec
feat(test-benchmark): properly support sub 1k opcode counts
spencer-tb Jan 8, 2026
899df0f
chore(test-benchmark): remove test mod for foc
spencer-tb Jan 8, 2026
025a5e6
chore: fix static tox mypy
spencer-tb Jan 8, 2026
3956afc
bug(test-benchmark): fix config file mode not parametrizing multiple …
spencer-tb Jan 9, 2026
cda4cf3
test(test-benchmark): add unit tests for multiple opcode count parame…
spencer-tb Jan 9, 2026
e64765d
feat(test-benchmark): add per-parameter pattern matching for config f…
spencer-tb Jan 9, 2026
bf0c154
test(test-benchmark): add unit tests for per-parameter opcode count p…
spencer-tb Jan 9, 2026
d774209
test(test-benchmark): add regression tests for CLI mode and default c…
spencer-tb Jan 9, 2026
b8b0a46
bug(test-benchmark): fix unhashable list params in per-parameter matc…
spencer-tb Jan 9, 2026
7053a23
chore: fix static tox ruff
spencer-tb Jan 9, 2026
a132418
bug(test-benchmark): fix inner iterations for non-integer opcode counts
spencer-tb Jan 9, 2026
f2e0bea
bug(test-benchmark): validate minimum fixed_opcode_count of 0.001
spencer-tb Jan 9, 2026
360096f
feat(test-benchmark): improve fixed_opcode_count granularity and vali…
spencer-tb Jan 9, 2026
406940d
docs(test-benchmark): clarify config file uses same value restrictions
spencer-tb Jan 9, 2026
8dad4f0
docs(test-benchmark): improve help and error messages for contract si…
spencer-tb Jan 9, 2026
725101f
feat(test-benchmark): simplify to 0.25K granularity for >= 1K counts
spencer-tb Jan 9, 2026
404f606
docs(test-benchmark): add granularity rules for opcode gas costs
spencer-tb Jan 9, 2026
6820a66
bug(test-benchmark): error on invalid regex patterns in config
spencer-tb Jan 9, 2026
9fafef7
test(test-benchmark): improve test IDs and add per-param fallback test
spencer-tb Jan 9, 2026
f203327
docs(test-benchmark): add README for benchmark tests
spencer-tb Jan 9, 2026
5a62902
docs(test-benchmark): comprehensive README for benchmark tests
spencer-tb Jan 9, 2026
fddba4f
chore: fix static tox ruff
spencer-tb Jan 9, 2026
a83b4c7
fix(test-benchmark): persist CREATE2 salt across outer loop calls
spencer-tb Jan 9, 2026
8a09073
chore: fix static tox ruff
spencer-tb Jan 9, 2026
22c14e9
chore(test-benchmark): add evmone to unit tests CI
spencer-tb Jan 12, 2026
e6313d6
chore(test-benchmark): use collect-only for collection tests
spencer-tb Jan 12, 2026
85ec326
fix(test-benchmark): deduplicate opcode counts in CLI mode
spencer-tb Jan 14, 2026
b62cf1a
refactor: update repricing marker
LouisTsai-Csie Jan 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,15 @@ jobs:
enable-cache: false
version: ${{ vars.UV_VERSION }}

- uses: ./.github/actions/build-evm-base
id: evm-builder
with:
type: benchmark

- name: Run benchmark unit tests
run: uvx tox -e tests_benchmark_pytest_py3
env:
EVM_BIN: ${{ steps.evm-builder.outputs.evm-bin }}

sanity-checks:
name: ${{ matrix.name }}
Expand Down
125 changes: 62 additions & 63 deletions packages/testing/src/execution_testing/cli/benchmark_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

Usage:
uv run benchmark_parser # Update `.fixed_opcode_counts.json`
uv run benchmark_parser --check # Check for new/missing entries (CI)
uv run benchmark_parser --check # Check for new/missing entries
"""

import argparse
import ast
import re
import sys
from pathlib import Path

Expand All @@ -19,6 +20,31 @@
)


def is_related_pattern(pattern: str, detected_patterns: set[str]) -> bool:
    """
    Tell whether an existing config pattern overlaps any detected pattern.

    Two directions are tried, and either one is enough:

    * ``pattern`` compiled as a regex finds a match inside one of the
      detected pattern strings (the existing pattern is the broader one).
    * one of the detected patterns, used as a regex, finds a match inside
      the ``pattern`` string (the existing pattern is the more specific one).

    A string that is not a valid regex is simply skipped for the direction
    in which it would have been used as the regex.

    Returns:
        True when either direction matches, otherwise False.
    """
    # Direction 1: the existing pattern acts as the regex.
    try:
        existing_regex = re.compile(pattern)
    except re.error:
        # Not a usable regex; only the reverse direction can still match.
        existing_regex = None

    if existing_regex is not None and any(
        existing_regex.search(candidate) for candidate in detected_patterns
    ):
        return True

    # Direction 2: each detected pattern acts as the regex.
    for candidate in detected_patterns:
        try:
            hit = re.search(candidate, pattern)
        except re.error:
            continue
        if hit:
            return True

    return False


def get_repo_root() -> Path:
"""Get the repository root directory."""
current = Path.cwd()
Expand Down Expand Up @@ -190,20 +216,15 @@ def _extract_opcode_name(self, node: ast.expr) -> str | None:
return None


def scan_benchmark_tests(
base_path: Path,
) -> tuple[dict[str, list[int]], dict[str, Path]]:
def scan_benchmark_tests(base_path: Path) -> dict[str, list[float]]:
"""
Scan benchmark test files and extract opcode patterns.

Returns:
Tuple of (config, pattern_sources) where:
- config: mapping of pattern -> opcode counts
- pattern_sources: mapping of pattern -> source file path
Mapping of pattern -> opcode counts (default [1] for new patterns).
"""
config: dict[str, list[int]] = {}
pattern_sources: dict[str, Path] = {}
default_counts = [1]
config: dict[str, list[float]] = {}
default_counts: list[float] = [1.0]

test_files = [
f
Expand All @@ -222,12 +243,11 @@ def scan_benchmark_tests(
for pattern in extractor.patterns:
if pattern not in config:
config[pattern] = default_counts
pattern_sources[pattern] = test_file
except Exception as e:
print(f"Warning: Failed to parse {test_file}: {e}")
continue

return config, pattern_sources
return config


def load_existing_config(config_file: Path) -> OpcodeCountsConfig:
Expand All @@ -237,47 +257,12 @@ def load_existing_config(config_file: Path) -> OpcodeCountsConfig:
return OpcodeCountsConfig.model_validate_json(config_file.read_bytes())


def categorize_patterns(
    config: dict[str, list[int]], pattern_sources: dict[str, Path]
) -> dict[str, list[str]]:
    """
    Group patterns under a category derived from their source file name.

    The category is the file stem with its leading "test_" removed and
    upper-cased (test_arithmetic.py -> ARITHMETIC). Patterns with no
    recorded source file, or whose file stem does not start with "test_",
    are grouped under "OTHER".

    Returns:
        Mapping of category -> sorted patterns, with categories inserted in
        sorted order.
    """
    grouped: dict[str, list[str]] = {}

    for name in config:
        source = pattern_sources.get(name) if name in pattern_sources else None
        stem = source.stem if name in pattern_sources else ""
        # Strip the five-character "test_" prefix when present.
        label = stem[5:].upper() if stem.startswith("test_") else "OTHER"
        grouped.setdefault(label, []).append(name)

    ordered: dict[str, list[str]] = {}
    for label, names in sorted(grouped.items()):
        ordered[label] = sorted(names)
    return ordered


def generate_config_json(
config: dict[str, list[int]],
pattern_sources: dict[str, Path],
default_counts: list[int],
config: dict[str, list[float]],
default_counts: list[float],
) -> OpcodeCountsConfig:
"""Generate the JSON config file content."""
categories = categorize_patterns(config, pattern_sources)

scenario_configs: dict[str, list[int]] = {}
for _, patterns in categories.items():
for pattern in patterns:
scenario_configs[pattern] = config[pattern]

"""Generate the JSON config file content with sorted patterns."""
scenario_configs = {k: config[k] for k in sorted(config.keys())}
return OpcodeCountsConfig(
scenario_configs=scenario_configs,
default_counts=default_counts,
Expand All @@ -304,7 +289,7 @@ def main() -> int:
return 1

print(f"Scanning benchmark tests in {benchmark_dir}...")
detected, pattern_sources = scan_benchmark_tests(benchmark_dir)
detected = scan_benchmark_tests(benchmark_dir)
print(f"Detected {len(detected)} opcode patterns")

existing_file = load_existing_config(config_file)
Expand All @@ -314,11 +299,25 @@ def main() -> int:
detected_keys = set(detected.keys())
existing_keys = set(existing.keys())
new_patterns = sorted(detected_keys - existing_keys)
obsolete_patterns = sorted(existing_keys - detected_keys)

# Separate truly obsolete patterns from related patterns that should be kept
potentially_obsolete = existing_keys - detected_keys
related_patterns: set[str] = set()
obsolete_patterns: set[str] = set()
for pattern in potentially_obsolete:
if is_related_pattern(pattern, detected_keys):
related_patterns.add(pattern)
else:
obsolete_patterns.add(pattern)

# Merge: start with detected, preserve existing counts, keep related patterns
merged = detected.copy()
for pattern, counts in existing.items():
if pattern in detected_keys:
# Preserve existing counts for detected patterns
merged[pattern] = counts
elif pattern in related_patterns:
# Keep related patterns (broader or more specific) with their existing counts
merged[pattern] = counts

print("\n" + "=" * 60)
Expand All @@ -332,14 +331,21 @@ def main() -> int:
if len(new_patterns) > 15:
print(f" ... and {len(new_patterns) - 15} more")

if related_patterns:
print(f"\n~ Preserving {len(related_patterns)} RELATED patterns:")
for p in sorted(related_patterns)[:15]:
print(f" {p}")
if len(related_patterns) > 15:
print(f" ... and {len(related_patterns) - 15} more")

if obsolete_patterns:
print(f"\n- Found {len(obsolete_patterns)} OBSOLETE patterns:")
for p in obsolete_patterns[:15]:
for p in sorted(obsolete_patterns)[:15]:
print(f" {p}")
if len(obsolete_patterns) > 15:
print(f" ... and {len(obsolete_patterns) - 15} more")

if not new_patterns and not obsolete_patterns:
if not new_patterns and not obsolete_patterns and not related_patterns:
print("\nConfiguration is up to date!")

print("=" * 60)
Expand All @@ -350,14 +356,7 @@ def main() -> int:
return 1
return 0

for pattern in obsolete_patterns:
print(f"Removing obsolete: {pattern}")
if pattern in merged:
del merged[pattern]

content = generate_config_json(
merged, pattern_sources, existing_file.default_counts
)
content = generate_config_json(merged, existing_file.default_counts)
config_file.write_text(
content.model_dump_json(exclude_defaults=True, indent=2)
)
Expand Down
Loading
Loading