diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index ee7a816c3d..7fe33f86c2 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -32,8 +32,15 @@ jobs: enable-cache: false version: ${{ vars.UV_VERSION }} + - uses: ./.github/actions/build-evm-base + id: evm-builder + with: + type: benchmark + - name: Run benchmark unit tests run: uvx tox -e tests_benchmark_pytest_py3 + env: + EVM_BIN: ${{ steps.evm-builder.outputs.evm-bin }} sanity-checks: name: ${{ matrix.name }} diff --git a/packages/testing/src/execution_testing/cli/benchmark_parser.py b/packages/testing/src/execution_testing/cli/benchmark_parser.py index f05612fea6..8c5fdfa698 100644 --- a/packages/testing/src/execution_testing/cli/benchmark_parser.py +++ b/packages/testing/src/execution_testing/cli/benchmark_parser.py @@ -6,11 +6,12 @@ Usage: uv run benchmark_parser # Update `.fixed_opcode_counts.json` - uv run benchmark_parser --check # Check for new/missing entries (CI) + uv run benchmark_parser --check # Check for new/missing entries """ import argparse import ast +import re import sys from pathlib import Path @@ -19,6 +20,31 @@ ) +def is_related_pattern(pattern: str, detected_patterns: set[str]) -> bool: + """ + Check if a pattern is related to any detected patterns or more specific. + Related patterns are preserved as they're intentional overrides. + """ + # Check if existing pattern is BROADER than detected + try: + compiled = re.compile(pattern) + for detected in detected_patterns: + if compiled.search(detected): + return True + except re.error: + pass + + # Check if existing pattern is MORE SPECIFIC than detected + for detected in detected_patterns: + try: + if re.search(detected, pattern): + return True + except re.error: + continue + + return False + + def get_repo_root() -> Path: """Get the repository root directory.""" current = Path.cwd() @@ -190,20 +216,15 @@ def _extract_opcode_name(self, node: ast.expr) -> str | None: return None -def scan_benchmark_tests( - base_path: Path, -) -> tuple[dict[str, list[int]], dict[str, Path]]: +def scan_benchmark_tests(base_path: Path) -> dict[str, list[float]]: """ Scan benchmark test files and extract opcode patterns. Returns: - Tuple of (config, pattern_sources) where: - - config: mapping of pattern -> opcode counts - - pattern_sources: mapping of pattern -> source file path + Mapping of pattern -> opcode counts (default [1] for new patterns). """ - config: dict[str, list[int]] = {} - pattern_sources: dict[str, Path] = {} - default_counts = [1] + config: dict[str, list[float]] = {} + default_counts: list[float] = [1.0] test_files = [ f @@ -222,12 +243,11 @@ def scan_benchmark_tests( for pattern in extractor.patterns: if pattern not in config: config[pattern] = default_counts - pattern_sources[pattern] = test_file except Exception as e: print(f"Warning: Failed to parse {test_file}: {e}") continue - return config, pattern_sources + return config def load_existing_config(config_file: Path) -> OpcodeCountsConfig: @@ -237,47 +257,12 @@ def load_existing_config(config_file: Path) -> OpcodeCountsConfig: return OpcodeCountsConfig.model_validate_json(config_file.read_bytes()) -def categorize_patterns( - config: dict[str, list[int]], pattern_sources: dict[str, Path] -) -> dict[str, list[str]]: - """ - Categorize patterns by deriving category from source file name. 
- - Example: test_arithmetic.py -> ARITHMETIC - """ - categories: dict[str, list[str]] = {} - - for pattern in config.keys(): - if pattern in pattern_sources: - source_file = pattern_sources[pattern] - file_name = source_file.stem - if file_name.startswith("test_"): - category = file_name[5:].upper() # Remove "test_" prefix - else: - category = "OTHER" - else: - category = "OTHER" - - if category not in categories: - categories[category] = [] - categories[category].append(pattern) - - return {k: sorted(v) for k, v in sorted(categories.items())} - - def generate_config_json( - config: dict[str, list[int]], - pattern_sources: dict[str, Path], - default_counts: list[int], + config: dict[str, list[float]], + default_counts: list[float], ) -> OpcodeCountsConfig: - """Generate the JSON config file content.""" - categories = categorize_patterns(config, pattern_sources) - - scenario_configs: dict[str, list[int]] = {} - for _, patterns in categories.items(): - for pattern in patterns: - scenario_configs[pattern] = config[pattern] - + """Generate the JSON config file content with sorted patterns.""" + scenario_configs = {k: config[k] for k in sorted(config.keys())} return OpcodeCountsConfig( scenario_configs=scenario_configs, default_counts=default_counts, @@ -304,7 +289,7 @@ def main() -> int: return 1 print(f"Scanning benchmark tests in {benchmark_dir}...") - detected, pattern_sources = scan_benchmark_tests(benchmark_dir) + detected = scan_benchmark_tests(benchmark_dir) print(f"Detected {len(detected)} opcode patterns") existing_file = load_existing_config(config_file) @@ -314,11 +299,25 @@ def main() -> int: detected_keys = set(detected.keys()) existing_keys = set(existing.keys()) new_patterns = sorted(detected_keys - existing_keys) - obsolete_patterns = sorted(existing_keys - detected_keys) + # Separate truly obsolete patterns from related patterns that should be kept + potentially_obsolete = existing_keys - detected_keys + related_patterns: set[str] = set() + obsolete_patterns: set[str] = set() + for pattern in potentially_obsolete: + if is_related_pattern(pattern, detected_keys): + related_patterns.add(pattern) + else: + obsolete_patterns.add(pattern) + + # Merge: start with detected, preserve existing counts, keep related patterns merged = detected.copy() for pattern, counts in existing.items(): if pattern in detected_keys: + # Preserve existing counts for detected patterns + merged[pattern] = counts + elif pattern in related_patterns: + # Keep related patterns (broader or more specific) with their existing counts merged[pattern] = counts print("\n" + "=" * 60) @@ -332,14 +331,21 @@ def main() -> int: if len(new_patterns) > 15: print(f" ... and {len(new_patterns) - 15} more") + if related_patterns: + print(f"\n~ Preserving {len(related_patterns)} RELATED patterns:") + for p in sorted(related_patterns)[:15]: + print(f" {p}") + if len(related_patterns) > 15: + print(f" ... and {len(related_patterns) - 15} more") + if obsolete_patterns: print(f"\n- Found {len(obsolete_patterns)} OBSOLETE patterns:") - for p in obsolete_patterns[:15]: + for p in sorted(obsolete_patterns)[:15]: print(f" {p}") if len(obsolete_patterns) > 15: print(f" ... 
and {len(obsolete_patterns) - 15} more") - if not new_patterns and not obsolete_patterns: + if not new_patterns and not obsolete_patterns and not related_patterns: print("\nConfiguration is up to date!") print("=" * 60) @@ -350,14 +356,7 @@ def main() -> int: return 1 return 0 - for pattern in obsolete_patterns: - print(f"Removing obsolete: {pattern}") - if pattern in merged: - del merged[pattern] - - content = generate_config_json( - merged, pattern_sources, existing_file.default_counts - ) + content = generate_config_json(merged, existing_file.default_counts) config_file.write_text( content.model_dump_json(exclude_defaults=True, indent=2) ) diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py index 30363d66ff..4be43f5994 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_benchmarking.py @@ -1,17 +1,27 @@ """Test the benchmarking pytest plugin for gas benchmark values.""" +import json import textwrap from pathlib import Path from typing import List +from unittest.mock import MagicMock import pytest +from execution_testing.cli.pytest_commands.plugins.shared.benchmarking import ( + OpcodeCountsConfig, +) + +# EVM binary for tests that actually fill (not just collect) +BENCHMARK_EVM_T8N = "evmone-t8n" + test_module_dummy = textwrap.dedent( """\ import pytest from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark def test_dummy_benchmark_test(benchmark_test: BenchmarkTestFiller) -> None: benchmark_test( target_opcode=Op.JUMPDEST, @@ -26,6 +36,7 @@ def test_dummy_benchmark_test(benchmark_test: BenchmarkTestFiller) -> None: from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark def test_dummy_no_benchmark_test(benchmark_test: BenchmarkTestFiller) -> None: benchmark_test( target_opcode=Op.JUMPDEST, @@ -40,6 +51,7 @@ def test_dummy_no_benchmark_test(benchmark_test: BenchmarkTestFiller) -> None: from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark @pytest.mark.repricing def test_benchmark_with_repricing(benchmark_test: BenchmarkTestFiller) -> None: benchmark_test( @@ -48,6 +60,7 @@ def test_benchmark_with_repricing(benchmark_test: BenchmarkTestFiller) -> None: ) @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark def test_benchmark_without_repricing(benchmark_test: BenchmarkTestFiller) -> None: benchmark_test( target_opcode=Op.JUMPDEST, @@ -62,12 +75,14 @@ def test_benchmark_without_repricing(benchmark_test: BenchmarkTestFiller) -> Non from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark def test_with_gas_benchmark_value(state_test, gas_benchmark_value: int) -> None: # This test intentionally uses state_test instead of benchmark_test # to verify that --fixed-opcode-count filters it out state_test(pre={}, post={}, tx=None) @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark def test_with_benchmark_test(benchmark_test: BenchmarkTestFiller) -> None: benchmark_test( target_opcode=Op.JUMPDEST, @@ -79,28 +94,32 @@ def test_with_benchmark_test(benchmark_test: BenchmarkTestFiller) -> 
None: test_module_with_repricing_kwargs = textwrap.dedent( """\ import pytest - from execution_testing import BenchmarkTestFiller, ExtCallGenerator, Op + from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark @pytest.mark.repricing(opcode=Op.ADD) @pytest.mark.parametrize("opcode", [Op.ADD, Op.SUB, Op.MUL]) def test_parametrized_with_repricing_kwargs( benchmark_test: BenchmarkTestFiller, opcode ) -> None: + # Use JUMPDEST for actual benchmarking; opcode param is just for filtering benchmark_test( - target_opcode=opcode, - code_generator=ExtCallGenerator(attack_block=opcode), + target_opcode=Op.JUMPDEST, + code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST), ) @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark @pytest.mark.repricing @pytest.mark.parametrize("opcode", [Op.ADD, Op.SUB]) def test_parametrized_with_repricing_no_kwargs( benchmark_test: BenchmarkTestFiller, opcode ) -> None: + # Use JUMPDEST for actual benchmarking; opcode param is just for filtering benchmark_test( - target_opcode=opcode, - code_generator=ExtCallGenerator(attack_block=opcode), + target_opcode=Op.JUMPDEST, + code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST), ) """ ) @@ -474,3 +493,462 @@ def test_without_repricing_flag_collects_all_tests( assert any( "test_benchmark_without_repricing" in line for line in result.outlines ) + + +def test_fixed_opcode_count_exact_match_priority() -> None: + """ + Exact match takes priority over regex patterns. + + When using a config file, patterns are matched against test names. An exact + string match should take priority over a regex pattern that also matches. + """ + config = OpcodeCountsConfig( + scenario_configs={ + "test_dup": [10], + "test_dup.*": [1], + }, + default_counts=[99], + ) + + params = config.get_test_parameters("test_dup") + assert params[0].values[0] == 10 + + +def test_fixed_opcode_count_longest_pattern_wins() -> None: + """ + Longest matching pattern takes priority. + + When using a config file, if multiple regex patterns match a test name, the + longest pattern should win. This allows more specific patterns to override + broader ones. + """ + config = OpcodeCountsConfig( + scenario_configs={ + "test_dup.*": [1], + "test_dup.*DUP1.*": [5], + }, + default_counts=[99], + ) + + # Longer pattern should win for DUP1 + params = config.get_test_parameters( + "test_dup[fork_Prague-opcount_1K-opcode_DUP1]" + ) + assert params[0].values[0] == 5 + + # Shorter pattern should match for DUP2 + params = config.get_test_parameters( + "test_dup[fork_Prague-opcount_1K-opcode_DUP2]" + ) + assert params[0].values[0] == 1 + + +def test_fixed_opcode_count_default_fallback() -> None: + """ + Default counts are used when no pattern matches. + + When using a config file, if no pattern matches the test name, the + default_counts should be used as a fallback. + """ + config = OpcodeCountsConfig( + scenario_configs={ + "test_dup.*": [1], + }, + default_counts=[99], + ) + + params = config.get_test_parameters("test_other") + assert params[0].values[0] == 99 + + +def test_fixed_opcode_count_multiple_patterns() -> None: + """ + Multiple overlapping patterns are handled correctly. + + Verifies that multiple overlapping patterns of different lengths are handled + correctly. The most specific (longest) matching pattern should win. 
+ """ + config = OpcodeCountsConfig( + scenario_configs={ + "test_.*": [1], + "test_bitwise.*": [2], + "test_bitwise.*AND.*": [3], + }, + default_counts=[99], + ) + + # Most specific pattern should win + params = config.get_test_parameters("test_bitwise[fork_Prague-opcode_AND]") + assert params[0].values[0] == 3 + + # Middle specificity + params = config.get_test_parameters("test_bitwise[fork_Prague-opcode_OR]") + assert params[0].values[0] == 2 + + # Least specific + params = config.get_test_parameters("test_other[fork_Prague]") + assert params[0].values[0] == 1 + + +@pytest.mark.parametrize( + "cli_input,expected_counts", + [ + ("1", [1]), # Single integer + ("1,2,3", [1, 2, 3]), # Multiple integers + ("0.5", [0.5]), # Single float + ("0.1,0.5,1", [0.1, 0.5, 1]), # Multiple floats + ("1,0.5,2", [1, 0.5, 2]), # Mixed int/float + # 10 mixed values + ( + "0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2,3", + [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 3], + ), + ], +) +def test_fixed_opcode_count_valid_input( + cli_input: str, expected_counts: list +) -> None: + """ + Valid comma-separated numbers are accepted. + + The flag accepts comma-separated numbers (integers or floats) as default + opcode counts. This test verifies valid inputs are parsed correctly. + """ + mock_config = MagicMock() + mock_config.rootpath = Path("/tmp") + + result = OpcodeCountsConfig.from_parameter_value(mock_config, cli_input) + assert result is not None + assert result.default_counts == expected_counts + + +def test_fixed_opcode_count_invalid_input() -> None: + """ + Invalid values like test paths are rejected. + + The flag should reject invalid inputs like test paths that get accidentally + consumed by argparse. This prevents confusing errors when users forget to + specify opcode counts before the test path. + """ + mock_config = MagicMock() + mock_config.rootpath = Path("/tmp") + + with pytest.raises(pytest.UsageError) as exc_info: + OpcodeCountsConfig.from_parameter_value( + mock_config, "tests/benchmark/compute/test_foo.py" + ) + + assert "Invalid value for --fixed-opcode-count" in str(exc_info.value) + + +def test_fixed_opcode_count_missing_config() -> None: + """ + Missing config file raises UsageError with helpful message. + + When used without arguments, it expects to load config from + .fixed_opcode_counts.json. If the file is missing, a helpful UsageError + should be raised explaining where to create the config file. + """ + mock_config = MagicMock() + mock_config.rootpath = Path("/nonexistent/path") + + with pytest.raises(pytest.UsageError) as exc_info: + OpcodeCountsConfig.from_parameter_value(mock_config, "") + + assert ".fixed_opcode_counts.json" in str(exc_info.value) + assert "was not found" in str(exc_info.value) + + +def test_fixed_opcode_count_float_values() -> None: + """ + Float values are supported for sub-1K opcode iterations. + + For expensive precompiles that can't run 1000+ iterations within gas limits, + float values like 0.001 (1 opcode) or 0.5 (500 opcodes) can be used. 
+ """ + config = OpcodeCountsConfig( + scenario_configs={ + "test_precompile.*": [0.001, 0.01, 0.1], + }, + default_counts=[1.0], + ) + + counts = config.get_opcode_counts("test_precompile_bn128") + assert counts == [0.001, 0.01, 0.1] + + params = config.get_test_parameters("test_precompile_bn128") + assert len(params) == 3 + assert params[0].id == "opcount_0.001K" + assert params[1].id == "opcount_0.01K" + assert params[2].id == "opcount_0.1K" + + +def test_fixed_opcode_count_invalid_regex_raises_error() -> None: + """ + Invalid regex patterns raise an error. + + If a pattern in the config file contains invalid regex syntax, it should + raise a ValueError with a helpful message indicating which pattern is invalid. + """ + config = OpcodeCountsConfig( + scenario_configs={ + "[invalid(regex": [10.0], # Invalid regex + "test_valid.*": [5.0], + }, + default_counts=[1.0], + ) + + # Should raise error when trying to match against invalid regex + with pytest.raises(ValueError) as exc_info: + config.get_opcode_counts("test_other") + + assert "Invalid regex pattern" in str(exc_info.value) + assert "[invalid(regex" in str(exc_info.value) + + +@pytest.mark.parametrize( + "config_counts,expected_tests,expected_ids", + [ + pytest.param([1], 2, ["opcount_1"], id="single_int"), + pytest.param( + [1, 2, 3], + 6, + ["opcount_1", "opcount_2", "opcount_3"], + id="multiple_ints", + ), + pytest.param([0.5], 2, ["opcount_0.5"], id="single_float"), + pytest.param( + [0.5, 1, 2], + 6, + ["opcount_0.5", "opcount_1", "opcount_2"], + id="multiple_floats", + ), + pytest.param( + [1, 0.5, 2], + 6, + ["opcount_1", "opcount_0.5", "opcount_2"], + id="mixed_int_float", + ), + pytest.param( + [1, 2, 3, 5], + 8, + ["opcount_1", "opcount_2", "opcount_3", "opcount_5"], + id="four_ints", + ), + ], +) +def test_fixed_opcode_count_config_file_parametrized( + pytester: pytest.Pytester, + config_counts: list, + expected_tests: int, + expected_ids: list, +) -> None: + """ + Config file opcode counts create correct test variants. + + The config file can specify single counts, multiple counts, or float values. + Each should parametrize tests correctly. + """ + setup_test_directory_structure( + pytester, test_module_dummy, "test_config_counts.py" + ) + + config_file = pytester.path / ".fixed_opcode_counts.json" + config_file.write_text( + json.dumps( + { + "scenario_configs": { + "test_dummy_benchmark_test.*": config_counts + } + } + ) + ) + + # Place --fixed-opcode-count after test path to avoid argparse consuming + # the path as the option value (nargs='?' 
behavior) + result = pytester.runpytest( + "-c", + "pytest-fill.ini", + "--fork", + "Prague", + "tests/benchmark/dummy_test_module/", + f"--evm-bin={BENCHMARK_EVM_T8N}", + "--fixed-opcode-count", + "-v", + ) + + assert result.ret == 0 + # Check expected number of tests (2 test types * len(counts)) + assert any(f"{expected_tests} passed" in line for line in result.outlines) + # Check opcode count IDs are present + for expected_id in expected_ids: + assert any(expected_id in line for line in result.outlines) + + +# Test module with parametrized test for per-parameter pattern matching +test_module_parametrized = textwrap.dedent( + """\ + import pytest + from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op + + @pytest.mark.valid_at("Prague") + @pytest.mark.benchmark + @pytest.mark.parametrize("size", [0, 32, 256, 1024]) + def test_parametrized_benchmark(benchmark_test: BenchmarkTestFiller, size: int) -> None: + benchmark_test( + target_opcode=Op.JUMPDEST, + code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST), + ) + """ +) + + +@pytest.mark.parametrize( + "config,expected_test_ids", + [ + # Single count per parameter - different counts for different sizes + pytest.param( + { + "test_parametrized_benchmark.*size_0.*": [5], + "test_parametrized_benchmark.*size_256.*": [3], + "test_parametrized_benchmark.*size_1024.*": [2], + }, + [ + # size_0 -> 5, size_32 -> default (1), size_256 -> 3, size_1024 -> 2 + "size_0-opcount_5", + "size_32-opcount_1", + "size_256-opcount_3", + "size_1024-opcount_2", + ], + id="single_count_per_param", + ), + # Multiple counts per parameter (floats and ints) + pytest.param( + { + "test_parametrized_benchmark.*size_0.*": [0.5, 1, 2], + "test_parametrized_benchmark.*size_1024.*": [0.5, 0.75], + }, + [ + # size_0 gets [0.5, 1, 2], size_32 gets default [1], size_1024 gets [0.5, 0.75] + "size_0-opcount_0.5", + "size_0-opcount_1", + "size_0-opcount_2", + "size_32-opcount_1", + "size_256-opcount_1", + "size_1024-opcount_0.5", + "size_1024-opcount_0.75", + ], + id="multiple_counts_per_param", + ), + # Per-param patterns with test_.* fallback for unmatched params + pytest.param( + { + "test_parametrized_benchmark.*size_0.*": [5], + "test_parametrized_benchmark.*size_1024.*": [10], + "test_.*": [2, 3], # Fallback for size_32, size_256 + }, + [ + # size_0 -> [5] (specific), size_32 -> [2,3] (fallback), + # size_256 -> [2,3] (fallback), size_1024 -> [10] (specific) + "size_0-opcount_5", + "size_32-opcount_2", + "size_32-opcount_3", + "size_256-opcount_2", + "size_256-opcount_3", + "size_1024-opcount_10", + ], + id="per_param_with_fallback", + ), + # All params same counts via broad pattern + pytest.param( + { + "test_parametrized_benchmark.*": [1, 2, 3], + }, + [ + # All sizes get [1, 2, 3] + "size_0-opcount_1", + "size_0-opcount_2", + "size_0-opcount_3", + "size_32-opcount_1", + "size_1024-opcount_3", + ], + id="all_same_counts", + ), + ], +) +def test_fixed_opcode_count_per_parameter_patterns( + pytester: pytest.Pytester, + config: dict, + expected_test_ids: List[str], +) -> None: + """ + Per-parameter opcode count patterns work correctly. + + Patterns like "test_foo.*size_256.*" should match tests with that specific + parameter value and apply the corresponding opcode counts. 
+ """ + setup_test_directory_structure( + pytester, test_module_parametrized, "test_param_benchmark.py" + ) + + config_file = pytester.path / ".fixed_opcode_counts.json" + config_file.write_text(json.dumps({"scenario_configs": config})) + + result = pytester.runpytest( + "-c", + "pytest-fill.ini", + "--fork", + "Prague", + "tests/benchmark/dummy_test_module/", + f"--evm-bin={BENCHMARK_EVM_T8N}", + "--fixed-opcode-count", + "-v", + ) + + assert result.ret == 0 + + # Verify expected test IDs are present + output = "\n".join(result.outlines) + for expected_id in expected_test_ids: + assert expected_id in output, ( + f"Expected '{expected_id}' in output but not found.\n" + f"Output:\n{output}" + ) + + +def test_cli_mode_ignores_per_parameter_patterns( + pytester: pytest.Pytester, +) -> None: + """ + CLI mode applies same counts to all parameters. + + When using --fixed-opcode-count=1,5 (explicit CLI values), all test + variants should get the same opcode counts regardless of their parameters. + This verifies CLI mode doesn't accidentally use per-parameter matching. + """ + setup_test_directory_structure( + pytester, test_module_parametrized, "test_cli_mode.py" + ) + + result = pytester.runpytest( + "-c", + "pytest-fill.ini", + "--fork", + "Prague", + "--fixed-opcode-count=1,5", + "tests/benchmark/dummy_test_module/", + f"--evm-bin={BENCHMARK_EVM_T8N}", + "-v", + ) + + assert result.ret == 0 + output = "\n".join(result.outlines) + + # All size variants should have both opcount_1 and opcount_5 + for size in ["size_0", "size_32", "size_256", "size_1024"]: + assert ( + f"{size}-opcount_1.0K" in output or f"{size}-opcount_1K" in output + ), f"Expected {size} with opcount_1 in output" + assert ( + f"{size}-opcount_5.0K" in output or f"{size}-opcount_5K" in output + ), f"Expected {size} with opcount_5 in output" diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py index 03e261770c..04f4db57be 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py @@ -41,7 +41,12 @@ def pytest_addoption(parser: pytest.Parser) -> None: const="", help=( "Opcode counts (in thousands) for benchmark tests. " - "Example: '1,10,100' runs tests with 1K, 10K, 100K opcodes. " + "Granularity rules (for ≤10%% CALL overhead): " + "cheap ops (1-2 gas): integers only, no sub-1K; " + "medium ops (3-5 gas): 0.5 increments, min 0.5K; " + "expensive ops (6+ gas): 0.25 increments, min 0.25K; " + "very expensive (100+ gas): 0.25 increments, min 0.01K. " + "Example: '0.5,1,2' runs 500, 1K, 2K opcodes. " "Without value, uses .fixed_opcode_counts.json config. " f"Cannot be used with {GasBenchmarkValues.flag}." ), @@ -134,12 +139,12 @@ class GasBenchmarkValues(RootModel, BenchmarkParametrizer): @classmethod def from_parameter_value( - cls, config: pytest.Config, value: str + cls, _config: pytest.Config, value: str ) -> Self | None: """Given the parameter value and config, return the expected object.""" return cls.model_validate(value.split(",")) - def get_test_parameters(self, test_name: str) -> list[ParameterSet]: + def get_test_parameters(self, _test_name: str) -> list[ParameterSet]: """Get benchmark values. 
All tests have the same list.""" return [ pytest.param( @@ -153,8 +158,9 @@ def get_test_parameters(self, test_name: str) -> list[ParameterSet]: class OpcodeCountsConfig(BaseModel, BenchmarkParametrizer): """Opcode counts configuration object.""" - scenario_configs: Dict[str, List[int]] = Field(default_factory=dict) - default_counts: List[int] = Field(default_factory=lambda: [1]) + scenario_configs: Dict[str, List[float]] = Field(default_factory=dict) + default_counts: List[float] = Field(default_factory=lambda: [1.0]) + uses_config_file: bool = Field(default=False) default_config_file_name: ClassVar[str] = ".fixed_opcode_counts.json" flag: ClassVar[str] = "--fixed-opcode-count" @@ -169,48 +175,176 @@ def from_parameter_value( if value == "": default_file = Path(config.rootpath) / cls.default_config_file_name if default_file.exists(): - return cls.model_validate_json(default_file.read_bytes()) + data = default_file.read_bytes() + instance = cls.model_validate_json(data) + instance.uses_config_file = True + return instance else: - pytest.UsageError( + raise pytest.UsageError( "--fixed-opcode-count was provided without a value, but " f"{cls.default_config_file_name} was not found. " "Run 'uv run benchmark_parser' to generate it, or provide " "explicit values (e.g., --fixed-opcode-count 1,10,100)." ) - return cls.model_validate({"default_counts": value.split(",")}) + # Validate that value looks like comma-separated numbers (int or float) + # This catches the case where argparse greedily consumes a test path + parts = value.split(",") + + def is_number(s: str) -> bool: + try: + float(s.strip()) + return True + except ValueError: + return False + + if not all(is_number(part) for part in parts): + raise pytest.UsageError( + f"Invalid value for --fixed-opcode-count: '{value}'. " + "Expected comma-separated numbers (e.g., '1,10,100' or '0.25,0.5,1') or no value " + "to use the config file. If providing a value, use --fixed-opcode-count=VALUE " + "syntax to avoid argparse consuming test paths as the value." + ) + return cls.model_validate( + {"default_counts": parts, "uses_config_file": False} + ) - def get_test_parameters(self, test_name: str) -> list[ParameterSet]: + def get_opcode_counts(self, test_name: str) -> list[float]: """ - Get opcode counts for a test using regex pattern matching. + Get opcode counts for a test using pattern matching. + + Matching priority: + 1. Exact match in scenario_configs + 2. Regex pattern match (longest pattern wins for specificity) + 3. Default counts as fallback + + Example with config: + {"test_dup": [10], "test_dup.*": [1], "test_dup.*DUP1.*": [5]} + + - "test_dup" -> [10] (exact match) + - "test_dup[fork_Prague-opcode_DUP1]" -> [5] (longest pattern matches) + - "test_dup[fork_Prague-opcode_DUP2]" -> [1] (matches "test_dup.*") + - "test_other" -> default_counts (no match) + + Note: In config file mode, test names don't have opcount yet when this + is called - we look up the count first, then add it to the test name. 
""" counts = self.default_counts - # Try exact match first (faster) + if test_name in self.scenario_configs: counts = self.scenario_configs[test_name] else: - # Try regex patterns + matches: list[tuple[str, list[float]]] = [] for pattern, pattern_counts in self.scenario_configs.items(): if pattern == test_name: continue try: if re.search(pattern, test_name): - counts = pattern_counts - break - except re.error: - continue + matches.append((pattern, pattern_counts)) + except re.error as e: + raise ValueError( + f"Invalid regex pattern '{pattern}' in config: {e}" + ) + + if matches: + matches.sort(key=lambda x: len(x[0]), reverse=True) + counts = matches[0][1] + + return counts + + def get_test_parameters(self, test_name: str) -> list[ParameterSet]: + """Get opcode counts as pytest parameters.""" + # Deduplicate while preserving order + unique_counts = list(dict.fromkeys(self.get_opcode_counts(test_name))) return [ - pytest.param( - opcode_count, - id=f"opcount_{opcode_count}K", - ) - for opcode_count in counts + pytest.param(opcode_count, id=f"opcount_{opcode_count}K") + for opcode_count in unique_counts ] + def parametrize(self, metafunc: pytest.Metafunc) -> None: + """ + Parametrize a test with opcode counts. + + In config file mode with existing parametrizations (from metafunc._calls), + generates opcode counts per-parameter by matching patterns against simulated + test IDs built from existing params. + + In CLI mode (explicit counts), uses the function name for pattern matching. + """ + # Check for direct or indirect use of fixed_opcode_count. + # The benchmark_test fixture depends on fixed_opcode_count, so if the test + # uses benchmark_test, we need to parametrize fixed_opcode_count. + if self.parameter_name not in metafunc.fixturenames: + if "benchmark_test" not in metafunc.fixturenames: + return + # benchmark_test uses fixed_opcode_count - add it to fixtures + metafunc.fixturenames.append(self.parameter_name) + + test_name = metafunc.function.__name__ + + if ( + self.uses_config_file + and hasattr(metafunc, "_calls") + and metafunc._calls + ): + # Config file mode with existing parametrizations: + # Build simulated IDs from existing params and match patterns + self._parametrize_with_existing_params(metafunc, test_name) + elif self.uses_config_file: + # Config file mode, no existing params: match against function name + metafunc.parametrize( + self.parameter_name, + self.get_test_parameters(test_name), + scope="function", + ) + else: + # CLI mode: use function name matching (original behavior) + metafunc.parametrize( + self.parameter_name, + self.get_test_parameters(test_name), + scope="function", + ) + + def _parametrize_with_existing_params( + self, metafunc: pytest.Metafunc, test_name: str + ) -> None: + """ + Parametrize opcode counts based on existing test parameters. + + For each existing parameter combination in metafunc._calls, build a simulated + test ID and match patterns to get the appropriate opcode counts. + + We collect ALL unique counts across all parameter combinations and add them + as a simple parametrization. This creates all combinations (cartesian product). + Unwanted combinations are filtered out later in pytest_collection_modifyitems. 
+ """ + # Collect opcode counts for each call (indexed by position) + all_unique_counts: set[float] = set() + + for call in metafunc._calls: + # Build simulated test ID using call.id which is already properly formatted + # Format: test_name[fork_--] + simulated_id = f"{test_name}[{call.id}]" if call.id else test_name + + # Get opcode counts for this simulated ID and add to unique set + counts = self.get_opcode_counts(simulated_id) + all_unique_counts.update(counts) + + # Add all unique counts as simple parametrization (multiplies with existing) + # Unwanted combinations will be filtered in pytest_collection_modifyitems + metafunc.parametrize( + self.parameter_name, + [ + pytest.param(count, id=f"opcount_{count}K") + for count in sorted(all_unique_counts) + ], + scope="function", + ) + def pytest_collection_modifyitems( config: pytest.Config, items: list[pytest.Item] ) -> None: - """Filter tests based on repricing marker.""" + """Filter tests based on repricing marker and opcode count patterns.""" gas_benchmark_value = GasBenchmarkValues.from_config(config) fixed_opcode_count = OpcodeCountsConfig.from_config(config) @@ -234,6 +368,10 @@ def pytest_collection_modifyitems( filtered.append(item) items[:] = filtered + # Filter per-parameter opcode counts if using config file mode + if fixed_opcode_count.uses_config_file: + _filter_opcode_count_combinations(items, fixed_opcode_count) + # Extract the specified flag from the command line. # If the `-m repricing` flag is not specified, or is negated, # we skip filtering tests by the repricing marker. @@ -266,8 +404,60 @@ def pytest_collection_modifyitems( items[:] = filtered +def _filter_opcode_count_combinations( + items: list[pytest.Item], opcode_config: "OpcodeCountsConfig" +) -> None: + """ + Filter test items to only keep valid opcode count combinations. + + When using config file mode with per-parameter patterns, we generate all + combinations (cartesian product) in pytest_generate_tests. Here we filter + out combinations where the opcode count doesn't match the pattern for + that specific parameter combination. + """ + filtered = [] + + for item in items: + if not hasattr(item, "callspec"): + filtered.append(item) + continue + + params = item.callspec.params + opcode_count = params.get(OpcodeCountsConfig.parameter_name) + + if opcode_count is None: + filtered.append(item) + continue + + # Build simulated test ID WITHOUT the opcode count for pattern matching + # The test name format is: test_func[fork_X-fixture_format-params-opcount_Y] + # We need: test_func[fork_X-fixture_format-params] + test_name = item.name + + # Remove the opcode count part from the test ID for pattern matching + # Pattern: -opcount_X.XK or -opcount_XK at the end before ] + import re + + simulated_id = re.sub(r"-opcount_[\d.]+K\]$", "]", test_name) + + # Get valid counts for this parameter combination + valid_counts = opcode_config.get_opcode_counts(simulated_id) + + # Keep item only if its opcode count is valid for this combination + if opcode_count in valid_counts: + filtered.append(item) + + items[:] = filtered + + +@pytest.hookimpl(trylast=True) def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: - """Generate tests for the gas benchmark values and fixed opcode counts.""" + """ + Generate tests for the gas benchmark values and fixed opcode counts. + + Uses trylast=True to run after other parametrizations so we can access + existing parameters in metafunc._calls for pattern matching. 
+ """ parametrizer = GasBenchmarkValues.from_config( metafunc.config ) or OpcodeCountsConfig.from_config(metafunc.config) diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py index ce4cb2aca1..d7c587eb5e 100644 --- a/packages/testing/src/execution_testing/specs/benchmark.py +++ b/packages/testing/src/execution_testing/specs/benchmark.py @@ -54,7 +54,7 @@ class BenchmarkCodeGenerator(ABC): setup: Bytecode = field(default_factory=Bytecode) cleanup: Bytecode = field(default_factory=Bytecode) tx_kwargs: Dict[str, Any] = field(default_factory=dict) - fixed_opcode_count: int | None = None + fixed_opcode_count: float | None = None code_padding_opcode: Op | None = None _contract_address: Address | None = None _inner_iterations: int = 1000 @@ -78,10 +78,13 @@ def deploy_fix_count_contracts(self, *, pre: Alloc, fork: Fork) -> Address: "fixed_opcode_count is not set" ) # Adjust outer loop iterations based on inner iterations - # If inner is 500 instead of 1000, double the outer loop - outer_multiplier = 1000 // self._inner_iterations - iterations = self.fixed_opcode_count * outer_multiplier - + if self.fixed_opcode_count < 1.0: + # < 1000 opcodes, outer = 1 as inner already set to exact count + iterations = 1 + else: + # >= 1000: calculate outer iterations from target / inner + target_opcodes = int(self.fixed_opcode_count * 1000) + iterations = target_opcodes // self._inner_iterations prefix = Op.CALLDATACOPY( Op.PUSH0, Op.PUSH0, Op.CALLDATASIZE ) + Op.PUSH4(iterations) @@ -193,9 +196,40 @@ def generate_repeated_code( # # 2a. If N is 1000: Set M = fixed_opcode_count. (Total ops: fixed_opcode_count * 1000) # 2b. If N is 500: Set M = fixed_opcode_count * 2. (Total ops: (fixed_opcode_count * 2) * 500 = fixed_opcode_count * 1000) + # + # --- 3. Sub-1K Case (fixed_opcode_count < 1.0) --- + # For Sub-1K counts (e.g., 0.25 = 250 opcodes), set N = exact count, M = 1. if self.fixed_opcode_count is not None: - inner_iterations = 1000 if max_iterations >= 1000 else 500 - self._inner_iterations = min(max_iterations, inner_iterations) + if self.fixed_opcode_count < 0.001: + raise ValueError( + f"fixed_opcode_count must be >= 0.001 (1 opcode), " + f"got {self.fixed_opcode_count}" + ) + if self.fixed_opcode_count < 1.0: + # < 1000 opcodes, inner = exact count, outer = 1 + self._inner_iterations = min( + max_iterations, int(self.fixed_opcode_count * 1000) + ) + else: + # >= 1000 opcodes: use 250 inner iterations (0.25K granularity) + target_opcodes = int(self.fixed_opcode_count * 1000) + + if max_iterations >= 250 and target_opcodes % 250 == 0: + inner_iterations = 250 + elif max_iterations >= target_opcodes: + # Use exact count as inner with outer = 1 + inner_iterations = target_opcodes + else: + raise ValueError( + f"fixed_opcode_count {self.fixed_opcode_count} ({target_opcodes} opcodes) " + f"exceeds max contract size for this attack block.\n" + f"Contract size limit allows up to {max_iterations} opcodes " + f"({max_iterations / 1000:.3f}K) in the inner loop.\n" + f"For counts above this limit, use multiples of 0.25K " + f"(e.g., {((target_opcodes // 250) * 250) / 1000:.2f} or " + f"{((target_opcodes // 250 + 1) * 250) / 1000:.2f})." + ) + self._inner_iterations = inner_iterations # TODO: Unify the PUSH0 and PUSH1 usage. 
iterations = ( @@ -247,7 +281,7 @@ class BenchmarkTest(BaseTest): gas_benchmark_value: int = Field( default_factory=lambda: int(Environment().gas_limit) ) - fixed_opcode_count: int | None = None + fixed_opcode_count: float | None = None target_opcode: Op | None = None code_generator: BenchmarkCodeGenerator | None = None diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md new file mode 100644 index 0000000000..c1a8aafffa --- /dev/null +++ b/tests/benchmark/README.md @@ -0,0 +1,298 @@ +# Benchmark Tests + +Benchmark tests measure EVM opcode and precompile performance to support gas repricing analysis. These tests are designed to stress specific operations under controlled conditions, allowing accurate measurement of execution costs. + +## Two Benchmarking Modes + +The framework supports two distinct benchmarking approaches: + +| Mode | Flag | Unit | Use Case | +|------|------|------|----------| +| Gas-based | `--gas-benchmark-values` | Millions of gas | CI pipelines, traditional benchmarking | +| Fixed opcode count | `--fixed-opcode-count` | Thousands of opcodes | Fast iteration for gas repricing research | + +## Gas Benchmark Values + +The gas-based mode runs benchmark tests with specified gas limits. This is the traditional approach used in CI pipelines. Values are specified in **millions** of gas (e.g., `100` means 100 million gas). + +```bash +uv run fill --fork Prague --gas-benchmark-values 1 -m benchmark ./tests/benchmark --evm-bin=evmone-t8n +``` + +This mode is useful for measuring how much work can be done within a given gas budget, simulating real-world block execution conditions. + +## Fixed Opcode Count + +The fixed opcode count mode runs benchmark tests with a predetermined number of opcode iterations rather than gas-based limits. This approach enables rapid iteration when analyzing gas costs for repricing proposals, as you can directly compare execution times across different opcode counts. + +**Important:** Tests must be marked with `@pytest.mark.repricing` to be compatible with fixed opcode count mode. This marker identifies tests that have been specifically designed for gas repricing analysis with proper code generators. + +### CLI Mode + +When you want to apply the same opcode counts to all tests, pass the values directly on the command line: + +```bash +# Run all repricing tests with 1K opcodes +uv run fill --fork Prague --fixed-opcode-count=1 -m repricing tests/benchmark --evm-bin=evmone-t8n + +# Run with multiple counts (1K, 5K, and 10K opcodes) +uv run fill --fork Prague --fixed-opcode-count=1,5,10 -m repricing tests/benchmark --evm-bin=evmone-t8n +``` + +### Config File Mode + +For more granular control, you can use a configuration file that specifies different opcode counts for different tests. This is particularly useful when benchmarking a mix of cheap and expensive operations that require different iteration counts. + +```bash +uv run fill --fork Prague --fixed-opcode-count -m repricing tests/benchmark --evm-bin=evmone-t8n +``` + +When invoked without a value, the framework reads from `.fixed_opcode_counts.json` in the repository root. 
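+
+The matching rules described in the following sections boil down to a lookup along
+these lines. This is a simplified sketch for orientation only; the helper name
+`resolve_counts` is illustrative and not part of the framework:
+
+```python
+import re
+
+
+def resolve_counts(
+    test_id: str,
+    scenario_configs: dict[str, list[float]],
+    default_counts: list[float],
+) -> list[float]:
+    """Simplified lookup: exact match, then longest matching regex, else defaults."""
+    if test_id in scenario_configs:
+        return scenario_configs[test_id]
+    matches = [
+        (pattern, counts)
+        for pattern, counts in scenario_configs.items()
+        if re.search(pattern, test_id)
+    ]
+    if matches:
+        # The longest (most specific) matching pattern wins.
+        return max(matches, key=lambda m: len(m[0]))[1]
+    return default_counts
+
+
+# resolve_counts("test_add[fork_Prague-opcode_ADD]",
+#                {"test_add.*": [1, 5, 10], "test_.*": [1]}, [1]) -> [1, 5, 10]
+```
+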
+ +### Config File Format + +Create a `.fixed_opcode_counts.json` file in the repository root with the following structure: + +```json +{ + "scenario_configs": { + "test_add.*": [1, 5, 10], + "test_ecrecover.*": [0.01, 0.1], + "test_bn128.*size_256.*": [0.001], + "test_.*": [1] + } +} +``` + +The pattern matching system works as follows: + +- **Regex patterns**: Each key in `scenario_configs` is a regular expression matched against test names +- **Longest match wins**: When multiple patterns match, the longest (most specific) pattern takes precedence +- **Per-parameter matching**: Patterns can target specific parametrized test variants (see below) +- **Global fallback**: Use `"test_.*"` as a catch-all pattern for any unmatched tests + +### Per-Parameter Pattern Matching + +For parametrized tests, you can specify different opcode counts for each parameter value. This is essential when different parameter values have significantly different execution costs. + +Consider a `test_codecopy_benchmark` test parametrized by `code_size`. Larger code sizes are more expensive per iteration, so you need fewer iterations to achieve meaningful measurements: + +```json +{ + "scenario_configs": { + "test_codecopy_benchmark.*code_size_0.*": [2500, 5000, 7500, 10000], + "test_codecopy_benchmark.*code_size_32.*": [2000, 4000, 6000, 8000], + "test_codecopy_benchmark.*code_size_256.*": [750, 1500, 2250, 3000], + "test_codecopy_benchmark.*code_size_1024.*": [250, 500, 750, 1000], + "test_codecopy_benchmark.*code_size_24576.*": [10, 20, 30, 40], + "test_codecopy_benchmark.*": [1] + } +} +``` + +**How pattern ordering works:** + +1. When a test like `test_codecopy_benchmark[code_size_256]` runs, the framework checks all patterns +2. Multiple patterns may match: both `test_codecopy_benchmark.*code_size_256.*` and `test_codecopy_benchmark.*` +3. The **longest matching pattern wins**, so `code_size_256` gets `[750, 1500, 2250, 3000]` +4. The shorter fallback pattern `test_codecopy_benchmark.*` only applies to parameter values without specific patterns + +**Best practices for per-parameter configs:** + +- Start with specific patterns for each parameter value you want to customize +- Add a broader fallback pattern (e.g., `test_foo.*`) for any unconfigured parameter values +- Use the `test_.*` pattern as a global default for entirely unmatched tests +- Order doesn't matter in the JSON - the framework always uses longest match, not first match + +### Generating the Config File + +The benchmark parser tool can automatically generate and update the configuration file by scanning your test modules: + +```bash +# Generate or update .fixed_opcode_counts.json +uv run benchmark_parser + +# Validate that config is in sync +uv run benchmark_parser --check +``` + +The parser preserves any custom counts you've configured while adding new tests with default values. + +### Understanding Opcode Count Values + +Values represent **thousands of opcodes**: + +- `1` = 1,000 opcodes (1K) +- `0.5` = 500 opcodes +- `0.25` = 250 opcodes +- `0.001` = 1 opcode + +The minimum supported value is `0.001` (a single opcode iteration). + +### Granularity Rules + +The benchmark framework uses outer CALL loops to achieve the target opcode count. Each CALL adds approximately 150 gas of overhead. 
To keep this overhead at or below 10% of the measured work, follow these granularity guidelines when setting the opcode count: + +| Opcode Gas Cost | Recommended Granularity | Minimum Value | Example Opcodes | +|-----------------|------------------------|---------------|-----------------| +| 1-2 gas | Integers only | 1K | JUMPDEST, POP, PUSH | +| 3-5 gas | 0.5 increments | 0.5K | ADD, MUL, SUB, DIV | +| 6+ gas | 0.25 increments | 0.25K | ADDMOD, MULMOD, EXP | +| 100+ gas | 0.25 increments | 0.01K | CALL, SLOAD, precompiles | + +For example, testing JUMPDEST (1 gas) with only 100 iterations would mean the CALL overhead dominates the measurement. Using 1,000+ iterations ensures the actual opcode cost is the primary factor. + +### Example Configuration + +Here's a comprehensive configuration demonstrating granularity rules and per-parameter matching: + +```json +{ + "scenario_configs": { + "test_jumpdest.*": [1, 2, 5], + "test_add.*": [0.5, 1, 2], + "test_keccak.*": [0.25, 0.5, 1], + "test_ecrecover.*": [0.01, 0.1], + "test_bn128_pairing.*": [0.001], + + "test_codecopy.*code_size_0.*": [2500, 5000, 10000], + "test_codecopy.*code_size_256.*": [750, 1500, 3000], + "test_codecopy.*code_size_24576.*": [10, 20, 50], + "test_codecopy.*": [1], + + "test_.*": [1] + } +} +``` + +This configuration: + +1. **Cheap opcodes** (JUMPDEST, ADD): Use integer counts ≥1K +2. **Medium opcodes** (KECCAK256): Use 0.25K-1K range +3. **Expensive precompiles** (ECRECOVER, BN128): Use sub-1K counts +4. **Per-parameter tests** (CODECOPY): Different counts based on `code_size` parameter +5. **Global fallback** (`test_.*`): Catches any test not explicitly configured + +## The Repricing Marker + +Tests intended for gas repricing analysis must be marked with `@pytest.mark.repricing`. This marker serves two purposes: + +1. **Filtering**: Use `-m repricing` to run only repricing-relevant tests +2. 
**Compatibility**: Fixed opcode count mode requires this marker to ensure tests have proper code generators + +### Listing Repricing Tests + +To see all available repricing tests without running them: + +```bash +# List all repricing test names +uv run fill --fork Prague --fixed-opcode-count -m repricing tests/benchmark --collect-only -q + +# List with full test IDs (includes parameters) +uv run fill --fork Prague --fixed-opcode-count -m repricing tests/benchmark --collect-only +``` + +### Running Repricing Tests + +```bash +# Run only repricing tests (recommended for fixed opcode count) +uv run fill --fork Prague --fixed-opcode-count -m repricing tests/benchmark --evm-bin=evmone-t8n + +# Run all benchmark tests (gas-based mode typically) +uv run fill --fork Prague --gas-benchmark-values 1 -m benchmark tests/benchmark --evm-bin=evmone-t8n +``` + +## Execute Remote + +To run benchmarks against a live network for real-world performance measurement: + +```bash +uv run execute remote --fixed-opcode-count --fork Prague -m repricing tests/benchmark \ + --rpc-seed-key --rpc-endpoint --chain-id +``` + +## Writing Benchmark Tests + +Here's an example of a properly structured benchmark test for fixed opcode count mode: + +```python +import pytest +from execution_testing import BenchmarkTestFiller, JumpLoopGenerator, Op + +@pytest.mark.valid_at("Prague") +@pytest.mark.repricing # Required for fixed opcode count mode +def test_add(benchmark_test: BenchmarkTestFiller) -> None: + """Benchmark the ADD opcode with representative operands.""" + benchmark_test( + target_opcode=Op.ADD, + code_generator=JumpLoopGenerator( + attack_block=Op.ADD(1, 2) + Op.POP, + ), + ) +``` + +Key elements: + +- `@pytest.mark.repricing`: Marks this test for gas repricing analysis +- `benchmark_test` fixture: Provides the `BenchmarkTestFiller` for fixed opcode count mode +- `target_opcode`: The opcode being measured (used for validation) +- `code_generator`: Defines the bytecode pattern to repeat + +## Continuous Integration + +The benchmark CI pipeline runs automatically on pushes to `mainnet` and `forks/**` branches, as well as on pull requests. It validates benchmark functionality and generates fixture artifacts. + +### CI Workflow + +The `.github/workflows/benchmark.yaml` workflow runs in stages: + +1. **Unit Tests**: Runs benchmark framework unit tests +2. **Sanity Checks**: Validates both benchmarking modes work correctly +3. 
**Build Artifact**: Generates benchmark fixture artifacts (on push only) + +### Tox Environments + +Run these locally to validate benchmark functionality: + +```bash +# Run benchmark framework unit tests +uvx tox -e tests_benchmark_pytest_py3 + +# Test gas-based benchmarking mode +uvx tox -e benchmark-gas-values + +# Test fixed opcode count CLI mode +uvx tox -e benchmark-fixed-opcode-cli + +# Test fixed opcode count config file mode (runs benchmark_parser first) +uvx tox -e benchmark-fixed-opcode-config +``` + +| Tox Environment | Description | +|-----------------|-------------| +| `tests_benchmark_pytest_py3` | Unit tests for the benchmarking plugin | +| `benchmark-gas-values` | Fills tests with `--gas-benchmark-values 1` | +| `benchmark-fixed-opcode-cli` | Fills tests with `--fixed-opcode-count 1` | +| `benchmark-fixed-opcode-config` | Runs `benchmark_parser`, then fills with config file | + +### Fixture Configurations + +The `.github/configs/feature.yaml` defines benchmark fixture builds: + +| Config | Fork | Gas Values | Description | +|--------|------|------------|-------------| +| `benchmark` | Prague | 1,5,10,30,60,100,150 | Full benchmark suite | +| `benchmark_develop` | Osaka | 1,5,10,30,60,100,150 | Development fork benchmarks | +| `benchmark_fast` | Prague | 100 | Quick benchmark for CI artifacts | + +### Artifact Generation + +Every time a PR touching benchmark code is merged to `mainnet` or `forks/**` branches, the CI automatically generates benchmark fixture artifacts. These artifacts use the `benchmark_fast` configuration with **100M gas only** (not the full 1-150M range) to keep build times reasonable. + +The artifacts are published as GitHub releases and can be used for execution testing against client implementations. + +To build the same fixtures locally: + +```bash +uv run fill --fork Prague --gas-benchmark-values 100 -m benchmark tests/benchmark --evm-bin=evmone-t8n +``` diff --git a/tests/benchmark/compute/instruction/test_arithmetic.py b/tests/benchmark/compute/instruction/test_arithmetic.py index 765541e4e9..c6a7261fe1 100644 --- a/tests/benchmark/compute/instruction/test_arithmetic.py +++ b/tests/benchmark/compute/instruction/test_arithmetic.py @@ -185,7 +185,6 @@ def test_arithmetic( ) -@pytest.mark.repricing(mod_bits=127) @pytest.mark.parametrize("mod_bits", [255, 191, 127, 63]) @pytest.mark.parametrize("opcode", [Op.MOD, Op.SMOD]) def test_mod( diff --git a/tests/benchmark/compute/instruction/test_log.py b/tests/benchmark/compute/instruction/test_log.py index 956dc8acb2..5e904552d1 100644 --- a/tests/benchmark/compute/instruction/test_log.py +++ b/tests/benchmark/compute/instruction/test_log.py @@ -21,11 +21,11 @@ @pytest.mark.parametrize( "opcode", [ - pytest.param(Op.LOG0, id="log0"), - pytest.param(Op.LOG1, id="log1"), - pytest.param(Op.LOG2, id="log2"), - pytest.param(Op.LOG3, id="log3"), - pytest.param(Op.LOG4, id="log4"), + Op.LOG0, + Op.LOG1, + Op.LOG2, + Op.LOG3, + Op.LOG4, ], ) @pytest.mark.parametrize( @@ -88,11 +88,11 @@ def test_log( @pytest.mark.parametrize( "opcode", [ - pytest.param(Op.LOG0, id="log0"), - pytest.param(Op.LOG1, id="log1"), - pytest.param(Op.LOG2, id="log2"), - pytest.param(Op.LOG3, id="log3"), - pytest.param(Op.LOG4, id="log4"), + Op.LOG0, + Op.LOG1, + Op.LOG2, + Op.LOG3, + Op.LOG4, ], ) @pytest.mark.parametrize("mem_size", [0, 32, 256, 1024]) diff --git a/tests/benchmark/compute/instruction/test_system.py b/tests/benchmark/compute/instruction/test_system.py index b7fbdc10a9..365d8c39b7 100644 --- 
a/tests/benchmark/compute/instruction/test_system.py +++ b/tests/benchmark/compute/instruction/test_system.py @@ -352,8 +352,17 @@ def test_create( ) if opcode == Op.CREATE2: - # For CREATE2, we provide an initial salt. - setup += Op.PUSH1(42) + # For CREATE2, load salt from storage (persist across outer loop calls) + # If storage is 0 (first call), use initial salt of 42. + # Stack after setup: [..., value, code_size, salt] + setup += ( + Op.SLOAD(0) # Load saved salt + + Op.DUP1 # Duplicate for check + + Op.ISZERO # Check if zero + + Op.PUSH1(42) # Default salt + + Op.MUL # 42 if zero, 0 if not + + Op.ADD # Add to get final salt (saved or 42) + ) attack_block = ( # For CREATE: @@ -363,10 +372,16 @@ def test_create( if opcode == Op.CREATE # For CREATE2: we manually push the arguments because we leverage the # return value of previous CREATE2 calls as salt for the next CREATE2 - # call. + # call. After CREATE2, save result to storage for next outer loop call. # - DUP4 is targeting the PUSH1(value) from the code_prefix. # - DUP3 is targeting the EXTCODESIZE value pushed in code_prefix. - else Op.DUP3 + Op.PUSH0 + Op.DUP4 + Op.CREATE2 + else Op.DUP3 + + Op.PUSH0 + + Op.DUP4 + + Op.CREATE2 + + Op.DUP1 + + Op.PUSH0 + + Op.SSTORE ) benchmark_test( diff --git a/tests/benchmark/compute/precompile/test_ecrecover.py b/tests/benchmark/compute/precompile/test_ecrecover.py index 63c4c72e08..22aca3b5ca 100644 --- a/tests/benchmark/compute/precompile/test_ecrecover.py +++ b/tests/benchmark/compute/precompile/test_ecrecover.py @@ -12,6 +12,7 @@ from tests.benchmark.compute.helpers import concatenate_parameters +@pytest.mark.repricing @pytest.mark.parametrize( "precompile_address,calldata", [ @@ -28,7 +29,6 @@ ] ), id="ecrecover", - marks=pytest.mark.repricing, ) ], ) diff --git a/tests/benchmark/compute/precompile/test_point_evaluation.py b/tests/benchmark/compute/precompile/test_point_evaluation.py index e179f27526..7597a9fa1d 100644 --- a/tests/benchmark/compute/precompile/test_point_evaluation.py +++ b/tests/benchmark/compute/precompile/test_point_evaluation.py @@ -13,6 +13,7 @@ from tests.cancun.eip4844_blobs.spec import Spec as BlobsSpec +@pytest.mark.repricing @pytest.mark.parametrize( "precompile_address,calldata", [ @@ -28,7 +29,6 @@ ] ), id="point_evaluation", - marks=pytest.mark.repricing, ), ], ) diff --git a/whitelist.txt b/whitelist.txt index 4f73730fd5..0b2d511e4f 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -494,6 +494,7 @@ epilog eq ERC Erigon +errlines esbenp etc ETH @@ -770,6 +771,7 @@ makereport marcin marioevz markdownlint +markexpr master matchers mcopy @@ -856,6 +858,7 @@ ommers oneliner oob opc +opcount opcode's OpenSSL oprypin @@ -876,6 +879,7 @@ P7692 paradigmxyz param parametrization +parametrizations parametrize parametrized parametrizer