ethereum · LouisTsai-Csie · Nov 13, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py
@@ -372,6 +372,8 @@ def base_test_parametrizer_func(
         eth_rpc: EthRPC,
         engine_rpc: EngineRPC | None,
         collector: Collector,
+        gas_benchmark_value: int,
+        fixed_opcode_count: int | None,
     ) -> Type[BaseTest]:
         """
         Fixture used to instantiate an auto-fillable BaseTest object from
@@ -404,9 +406,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
                     )
                 # Set default for expected_benchmark_gas_used
                 if "expected_benchmark_gas_used" not in kwargs:
-                    kwargs["expected_benchmark_gas_used"] = (
-                        request.getfixturevalue("gas_benchmark_value")
-                    )
+                    kwargs["expected_benchmark_gas_used"] = gas_benchmark_value
                 kwargs["fork"] = fork
                 kwargs |= {
                     p: request.getfixturevalue(p)

diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py
@@ -1321,6 +1321,7 @@ def base_test_parametrizer_func(
         test_case_description: str,
         fixture_source_url: str,
         gas_benchmark_value: int,
+        fixed_opcode_count: int | None,
         witness_generator: Any,
     ) -> Any:
         """

diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/benchmarking.py
@@ -20,19 +20,75 @@ def pytest_addoption(parser: pytest.Parser) -> None:
         default=None,
         help="Specify gas benchmark values for tests as a comma-separated list.",
     )
+    evm_group.addoption(
+        "--fixed-opcode-count",
+        action="store",
+        dest="fixed_opcode_count",
+        type=str,
+        default=None,
+        help="Specify fixed opcode counts (in thousands) for benchmark tests as a comma-separated list.",
+    )
 
 
 @pytest.hookimpl(tryfirst=True)
 def pytest_configure(config: pytest.Config) -> None:
     """Configure the fill and execute mode to benchmarking."""
+    config.addinivalue_line(  # register the `repricing` marker with pytest
+        "markers",
+        "repricing: Mark test as reference test for gas repricing analysis",
+    )
     if config.getoption("gas_benchmark_value"):
         config.op_mode = OpMode.BENCHMARKING  # type: ignore[attr-defined]
 
 
+def pytest_collection_modifyitems(
+    config: pytest.Config, items: list[pytest.Item]
+) -> None:
+    """
+    Keep only repricing benchmark tests when --fixed-opcode-count is given.
+
+    An item is kept when it carries both the `benchmark` and `repricing`
+    markers and, if the `repricing` marker has keyword arguments, every
+    one of them equals the item's parametrized value of the same name.
+    Items without a callspec cannot match marker kwargs and are dropped.
+    `items` is filtered in place; without the option nothing is changed.
+    """
+    if not config.getoption("fixed_opcode_count"):
+        return
+
+    def is_selected(item: pytest.Item) -> bool:
+        """Return True if `item` must run in fixed opcode count mode."""
+        if not item.get_closest_marker("benchmark"):
+            return False
+        repricing_marker = item.get_closest_marker("repricing")
+        if not repricing_marker:
+            return False
+        if not repricing_marker.kwargs:
+            # A bare marker selects every parametrization of the test.
+            return True
+        # Marker kwargs can only be checked against parametrized values.
+        return hasattr(item, "callspec") and all(
+            item.callspec.params.get(key) == value
+            for key, value in repricing_marker.kwargs.items()
+        )
+
+    items[:] = [item for item in items if is_selected(item)]
+
+
 def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
-    """Generate tests for the gas benchmark values."""
+    """Generate tests for the gas benchmark values and fixed opcode counts."""
+    gas_benchmark_values = metafunc.config.getoption("gas_benchmark_value")
+    fixed_opcode_counts = metafunc.config.getoption("fixed_opcode_count")
+
+    # Ensure mutual exclusivity
+    if gas_benchmark_values and fixed_opcode_counts:
+        raise pytest.UsageError(
+            "--gas-benchmark-values and --fixed-opcode-count are mutually exclusive. "
+            "Use only one at a time."
+        )
+
     if "gas_benchmark_value" in metafunc.fixturenames:
-        gas_benchmark_values = metafunc.config.getoption("gas_benchmark_value")
         if gas_benchmark_values:
             gas_values = [
                 int(x.strip()) for x in gas_benchmark_values.split(",")
@@ -48,17 +104,54 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
                 "gas_benchmark_value", gas_parameters, scope="function"
             )
 
+    if "fixed_opcode_count" in metafunc.fixturenames:
+        # Only parametrize if test has repricing marker
+        has_repricing = (
+            metafunc.definition.get_closest_marker("repricing") is not None
+        )
+        if has_repricing:
+            if fixed_opcode_counts:
+                opcode_counts = [
+                    int(x.strip()) for x in fixed_opcode_counts.split(",")
+                ]
+                opcode_count_parameters = [
+                    pytest.param(
+                        opcode_count,
+                        id=f"opcount_{opcode_count}K",
+                    )
+                    for opcode_count in opcode_counts
+                ]
+                metafunc.parametrize(
+                    "fixed_opcode_count",
+                    opcode_count_parameters,
+                    scope="function",
+                )
+
 
 @pytest.fixture(scope="function")
 def gas_benchmark_value(request: pytest.FixtureRequest) -> int:
     """Return a single gas benchmark value for the current test."""
     if hasattr(request, "param"):
         return request.param
 
+    # Fixed opcode count mode: use a high gas limit so gas is not a constraint
+    if request.config.getoption("fixed_opcode_count"):
+        return HIGH_GAS_LIMIT
+
     return EnvironmentDefaults.gas_limit
 
 
+@pytest.fixture(scope="function")
+def fixed_opcode_count(request: pytest.FixtureRequest) -> int | None:
+    """Provide the parametrized opcode count of the test, else None."""
+    # `request.param` is only present when the fixture was parametrized
+    # for the current test; a test that was not parametrized with an
+    # opcode count falls back to None.
+    return getattr(request, "param", None)
+
+
 BENCHMARKING_MAX_GAS = 1_000_000_000_000
+HIGH_GAS_LIMIT = 1_000_000_000  # gas limit used in --fixed-opcode-count mode
 
 
 @pytest.fixture

diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py
@@ -19,6 +19,7 @@
 
 ALL_FIXTURE_PARAMETERS = {
     "gas_benchmark_value",
+    "fixed_opcode_count",
     "genesis_environment",
     "env",
 }

diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py
@@ -53,6 +53,7 @@ class BenchmarkCodeGenerator(ABC):
     setup: Bytecode = field(default_factory=Bytecode)
     cleanup: Bytecode = field(default_factory=Bytecode)
     tx_kwargs: Dict[str, Any] = field(default_factory=dict)
+    fixed_opcode_count: int | None = None
     code_padding_opcode: Op | None = None
     _contract_address: Address | None = None
 
@@ -61,6 +62,50 @@ def deploy_contracts(self, *, pre: Alloc, fork: Fork) -> Address:
         """Deploy any contracts needed for the benchmark."""
         ...
 
+    def deploy_fix_count_contracts(self, *, pre: Alloc, fork: Fork) -> Address:
+        """Deploy the target and a contract that STATICCALLs it in a loop."""
+        iterations = self.fixed_opcode_count
+        # Checked before any deployment so a failure leaves `pre` untouched.
+        if iterations is None:
+            raise ValueError("fixed_opcode_count is not set")
+
+        target_code = self.generate_repeated_code(
+            repeated_code=self.attack_block,
+            setup=self.setup,
+            cleanup=self.cleanup,
+            fork=fork,
+        )
+        self._target_contract_address = pre.deploy_contract(code=target_code)
+        call_target = Op.STATICCALL(
+            gas=Op.GAS,
+            address=self._target_contract_address,
+            args_offset=0,
+            args_size=Op.CALLDATASIZE,
+            ret_offset=0,
+            ret_size=0,
+        )
+
+        # Copy the calldata to memory, then push the loop counter.
+        prefix = Op.CALLDATACOPY(Op.PUSH0, Op.PUSH0, Op.CALLDATASIZE)
+        prefix += Op.PUSH4(iterations)
+        loop_code = (
+            prefix
+            + Op.JUMPDEST
+            + Op.POP(call_target)
+            + Op.PUSH1(1)
+            + Op.SWAP1
+            + Op.SUB
+            + Op.DUP1
+            + Op.ISZERO
+            + Op.ISZERO
+            + Op.PUSH1(len(prefix))
+            + Op.JUMPI
+            + Op.STOP
+        )
+        self._validate_code_size(loop_code, fork)
+        self._contract_address = pre.deploy_contract(code=loop_code)
+        return self._contract_address
+
     def generate_transaction(
         self, *, pre: Alloc, gas_benchmark_value: int
     ) -> Transaction:
@@ -102,9 +147,18 @@ def generate_repeated_code(
         available_space = max_code_size - overhead
         max_iterations = available_space // len(repeated_code)
 
+        # Fixed opcode count mode: cap the repeated code at 1000 repetitions
+        if self.fixed_opcode_count is not None:
+            max_iterations = min(max_iterations, 1000)
+
+        print(f"max_iterations: {max_iterations}")
+
         # TODO: Unify the PUSH0 and PUSH1 usage.
-        code = setup + Op.JUMPDEST + repeated_code * max_iterations + cleanup
-        code += Op.JUMP(len(setup)) if len(setup) > 0 else Op.PUSH0 + Op.JUMP
+        code = setup + Op.JUMPDEST + repeated_code * max_iterations
+        if self.fixed_opcode_count is None:
+            code += cleanup + (
+                Op.JUMP(len(setup)) if len(setup) > 0 else Op.PUSH0 + Op.JUMP
+            )
         # Pad the code to the maximum code size.
         if self.code_padding_opcode is not None:
             code += self.code_padding_opcode * (max_code_size - len(code))
@@ -142,6 +196,7 @@ class BenchmarkTest(BaseTest):
     gas_benchmark_value: int = Field(
         default_factory=lambda: int(Environment().gas_limit)
     )
+    fixed_opcode_count: int | None = None
     code_generator: BenchmarkCodeGenerator | None = None
 
     supported_fixture_formats: ClassVar[
@@ -163,6 +218,7 @@ class BenchmarkTest(BaseTest):
     supported_markers: ClassVar[Dict[str, str]] = {
         "blockchain_test_engine_only": "Only generate a blockchain test engine fixture",
         "blockchain_test_only": "Only generate a blockchain test fixture",
+        "repricing": "Mark test as reference test for gas repricing analysis",
     }
 
     def model_post_init(self, __context: Any, /) -> None:
@@ -193,7 +249,18 @@ def model_post_init(self, __context: Any, /) -> None:
         blocks: List[Block] = self.setup_blocks
 
         if self.code_generator is not None:
-            generated_blocks = self.generate_blocks_from_code_generator()
+            # Inject fixed_opcode_count into the code generator if provided
+            self.code_generator.fixed_opcode_count = self.fixed_opcode_count
+
+            # In fixed opcode count mode, skip gas validation since we're
+            # measuring performance by operation count, not gas usage
+            if self.fixed_opcode_count is not None:
+                self.skip_gas_used_validation = True
+                generated_blocks = (
+                    self.generate_fixed_opcode_count_transactions()
+                )
+            else:
+                generated_blocks = self.generate_blocks_from_code_generator()
             blocks += generated_blocks
 
         elif self.blocks is not None:
@@ -294,6 +361,22 @@ def generate_blocks_from_code_generator(self) -> List[Block]:
 
         return [execution_block]
 
+    def generate_fixed_opcode_count_transactions(self) -> List[Block]:
+        """Build the one-block benchmark for fixed opcode count mode."""
+        generator = self.code_generator
+        if generator is None:
+            raise Exception("Code generator is not set")
+        generator.deploy_fix_count_contracts(pre=self.pre, fork=self.fork)
+
+        # Prefer the fork's per-transaction gas cap; when that is falsy
+        # (e.g. no cap), fall back to the benchmark gas value.
+        tx_gas_cap = self.fork.transaction_gas_limit_cap()
+        gas_limit = tx_gas_cap or self.gas_benchmark_value
+        benchmark_tx = generator.generate_transaction(
+            pre=self.pre, gas_benchmark_value=gas_limit
+        )
+        return [Block(txs=[benchmark_tx])]
+
     def generate_blockchain_test(self) -> BlockchainTest:
         """Create a BlockchainTest from this BenchmarkTest."""
         return BlockchainTest.from_test(

diff --git a/tests/benchmark/compute/instruction/test_account_query.py b/tests/benchmark/compute/instruction/test_account_query.py
@@ -40,6 +40,7 @@
 )
 
 
+@pytest.mark.repricing(contract_balance=0)
 @pytest.mark.parametrize("contract_balance", [0, 1])
 def test_selfbalance(
     benchmark_test: BenchmarkTestFiller,
@@ -54,6 +55,7 @@ def test_selfbalance(
     )
 
 
+@pytest.mark.repricing
 def test_codesize(
     benchmark_test: BenchmarkTestFiller,
 ) -> None:
@@ -345,6 +347,7 @@ def test_extcodecopy_warm(
     benchmark_test(tx=tx)
 
 
+@pytest.mark.repricing(absent_target=False)
 @pytest.mark.parametrize(
     "opcode",
     [