diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py index 1bdde10539..9736480f44 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py @@ -649,7 +649,6 @@ def base_test_parametrizer_func( class BaseTestWrapper(cls): # type: ignore def __init__(self, *args: Any, **kwargs: Any) -> None: - kwargs["t8n_dump_dir"] = None if "pre" not in kwargs: kwargs["pre"] = pre elif kwargs["pre"] != pre: diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py index df35ffa019..099754eef2 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py @@ -8,14 +8,12 @@ import configparser import datetime -import hashlib import json import os import warnings -from collections import OrderedDict from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, Generator, List, Self, Set, Tuple, Type +from typing import Any, Dict, Generator, List, Self, Set, Type import pytest import xdist @@ -33,7 +31,7 @@ from execution_testing.cli.gen_index import ( generate_fixtures_index, ) -from execution_testing.client_clis import TransitionTool, TransitionToolOutput +from execution_testing.client_clis import TransitionTool from execution_testing.client_clis.clis.geth import FixtureConsumerTool from execution_testing.fixtures import ( BaseFixture, @@ -46,7 +44,7 @@ PreAllocGroupBuilders, PreAllocGroups, TestInfo, - strip_fixture_format_from_nodeid, + strip_fixture_format_from_node, ) from execution_testing.forks import ( Fork, @@ -73,51 +71,6 @@ from .fixture_output import FixtureOutput -class T8nOutputCache: - """ - Bounded LRU cache for t8n outputs. - - With xdist_group ensuring related formats run consecutively on the same - worker, we only need to hold entries for 1-2 test cases at a time. - """ - - def __init__(self, maxsize: int = 3): - """Initialize the cache with a maximum size.""" - self._cache: OrderedDict[ - Tuple[str, str, int], TransitionToolOutput - ] = OrderedDict() - self._maxsize = maxsize - self.hits = 0 - self.misses = 0 - - def get(self, key: Tuple[str, str, int]) -> TransitionToolOutput | None: - """Get a value from the cache, marking it as recently used.""" - if key in self._cache: - self._cache.move_to_end(key) - self.hits += 1 - return self._cache[key] - self.misses += 1 - return None - - def set( - self, key: Tuple[str, str, int], value: TransitionToolOutput - ) -> None: - """Set a value in the cache, evicting oldest if at capacity.""" - if key in self._cache: - self._cache.move_to_end(key) - self._cache[key] = value - else: - if len(self._cache) >= self._maxsize: - self._cache.popitem(last=False) - self._cache[key] = value - - def stats(self) -> str: - """Return cache statistics.""" - total = self.hits + self.misses - rate = (self.hits / total * 100) if total > 0 else 0 - return f"hits={self.hits}, misses={self.misses}, rate={rate:.1f}%" - - @dataclass(kw_only=True) class PhaseManager: """ @@ -283,7 +236,6 @@ class FillingSession: format_selector: FormatSelector pre_alloc_groups: PreAllocGroups | None = None pre_alloc_group_builders: PreAllocGroupBuilders | None = None - t8n_output_cache: T8nOutputCache = field(default_factory=T8nOutputCache) @classmethod def from_config(cls, config: pytest.Config) -> "Self": @@ -413,6 +365,16 @@ def save_pre_alloc_groups(self) -> None: self.pre_alloc_group_builders.to_folder(pre_alloc_folder) +@dataclass(kw_only=True) +class TransitionToolCacheStats: + """Stats for caching of the transition tool requests.""" + + key_test_hits: int = 0 + key_test_miss: int = 0 + subkey_test_hits: int = 0 + subkey_test_miss: int = 0 + + def calculate_post_state_diff( post_state: Alloc, genesis_state: Alloc ) -> Alloc: @@ -881,6 +843,20 @@ def pytest_terminal_summary( yield if config.fixture_output.is_stdout or hasattr(config, "workerinput"): # type: ignore[attr-defined] return + t8n_cache_stats: TransitionToolCacheStats | None = getattr( + config, "transition_tool_cache_stats", None + ) + if t8n_cache_stats is not None: + terminalreporter.write_sep( + "=", + f" Transition tool cache stats:" + f"- Key test hits: {t8n_cache_stats.key_test_hits}" + f"- Key test misses: {t8n_cache_stats.key_test_miss}" + f"- Subkey test hits: {t8n_cache_stats.subkey_test_hits}" + f"- Subkey test misses: {t8n_cache_stats.subkey_test_miss}", + bold=True, + green=True, + ) stats = terminalreporter.stats if "passed" in stats and stats["passed"]: # Custom message for Phase 1 (pre-allocation group generation) @@ -1042,7 +1018,7 @@ def verify_fixtures_bin(request: pytest.FixtureRequest) -> Path | None: @pytest.fixture(autouse=True, scope="session") -def t8n( +def session_t8n( request: pytest.FixtureRequest, ) -> Generator[TransitionTool, None, None]: """Return configured transition tool.""" @@ -1060,6 +1036,59 @@ def t8n( t8n.shutdown() +def get_t8n_cache_key(request: pytest.FixtureRequest) -> str | None: + """Get the cache key to be used for the current test, if any.""" + mark: pytest.Mark = request.node.get_closest_marker( + "transition_tool_cache_key" + ) + if mark is not None and len(mark.args) == 1: + return f"{strip_fixture_format_from_node(request.node)}-{mark.args[0]}" + return None + + +@pytest.fixture(autouse=True, scope="session") +def transition_tool_cache_stats( + request: pytest.FixtureRequest, +) -> Generator[TransitionToolCacheStats, None, None]: + """Get the transition tool cache stats.""" + stats = TransitionToolCacheStats() + yield stats + request.config.transition_tool_cache_stats = stats # type: ignore[attr-defined] + + +@pytest.fixture(autouse=True, scope="function") +def t8n( + request: pytest.FixtureRequest, + session_t8n: TransitionTool, + dump_dir_parameter_level: Path | None, + transition_tool_cache_stats: TransitionToolCacheStats, +) -> Generator[TransitionTool, None, None]: + """Set the transition tool up for the current test.""" + if transition_tool_cache_key := get_t8n_cache_key(request): + # This test is allowed to cache results + if session_t8n.set_cache(key=transition_tool_cache_key): + transition_tool_cache_stats.key_test_hits += 1 + else: + transition_tool_cache_stats.key_test_miss += 1 + else: + # Test cannot use output cache, remove it + session_t8n.remove_cache() + # Reset the traces + session_t8n.reset_traces() + session_t8n.call_counter = 0 + session_t8n.debug_dump_dir = dump_dir_parameter_level + # TODO: Configure the transition tool to count opcodes only when required. + session_t8n.reset_opcode_count() + yield session_t8n + if session_t8n.output_cache is not None: + transition_tool_cache_stats.subkey_test_hits += ( + session_t8n.output_cache.hits + ) + transition_tool_cache_stats.subkey_test_miss += ( + session_t8n.output_cache.misses + ) + + @pytest.fixture(scope="session") def do_fixture_verification( request: pytest.FixtureRequest, verify_fixtures_bin: Path | None @@ -1381,7 +1410,6 @@ def base_test_parametrizer_func( reference_spec: ReferenceSpec, pre: Alloc, output_dir: Path, - dump_dir_parameter_level: Path | None, fixture_collector: FixtureCollector, test_case_description: str, fixture_source_url: str, @@ -1412,7 +1440,6 @@ def base_test_parametrizer_func( class BaseTestWrapper(cls): # type: ignore def __init__(self, *args: Any, **kwargs: Any) -> None: - kwargs["t8n_dump_dir"] = dump_dir_parameter_level if "pre" not in kwargs: kwargs["pre"] = pre if "expected_benchmark_gas_used" not in kwargs: @@ -1653,13 +1680,14 @@ def pytest_collection_modifyitems( # Skip if test already has an xdist_group marker (e.g., bigmem). for item in items: if not item.get_closest_marker("xdist_group"): - base_nodeid = strip_fixture_format_from_nodeid(item.nodeid) - h = hashlib.md5(base_nodeid.encode()).hexdigest()[:8] - item.add_marker(pytest.mark.xdist_group(name=f"t8n-cache-{h}")) + base_nodeid = strip_fixture_format_from_node(item) + item.add_marker( + pytest.mark.xdist_group(name=f"t8n-cache-{base_nodeid}") + ) # Sort items so related formats run consecutively for cache hits. # This ensures deterministic execution order within xdist workers. - items.sort(key=lambda item: strip_fixture_format_from_nodeid(item.nodeid)) + items.sort(key=lambda item: strip_fixture_format_from_node(item)) def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py index 991d00117e..5f519ff29c 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py @@ -1,266 +1,206 @@ """Unit tests for the t8n output cache functionality.""" import hashlib -from dataclasses import dataclass, field from typing import Any import pytest from execution_testing.fixtures import ( - get_all_fixture_format_names, - strip_fixture_format_from_nodeid, + BlockchainEngineFixture, + BlockchainFixture, + FixtureFormat, + LabeledFixtureFormat, + StateFixture, + strip_fixture_format_from_node, ) -from ..filler import T8nOutputCache, _strip_xdist_group_suffix +from ...shared.helpers import labeled_format_parameter_set +from ..filler import _strip_xdist_group_suffix -class TestT8nOutputCache: - """Test cases for the T8nOutputCache LRU cache.""" - - def test_cache_basic_operations(self) -> None: - """Test basic get/set operations.""" - cache = T8nOutputCache(maxsize=3) - - key = ("test.py::test[fork_Osaka]", "Osaka", 0) - value = object() # Mock TransitionToolOutput - - assert cache.get(key) is None - cache.set(key, value) # type: ignore[arg-type] - assert cache.get(key) is value - - def test_cache_lru_eviction(self) -> None: - """Test LRU eviction when cache exceeds maxsize.""" - cache = T8nOutputCache(maxsize=2) - - key1 = ("test1", "Osaka", 0) - key2 = ("test2", "Osaka", 0) - key3 = ("test3", "Osaka", 0) - - cache.set(key1, "value1") # type: ignore[arg-type] - cache.set(key2, "value2") # type: ignore[arg-type] - - # Both should be present. - assert cache.get(key1) == "value1" - assert cache.get(key2) == "value2" - - # Adding key3 should evict key1 (oldest after key2 access). - cache.set(key3, "value3") # type: ignore[arg-type] - assert cache.get(key1) is None # Evicted - assert cache.get(key2) == "value2" - assert cache.get(key3) == "value3" - - def test_cache_lru_access_updates_order(self) -> None: - """Test that get() updates LRU order.""" - cache = T8nOutputCache(maxsize=2) - - key1 = ("test1", "Osaka", 0) - key2 = ("test2", "Osaka", 0) - key3 = ("test3", "Osaka", 0) - - cache.set(key1, "value1") # type: ignore[arg-type] - cache.set(key2, "value2") # type: ignore[arg-type] - - # Access key1 to make it recently used. - cache.get(key1) - - # Adding key3 should evict key2 (now oldest). - cache.set(key3, "value3") # type: ignore[arg-type] - assert cache.get(key1) == "value1" - assert cache.get(key2) is None # Evicted - assert cache.get(key3) == "value3" - - def test_cache_hit_miss_tracking(self) -> None: - """Test that hits and misses are tracked correctly.""" - cache = T8nOutputCache(maxsize=3) - - key = ("test", "Osaka", 0) - - # Miss. - cache.get(key) - assert cache.hits == 0 - assert cache.misses == 1 - - # Set and hit. - cache.set(key, "value") # type: ignore[arg-type] - cache.get(key) - assert cache.hits == 1 - assert cache.misses == 1 - - # Another hit. - cache.get(key) - assert cache.hits == 2 - assert cache.misses == 1 - - def test_cache_stats(self) -> None: - """Test stats() output format.""" - cache = T8nOutputCache(maxsize=3) - - key = ("test", "Osaka", 0) - cache.set(key, "value") # type: ignore[arg-type] - cache.get(key) # Hit - cache.get(("other", "Osaka", 0)) # Miss - - stats = cache.stats() - assert "hits=1" in stats - assert "misses=1" in stats - assert "rate=50.0%" in stats - - def test_cache_stats_empty(self) -> None: - """Test stats() with no operations.""" - cache = T8nOutputCache(maxsize=3) - stats = cache.stats() - assert "hits=0" in stats - assert "misses=0" in stats - assert "rate=0.0%" in stats - - def test_cache_update_existing_key(self) -> None: - """Test that setting an existing key updates value and LRU order.""" - cache = T8nOutputCache(maxsize=2) - - key1 = ("test1", "Osaka", 0) - key2 = ("test2", "Osaka", 0) - key3 = ("test3", "Osaka", 0) - - cache.set(key1, "old") # type: ignore[arg-type] - cache.set(key2, "value2") # type: ignore[arg-type] +class MockItem: + """Mock pytest.Item for testing collection sorting behavior.""" - # Update key1 (moves to end). - cache.set(key1, "new") # type: ignore[arg-type] - assert cache.get(key1) == "new" + nodeid: str + name: str + _markers: list[pytest.Mark] + + def __init__( + self, + nodeid: str, + fixture_format: LabeledFixtureFormat | FixtureFormat | None, + name: str | None = None, + ) -> None: + """Initialize name from nodeid if not provided.""" + self.nodeid = nodeid + if not name: + parts = nodeid.split("::") + name = parts[-1] if "::" in nodeid else nodeid + self.name = name + self._markers = [] + if fixture_format is not None: + param = labeled_format_parameter_set(fixture_format) + for mark in param.marks: + self._markers.append(mark) # type: ignore[arg-type] + + def get_closest_marker(self, name: str) -> pytest.Mark | None: + """Return marker by name if present.""" + for marker in self._markers: + if marker.name == name: + return marker + return None - # Adding key3 should evict key2 (now oldest). - cache.set(key3, "value3") # type: ignore[arg-type] - assert cache.get(key1) == "new" - assert cache.get(key2) is None + def add_marker(self, marker: Any) -> None: + """Add a marker to the item.""" + self._markers.append(marker) class TestStripFixtureFormatFromNodeid: - """Test cases for strip_fixture_format_from_nodeid function.""" + """Test cases for strip_fixture_format_from_node function.""" def test_strip_blockchain_test(self) -> None: """Test stripping blockchain_test format.""" - nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test]" - expected = "tests/test.py::test_foo[fork_Osaka]" - assert strip_fixture_format_from_nodeid(nodeid) == expected + item = MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test]", + BlockchainFixture, + ) + expected = "tests/test.py::test_foo[fork_Osaka-]" + assert strip_fixture_format_from_node(item) == expected def test_strip_blockchain_test_engine(self) -> None: """Test stripping blockchain_test_engine format.""" - nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]" - expected = "tests/test.py::test_foo[fork_Osaka]" - assert strip_fixture_format_from_nodeid(nodeid) == expected + item = MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ) + expected = "tests/test.py::test_foo[fork_Osaka-]" + assert strip_fixture_format_from_node(item) == expected def test_strip_state_test(self) -> None: """Test stripping state_test format.""" - nodeid = "tests/test.py::test_foo[fork_Osaka-state_test]" - expected = "tests/test.py::test_foo[fork_Osaka]" - assert strip_fixture_format_from_nodeid(nodeid) == expected + item = MockItem( + "tests/test.py::test_foo[fork_Osaka-state_test]", + StateFixture, + ) + expected = "tests/test.py::test_foo[fork_Osaka-]" + assert strip_fixture_format_from_node(item) == expected def test_strip_format_in_middle(self) -> None: """Test stripping format when it's in the middle of params.""" - nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]" - expected = "tests/test.py::test_foo[fork_Osaka-param1]" - assert strip_fixture_format_from_nodeid(nodeid) == expected + item = MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]", + BlockchainFixture, + ) + expected = "tests/test.py::test_foo[fork_Osaka--param1]" + assert strip_fixture_format_from_node(item) == expected def test_no_format_unchanged(self) -> None: """Test that nodeids without fixture format are unchanged.""" - nodeid = "tests/test.py::test_foo[fork_Osaka-some_param]" - assert strip_fixture_format_from_nodeid(nodeid) == nodeid + item = MockItem( + "tests/test.py::test_foo[fork_Osaka-some_param]", + None, + ) + assert strip_fixture_format_from_node(item) == item.nodeid def test_no_params_unchanged(self) -> None: """Test that nodeids without parameters are unchanged.""" - nodeid = "tests/test.py::test_foo" - assert strip_fixture_format_from_nodeid(nodeid) == nodeid + item = MockItem( + "tests/test.py::test_foo", + None, + ) + assert strip_fixture_format_from_node(item) == item.nodeid def test_empty_params_unchanged(self) -> None: """Test that nodeids with empty params are unchanged.""" - nodeid = "tests/test.py::test_foo[]" - assert strip_fixture_format_from_nodeid(nodeid) == nodeid + item = MockItem( + "tests/test.py::test_foo[]", + None, + ) + assert strip_fixture_format_from_node(item) == item.nodeid def test_format_at_start(self) -> None: """Test stripping format at start of params.""" - nodeid = "tests/test.py::test_foo[blockchain_test-fork_Osaka]" - expected = "tests/test.py::test_foo[fork_Osaka]" - assert strip_fixture_format_from_nodeid(nodeid) == expected + item = MockItem( + "tests/test.py::test_foo[blockchain_test-fork_Osaka]", + BlockchainFixture, + ) + expected = "tests/test.py::test_foo[-fork_Osaka]" + assert strip_fixture_format_from_node(item) == expected + + def test_only_format(self) -> None: + """Test stripping format at start of params.""" + item = MockItem( + "tests/test.py::test_foo[blockchain_test]", + BlockchainFixture, + ) + expected = "tests/test.py::test_foo[]" + assert strip_fixture_format_from_node(item) == expected def test_related_formats_same_base(self) -> None: """Test that related formats produce the same base nodeid.""" - base_nodeid = "tests/test.py::test_foo[fork_Osaka-param1]" + base_nodeid = "tests/test.py::test_foo[fork_Osaka--param1]" - nodeid_bt = ( - "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]" + node_bt = MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]", + BlockchainFixture, ) - nodeid_bte = ( - "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine-param1]" + node_bte = MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine-param1]", + BlockchainEngineFixture, ) # Both should strip to the same base. - assert strip_fixture_format_from_nodeid(nodeid_bt) == base_nodeid - assert strip_fixture_format_from_nodeid(nodeid_bte) == base_nodeid + assert strip_fixture_format_from_node(node_bt) == base_nodeid + assert strip_fixture_format_from_node(node_bte) == base_nodeid def test_longer_format_matched_first(self) -> None: """Test that longer format names are matched before shorter ones.""" # blockchain_test_engine should match before blockchain_test. - nodeid = "tests/test.py::test[fork_Osaka-blockchain_test_engine]" - expected = "tests/test.py::test[fork_Osaka]" - result = strip_fixture_format_from_nodeid(nodeid) + node = MockItem( + "tests/test.py::test[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ) + expected = "tests/test.py::test[fork_Osaka-]" + result = strip_fixture_format_from_node(node) assert result == expected # Verify it didn't partially match blockchain_test. assert "blockchain_test" not in result -class TestGetAllFixtureFormatNames: - """Test cases for get_all_fixture_format_names function.""" - - def test_returns_tuple(self) -> None: - """Test that function returns a tuple (hashable for lru_cache).""" - result = get_all_fixture_format_names() - assert isinstance(result, tuple) - - def test_contains_known_formats(self) -> None: - """Test that common fixture formats are included.""" - formats = get_all_fixture_format_names() - assert "blockchain_test" in formats - assert "state_test" in formats - - def test_sorted_by_length_descending(self) -> None: - """Test that formats are sorted by length (longest first).""" - formats = get_all_fixture_format_names() - lengths = [len(f) for f in formats] - assert lengths == sorted(lengths, reverse=True) - - def test_blockchain_test_engine_before_blockchain_test(self) -> None: - """Test that longer names come before their prefixes.""" - formats = get_all_fixture_format_names() - # blockchain_test_engine is longer and should come first. - has_engine = "blockchain_test_engine" in formats - has_bt = "blockchain_test" in formats - if has_engine and has_bt: - idx_engine = formats.index("blockchain_test_engine") - idx_bt = formats.index("blockchain_test") - assert idx_engine < idx_bt - - class TestCacheKeyConsistency: """Test that cache keys are consistent across fixture formats.""" @pytest.mark.parametrize( - "format_name", + "labeled_fixture_format,format_name", [ - "blockchain_test", - "blockchain_test_engine", - "state_test", - "blockchain_test_from_state_test", - "blockchain_test_engine_from_state_test", + (BlockchainFixture, "blockchain_test"), + (BlockchainEngineFixture, "blockchain_test_engine"), + (StateFixture, "state_test"), + ( + LabeledFixtureFormat( + BlockchainFixture, "blockchain_test_from_state_test", "" + ), + "blockchain_test_from_state_test", + ), + ( + LabeledFixtureFormat( + BlockchainEngineFixture, + "blockchain_test_engine_from_state_test", + "", + ), + "blockchain_test_engine_from_state_test", + ), ], ) def test_format_stripping_produces_consistent_key( - self, format_name: str + self, labeled_fixture_format: LabeledFixtureFormat, format_name: str ) -> None: """Test that all format variants produce the same base key.""" - base = "tests/test.py::test_case[fork_Osaka-param1]" + base = "tests/test.py::test_case[fork_Osaka--param1]" nodeid = f"tests/test.py::test_case[fork_Osaka-{format_name}-param1]" + node = MockItem(nodeid, labeled_fixture_format) - result = strip_fixture_format_from_nodeid(nodeid) + result = strip_fixture_format_from_node(node) assert result == base, f"Format {format_name} did not strip correctly" @@ -306,22 +246,34 @@ def test_blockchain_test_sorts_before_blockchain_test_engine(self) -> None: def test_related_formats_group_together_when_sorted(self) -> None: """Test that sorting by base nodeid groups related formats together.""" - nodeids = [ - "tests/test.py::test_foo[fork_Osaka-blockchain_test]", - "tests/test.py::test_bar[fork_Osaka-blockchain_test]", - "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", - "tests/test.py::test_bar[fork_Osaka-blockchain_test_engine]", + nodes = [ + MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_bar[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ), + MockItem( + "tests/test.py::test_bar[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ), ] # Sort by base nodeid (as the collection hook does). - sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid) + sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node) # Related formats should be adjacent after sorting. test_bar_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_bar" in n + i for i, n in enumerate(sorted_nodes) if "test_bar" in n.nodeid ] test_foo_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_foo" in n + i for i, n in enumerate(sorted_nodes) if "test_foo" in n.nodeid ] # Check adjacency: indices should be consecutive. @@ -330,21 +282,30 @@ def test_related_formats_group_together_when_sorted(self) -> None: def test_related_formats_grouped_when_sorted(self) -> None: """Test sorting groups related formats together (same base nodeid).""" - nodeids = [ - "tests/test.py::test_foo[fork_Osaka-blockchain_test]", - "tests/test.py::test_bar[fork_Osaka-blockchain_test]", - "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", + nodes = [ + MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_bar[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ), ] # Sort by base nodeid. - sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid) + sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node) # test_bar items should be adjacent, test_foo items should be adjacent. foo_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_foo" in n + i for i, n in enumerate(sorted_nodes) if "test_foo" in n.nodeid ] bar_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_bar" in n + i for i, n in enumerate(sorted_nodes) if "test_bar" in n.nodeid ] # Check foo items are adjacent (difference is 1). @@ -354,23 +315,35 @@ def test_related_formats_grouped_when_sorted(self) -> None: def test_sorting_groups_multiple_tests_by_base_nodeid(self) -> None: """Test sorting groups items by base nodeid.""" - nodeids = [ + nodes = [ # Deliberately interleaved: test_a and test_b formats mixed. - "tests/test.py::test_b[fork_Osaka-blockchain_test_engine]", - "tests/test.py::test_a[fork_Osaka-blockchain_test]", - "tests/test.py::test_b[fork_Osaka-blockchain_test]", - "tests/test.py::test_a[fork_Osaka-blockchain_test_engine]", + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, + ), ] # Sort by base nodeid. - sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid) + sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node) # After sorting, test_a formats should be adjacent, test_b adjacent. test_a_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_a" in n + i for i, n in enumerate(sorted_nodes) if "test_a" in n.nodeid ] test_b_indices = [ - i for i, n in enumerate(sorted_nodeids) if "test_b" in n + i for i, n in enumerate(sorted_nodes) if "test_b" in n.nodeid ] # Check test_a items are adjacent. @@ -379,45 +352,17 @@ def test_sorting_groups_multiple_tests_by_base_nodeid(self) -> None: assert max(test_b_indices) - min(test_b_indices) == 1 -@dataclass -class MockItem: - """Mock pytest.Item for testing collection sorting behavior.""" - - nodeid: str - name: str = "" - _markers: list[Any] = field(default_factory=list) - - def __post_init__(self) -> None: - """Initialize name from nodeid if not provided.""" - if not self.name: - parts = self.nodeid.split("::") - self.name = parts[-1] if "::" in self.nodeid else self.nodeid - - def get_closest_marker(self, name: str) -> Any: - """Return marker by name if present.""" - for marker in self._markers: - if hasattr(marker, "name") and marker.name == name: - return marker - return None - - def add_marker(self, marker: Any) -> None: - """Add a marker to the item.""" - self._markers.append(marker) - - class TestCollectionSortingBehavior: """Test collection sorting behavior ensures cache hits.""" def _sort_items_by_base_nodeid(self, items: list[MockItem]) -> None: """Sort items by base nodeid (cache-friendly order).""" - items.sort( - key=lambda item: strip_fixture_format_from_nodeid(item.nodeid) - ) + items.sort(key=lambda item: strip_fixture_format_from_node(item)) def _add_xdist_markers(self, items: list[MockItem]) -> None: """Add xdist_group markers based on base nodeid hash.""" for item in items: - base_nodeid = strip_fixture_format_from_nodeid(item.nodeid) + base_nodeid = strip_fixture_format_from_node(item) h = hashlib.md5(base_nodeid.encode()).hexdigest()[:8] item.add_marker(pytest.mark.xdist_group(name=f"t8n-cache-{h}")) @@ -446,8 +391,14 @@ def _simulate_collection_with_xdist_fixed( def test_items_sorted_without_xdist(self) -> None: """Test that items are sorted when xdist is NOT enabled.""" items = [ - MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"), - MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"), + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), ] self._simulate_collection_without_xdist(items) @@ -459,8 +410,14 @@ def test_items_sorted_without_xdist(self) -> None: def test_items_sorted_with_xdist_current_behavior(self) -> None: """Test current behavior: items ARE sorted with xdist (FIXED).""" items = [ - MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"), - MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"), + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), ] self._simulate_collection_with_xdist_current(items) @@ -474,8 +431,14 @@ def test_items_sorted_with_xdist_current_behavior(self) -> None: def test_items_should_be_sorted_with_xdist(self) -> None: """Test items SHOULD be sorted with xdist (expected fix).""" items = [ - MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"), - MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"), + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), ] self._simulate_collection_with_xdist_fixed(items) @@ -487,9 +450,13 @@ def test_items_should_be_sorted_with_xdist(self) -> None: def test_xdist_groups_have_consistent_hash(self) -> None: """Test xdist_group markers use consistent hashes.""" items = [ - MockItem("tests/test.py::test_foo[fork_Osaka-blockchain_test]"), MockItem( - "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]" + "tests/test.py::test_foo[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]", + BlockchainEngineFixture, ), ] @@ -512,18 +479,24 @@ def test_xdist_groups_have_consistent_hash(self) -> None: def test_current_matches_expected_behavior_xdist(self) -> None: """Test CURRENT xdist behavior matches EXPECTED (FIXED).""" - nodeids = [ - "tests/test.py::test_b[fork_Osaka-blockchain_test]", - "tests/test.py::test_a[fork_Osaka-blockchain_test]", + items = [ + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), ] # Simulate current (fixed) behavior. - current_items = [MockItem(n) for n in nodeids] + current_items = items[:] self._simulate_collection_with_xdist_current(current_items) current_order = [i.nodeid for i in current_items] # Simulate expected behavior. - expected_items = [MockItem(n) for n in nodeids] + expected_items = items[:] self._simulate_collection_with_xdist_fixed(expected_items) expected_order = [i.nodeid for i in expected_items] @@ -534,13 +507,17 @@ def test_current_matches_expected_behavior_xdist(self) -> None: def test_xdist_sorting_required_for_cache_hits(self) -> None: """Test xdist collection sorts items for deterministic cache hits.""" - nodeids = [ - "tests/test.py::test_b[fork_Osaka-blockchain_test]", - "tests/test.py::test_a[fork_Osaka-blockchain_test]", + items = [ + MockItem( + "tests/test.py::test_b[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), + MockItem( + "tests/test.py::test_a[fork_Osaka-blockchain_test]", + BlockchainFixture, + ), ] - # Simulate current behavior with xdist. - items = [MockItem(n) for n in nodeids] self._simulate_collection_with_xdist_current(items) # FIXED: Items are sorted (test_a before test_b). diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py index b059420838..a497517a16 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py @@ -183,6 +183,15 @@ def pytest_configure(config: pytest.Config) -> None: "markers", "fully_tagged: Marks a static test as fully tagged with all metadata.", ) + config.addinivalue_line( + "markers", + "fixture_format_id: ID used to describe the fixture format.", + ) + config.addinivalue_line( + "markers", + "transition_tool_cache_key: Key used to match the transition tool " + "cache for the test during fill.", + ) @pytest.fixture(scope="function") diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py index 1da0a41fd9..8b37e01909 100644 --- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py +++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py @@ -46,12 +46,22 @@ def labeled_format_parameter_set( The label will be used in the test id and also will be added as a marker to the generated test case when filling/executing the test. """ + transition_tool_cache_key = getattr( + format_with_or_without_label, "transition_tool_cache_key", "" + ) + if transition_tool_cache_key: + marks = [ + pytest.mark.transition_tool_cache_key(transition_tool_cache_key), + ] + else: + marks = [] if isinstance( format_with_or_without_label, LabeledExecuteFormat ) or isinstance(format_with_or_without_label, LabeledFixtureFormat): + parameter_id = format_with_or_without_label.label return pytest.param( format_with_or_without_label.format, - id=format_with_or_without_label.label, + id=parameter_id, marks=[ getattr( pytest.mark, @@ -59,20 +69,25 @@ def labeled_format_parameter_set( ), getattr( pytest.mark, - format_with_or_without_label.label.lower(), + parameter_id.lower(), ), - ], + pytest.mark.fixture_format_id(parameter_id), + ] + + marks, ) else: + parameter_id = format_with_or_without_label.format_name.lower() return pytest.param( format_with_or_without_label, - id=format_with_or_without_label.format_name.lower(), + id=parameter_id, marks=[ getattr( pytest.mark, - format_with_or_without_label.format_name.lower(), - ) - ], + parameter_id, + ), + pytest.mark.fixture_format_id(parameter_id), + ] + + marks, ) diff --git a/packages/testing/src/execution_testing/client_clis/clis/besu.py b/packages/testing/src/execution_testing/client_clis/clis/besu.py index 3a75c202d4..72690da7da 100644 --- a/packages/testing/src/execution_testing/client_clis/clis/besu.py +++ b/packages/testing/src/execution_testing/client_clis/clis/besu.py @@ -21,6 +21,7 @@ from ..cli_types import TransitionToolOutput from ..transition_tool import ( + Profiler, TransitionTool, dump_files_to_directory, model_dump_config, @@ -113,15 +114,16 @@ def shutdown(self) -> None: if self.besu_trace_dir: self.besu_trace_dir.cleanup() - def evaluate( + def _evaluate( self, *, transition_tool_data: TransitionTool.TransitionToolData, - debug_output_path: str = "", - slow_request: bool = False, + debug_output_path: Path | None, + slow_request: bool, + profiler: Profiler, ) -> TransitionToolOutput: """Execute `evm t8n` with the specified arguments.""" - del slow_request + del slow_request, profiler if not self.process: self.start_server() diff --git a/packages/testing/src/execution_testing/client_clis/clis/evmone.py b/packages/testing/src/execution_testing/client_clis/clis/evmone.py index e474e752aa..8157d29f93 100644 --- a/packages/testing/src/execution_testing/client_clis/clis/evmone.py +++ b/packages/testing/src/execution_testing/client_clis/clis/evmone.py @@ -132,7 +132,7 @@ def _consume_debug_dump( """ ) dump_files_to_directory( - str(debug_output_path), + debug_output_path, { "consume_direct_args.py": command, "consume_direct_returncode.txt": result.returncode, diff --git a/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py b/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py index 7f181df9cc..0d3e091037 100644 --- a/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py +++ b/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py @@ -19,6 +19,7 @@ dump_files_to_directory, ) from execution_testing.client_clis.transition_tool import ( + Profiler, TransitionTool, model_dump_config, ) @@ -62,17 +63,18 @@ def is_fork_supported(self, fork: Fork) -> bool: """Return True if the fork is supported by the tool.""" return fork.transition_tool_name() in get_supported_forks() - def evaluate( + def _evaluate( self, *, transition_tool_data: TransitionTool.TransitionToolData, - debug_output_path: str = "", - slow_request: bool = False, + debug_output_path: Path | None, + slow_request: bool, + profiler: Profiler, ) -> TransitionToolOutput: """ Evaluate using the EELS T8N entry point. """ - del slow_request + del slow_request, profiler request_data = transition_tool_data.get_request_data() request_data_json = request_data.model_dump( mode="json", **model_dump_config diff --git a/packages/testing/src/execution_testing/client_clis/clis/geth.py b/packages/testing/src/execution_testing/client_clis/clis/geth.py index e2e2c439d6..813b74736e 100644 --- a/packages/testing/src/execution_testing/client_clis/clis/geth.py +++ b/packages/testing/src/execution_testing/client_clis/clis/geth.py @@ -231,7 +231,7 @@ def _consume_debug_dump( """ ) dump_files_to_directory( - str(debug_output_path), + debug_output_path, { "consume_direct_args.py": command, "consume_direct_returncode.txt": result.returncode, diff --git a/packages/testing/src/execution_testing/client_clis/clis/nethermind.py b/packages/testing/src/execution_testing/client_clis/clis/nethermind.py index bb23b04aa0..b3465af955 100644 --- a/packages/testing/src/execution_testing/client_clis/clis/nethermind.py +++ b/packages/testing/src/execution_testing/client_clis/clis/nethermind.py @@ -83,7 +83,7 @@ def _consume_debug_dump( ) dump_files_to_directory( - str(debug_output_path), + debug_output_path, { "consume_direct_args.py": command, "consume_direct_returncode.txt": result.returncode, diff --git a/packages/testing/src/execution_testing/client_clis/file_utils.py b/packages/testing/src/execution_testing/client_clis/file_utils.py index 8e53f1729e..5b2047a6e7 100644 --- a/packages/testing/src/execution_testing/client_clis/file_utils.py +++ b/packages/testing/src/execution_testing/client_clis/file_utils.py @@ -3,6 +3,7 @@ import os import stat from json import dump +from pathlib import Path from typing import Any, Dict from pydantic import BaseModel, RootModel @@ -14,9 +15,9 @@ ) -def dump_files_to_directory(output_path: str, files: Dict[str, Any]) -> None: +def dump_files_to_directory(output_path: Path, files: Dict[str, Any]) -> None: """Dump the files to the given directory.""" - os.makedirs(output_path, exist_ok=True) + output_path.mkdir(parents=True, exist_ok=True) for file_rel_path_flags, file_contents in files.items(): file_rel_path, flags = ( file_rel_path_flags.split("+") @@ -25,8 +26,8 @@ def dump_files_to_directory(output_path: str, files: Dict[str, Any]) -> None: ) rel_path = os.path.dirname(file_rel_path) if rel_path: - os.makedirs(os.path.join(output_path, rel_path), exist_ok=True) - file_path = os.path.join(output_path, file_rel_path) + os.makedirs(output_path / rel_path, exist_ok=True) + file_path = output_path / file_rel_path with open(file_path, "w") as f: if isinstance(file_contents, (LazyAllocStr, LazyAllocJson)): if isinstance(file_contents, LazyAllocJson): diff --git a/packages/testing/src/execution_testing/client_clis/transition_tool.py b/packages/testing/src/execution_testing/client_clis/transition_tool.py index 223a0cda71..0231ff86a7 100644 --- a/packages/testing/src/execution_testing/client_clis/transition_tool.py +++ b/packages/testing/src/execution_testing/client_clis/transition_tool.py @@ -122,6 +122,40 @@ def get_valid_transition_tool_names() -> set[str]: return {fork.transition_tool_name() for fork in all_available_forks} +class OutputCache: + """ + Unbounded cache for t8n outputs. + + With xdist_group ensuring related formats run consecutively on the same + worker. + """ + + def __init__(self, *, key: str): + """Initialize the cache with a maximum size.""" + self._cache: Dict[int, TransitionToolOutput] = {} + self.key = key + self.hits = 0 + self.misses = 0 + + def get(self, subkey: int) -> TransitionToolOutput | None: + """Get a value from the cache, marking it as recently used.""" + if subkey in self._cache: + self.hits += 1 + return self._cache[subkey] + self.misses += 1 + return None + + def set(self, subkey: int, value: TransitionToolOutput) -> None: + """Set a value in the cache, evicting oldest if at capacity.""" + self._cache[subkey] = value + + def stats(self) -> str: + """Return cache statistics.""" + total = self.hits + self.misses + rate = (self.hits / total * 100) if total > 0 else 0 + return f"hits={self.hits}, misses={self.misses}, rate={rate:.1f}%" + + class TransitionTool(EthereumCLI): """ Transition tool abstract base class which should be inherited by all @@ -141,8 +175,12 @@ class TransitionTool(EthereumCLI): t8n_use_server: bool = False server_url: str | None = None process: Optional[subprocess.Popen] = None - supports_opcode_count: ClassVar[bool] = False + output_cache: OutputCache | None = None + debug_dump_dir: Path | None = None + call_counter: int = 0 + opcode_count: OpcodeCount | None = None + supports_opcode_count: ClassVar[bool] = False supports_xdist: ClassVar[bool] = True supports_blob_params: ClassVar[bool] = False fork_name_map: ClassVar[Dict[str, str]] = {} @@ -186,15 +224,14 @@ def shutdown(self) -> None: def reset_traces(self) -> None: """Reset the internal trace storage for a new test to begin.""" - self.traces = None + self.traces = [] def append_traces(self, new_traces: Traces) -> None: """ Append a list of traces of a state transition to the current list. """ - if self.traces is None: - self.traces = [] + assert self.traces is not None self.traces.append(new_traces) def get_traces(self) -> List[Traces] | None: @@ -205,7 +242,7 @@ def collect_traces( self, receipts: List[TransactionReceipt], temp_dir: tempfile.TemporaryDirectory, - debug_output_path: str = "", + debug_output_path: Path | None, ) -> Traces: """ Collect the traces from the t8n tool output and store them in the @@ -225,6 +262,34 @@ def collect_traces( self.append_traces(traces) return traces + def set_cache(self, *, key: str) -> bool: + """Sets the output cache.""" + if self.output_cache is None or key != self.output_cache.key: + self.output_cache = OutputCache(key=key) + return False + self.output_cache.hits = 0 + self.output_cache.misses = 0 + return True + + def remove_cache(self) -> None: + """ + Remove the output cache to be garbage collected since it + will no longer be used. + """ + self.output_cache = None + + def reset_opcode_count(self) -> None: + """ + Reset the opcode count to zero. + """ + self.opcode_count = OpcodeCount({}) + + def remove_opcode_count(self) -> None: + """ + Remove the opcode counter. + """ + self.opcode_count = None + @dataclass class TransitionToolData: """Transition tool files and data to pass between methods.""" @@ -305,7 +370,7 @@ def _evaluate_filesystem( self, *, t8n_data: TransitionToolData, - debug_output_path: str = "", + debug_output_path: Path | None, profiler: Profiler, ) -> TransitionToolOutput: """ @@ -359,7 +424,7 @@ def _evaluate_filesystem( "--state.chainid", str(t8n_data.chain_id), ] - if self.supports_opcode_count: + if self.supports_opcode_count and self.opcode_count is not None: args.extend( [ "--opcode.count", @@ -429,7 +494,7 @@ def _evaluate_filesystem( temp_dir_path / "output", context={"exception_mapper": self.exception_mapper}, ) - if self.supports_opcode_count: + if self.supports_opcode_count and self.opcode_count is not None: opcode_count_file_path = Path(temp_dir.name) / "opcodes.json" if opcode_count_file_path.exists(): opcode_count = OpcodeCount.model_validate_json( @@ -507,7 +572,7 @@ def _evaluate_server( self, *, t8n_data: TransitionToolData, - debug_output_path: str = "", + debug_output_path: Path | None, timeout: int, profiler: Profiler, ) -> TransitionToolOutput: @@ -595,7 +660,7 @@ def _evaluate_stream( self, *, t8n_data: TransitionToolData, - debug_output_path: str = "", + debug_output_path: Path | None, profiler: Profiler, ) -> TransitionToolOutput: """ @@ -617,10 +682,11 @@ def _evaluate_stream( stderr=subprocess.PIPE, ) - with profiler.pause(): - self.dump_debug_stream( - debug_output_path, temp_dir, stdin, args, result - ) + if debug_output_path: + with profiler.pause(): + self.dump_debug_stream( + debug_output_path, temp_dir, stdin, args, result + ) if result.returncode != 0: raise Exception("failed to evaluate: " + result.stderr.decode()) @@ -716,7 +782,7 @@ def construct_args_stream( def dump_debug_stream( self, - debug_output_path: str, + debug_output_path: Path, temp_dir: tempfile.TemporaryDirectory, stdin: TransitionToolInput, args: List[str], @@ -725,9 +791,6 @@ def dump_debug_stream( """ Export debug files if requested when interacting with t8n via streams. """ - if not debug_output_path: - return - t8n_call = " ".join(args) t8n_output_base_dir = os.path.join(debug_output_path, "t8n.sh.out") if self.trace: @@ -761,11 +824,79 @@ def dump_debug_stream( }, ) + def _evaluate( + self, + *, + transition_tool_data: TransitionToolData, + debug_output_path: Path | None, + slow_request: bool, + profiler: Profiler, + ) -> TransitionToolOutput: + """ + Execute the relevant evaluate method as required by the `t8n` tool. + + If a client's `t8n` tool varies from the default behavior, this method + can be overridden. + """ + if self.t8n_use_server: + if not self.server_url: + self.start_server() + return self._evaluate_server( + t8n_data=transition_tool_data, + debug_output_path=debug_output_path, + timeout=SLOW_REQUEST_TIMEOUT + if slow_request + else NORMAL_SERVER_TIMEOUT, + profiler=profiler, + ) + + elif self.t8n_use_stream: + return self._evaluate_stream( + t8n_data=transition_tool_data, + debug_output_path=debug_output_path, + profiler=profiler, + ) + else: + return self._evaluate_filesystem( + t8n_data=transition_tool_data, + debug_output_path=debug_output_path, + profiler=profiler, + ) + + def get_next_transition_tool_output_path( + self, call_id: int + ) -> Path | None: + """Return path to the next transition tool output file.""" + debug_dump_dir = self.debug_dump_dir + if debug_dump_dir is None: + return None + return debug_dump_dir / str(call_id) + + def increment_call_counter(self) -> int: + """Increment the call counter by one and return the previous value.""" + previous_value = self.call_counter + self.call_counter += 1 + return previous_value + + def process_result( + self, result: TransitionToolOutput + ) -> TransitionToolOutput: + """ + Process the result of the transition tool evaluation performing the + following operations: + - Add opcode count to the result if available. + """ + if ( + result.result.opcode_count is not None + and self.opcode_count is not None + ): + self.opcode_count += result.result.opcode_count + return result + def evaluate( self, *, transition_tool_data: TransitionToolData, - debug_output_path: str = "", slow_request: bool = False, ) -> TransitionToolOutput: """ @@ -774,33 +905,26 @@ def evaluate( If a client's `t8n` tool varies from the default behavior, this method can be overridden. """ + current_call_id = self.increment_call_counter() + if self.output_cache is not None: + cached_result = self.output_cache.get(current_call_id) + if cached_result: + return self.process_result(cached_result) + debug_output_path = self.get_next_transition_tool_output_path( + current_call_id + ) with Profiler( - enabled=debug_output_path != "", - filename=Path(debug_output_path) / "profile.out" + enabled=debug_output_path is not None, + filename=debug_output_path / "profile.out" if debug_output_path else None, ) as profiler: - if self.t8n_use_server: - if not self.server_url: - self.start_server() - return self._evaluate_server( - t8n_data=transition_tool_data, - debug_output_path=debug_output_path, - timeout=SLOW_REQUEST_TIMEOUT - if slow_request - else NORMAL_SERVER_TIMEOUT, - profiler=profiler, - ) - - elif self.t8n_use_stream: - return self._evaluate_stream( - t8n_data=transition_tool_data, - debug_output_path=debug_output_path, - profiler=profiler, - ) - else: - return self._evaluate_filesystem( - t8n_data=transition_tool_data, - debug_output_path=debug_output_path, - profiler=profiler, - ) + result = self._evaluate( + transition_tool_data=transition_tool_data, + debug_output_path=debug_output_path, + slow_request=slow_request, + profiler=profiler, + ) + if self.output_cache is not None: + self.output_cache.set(current_call_id, result) + return self.process_result(result) diff --git a/packages/testing/src/execution_testing/fixtures/__init__.py b/packages/testing/src/execution_testing/fixtures/__init__.py index a549c2d77b..60c6f9b340 100644 --- a/packages/testing/src/execution_testing/fixtures/__init__.py +++ b/packages/testing/src/execution_testing/fixtures/__init__.py @@ -5,8 +5,7 @@ FixtureFillingPhase, FixtureFormat, LabeledFixtureFormat, - get_all_fixture_format_names, - strip_fixture_format_from_nodeid, + strip_fixture_format_from_node, ) from .blockchain import ( BlockchainEngineFixture, @@ -39,14 +38,13 @@ "FixtureConsumer", "FixtureFillingPhase", "FixtureFormat", - "get_all_fixture_format_names", "LabeledFixtureFormat", "PreAllocGroup", "PreAllocGroupBuilder", "PreAllocGroupBuilders", "PreAllocGroups", "StateFixture", - "strip_fixture_format_from_nodeid", + "strip_fixture_format_from_node", "TestInfo", "TransactionFixture", ] diff --git a/packages/testing/src/execution_testing/fixtures/base.py b/packages/testing/src/execution_testing/fixtures/base.py index cb38348593..ce0395dc0a 100644 --- a/packages/testing/src/execution_testing/fixtures/base.py +++ b/packages/testing/src/execution_testing/fixtures/base.py @@ -2,10 +2,19 @@ import hashlib import json -import re from enum import Enum, auto -from functools import cached_property, lru_cache -from typing import Annotated, Any, ClassVar, Dict, List, Set, Type, Union +from functools import cached_property +from typing import ( + Annotated, + Any, + ClassVar, + Dict, + List, + Protocol, + Set, + Type, + Union, +) import pytest from pydantic import ( @@ -72,6 +81,7 @@ class BaseFixture(CamelModel): format_phases: ClassVar[Set[FixtureFillingPhase]] = { FixtureFillingPhase.FILL } + transition_tool_cache_key: ClassVar[str] = "" @classmethod def output_base_dir_name(cls) -> str: @@ -231,6 +241,11 @@ def format_phases(self) -> Set[FixtureFillingPhase]: """Get the filling format phases where it should be included.""" return self.format.format_phases + @property + def transition_tool_cache_key(self) -> str: + """Get the transition tool cache key.""" + return self.format.transition_tool_cache_key + def __eq__(self, other: Any) -> bool: """ Check if two labeled fixture formats are equal. @@ -255,29 +270,22 @@ def __eq__(self, other: Any) -> bool: ] -@lru_cache(maxsize=1) -def get_all_fixture_format_names() -> tuple[str, ...]: - """ - Get all fixture format names (base formats + labeled formats). - - Returns names sorted by length descending so longer patterns match first - when used for nodeid parsing. - """ - all_names: set[str] = set() - all_names.update(BaseFixture.formats.keys()) - all_names.update(LabeledFixtureFormat.registered_labels.keys()) - return tuple(sorted(all_names, key=len, reverse=True)) +class PytestItemProtocol(Protocol): + """Protocol that resembles pytest.Item.""" + @property + def nodeid(self) -> str: + """Return the nodeid of the item.""" + ... -@lru_cache(maxsize=1) -def _get_fixture_format_pattern() -> re.Pattern[str]: - """Compile regex pattern for matching fixture format names in nodeids.""" - formats = "|".join(re.escape(f) for f in get_all_fixture_format_names()) - # Match format name with hyphen boundaries or at string edges - return re.compile(rf"(^|-)({formats})(-|$)") + def get_closest_marker(self, name: str) -> pytest.Mark | None: + """Return the closest marker with the given name.""" + ... -def strip_fixture_format_from_nodeid(nodeid: str) -> str: +def strip_fixture_format_from_node( + item: PytestItemProtocol, +) -> str: """ Remove fixture format suffix from a test nodeid. @@ -288,20 +296,12 @@ def strip_fixture_format_from_nodeid(nodeid: str) -> str: 'test.py::test[fork_Osaka-state_test]' -> 'test.py::test[fork_Osaka]' """ - if "[" not in nodeid: + fixture_format_id_marker = item.get_closest_marker("fixture_format_id") + nodeid = item.nodeid + if fixture_format_id_marker is None: return nodeid - - bracket_pos = nodeid.rfind("[") - params_section = nodeid[bracket_pos + 1 : -1] - - def _replace(match: re.Match[str]) -> str: - prefix, _, suffix = match.groups() - # If both boundaries are hyphens, keep one hyphen - if prefix == "-" and suffix == "-": - return "-" - return "" - - pattern = _get_fixture_format_pattern() - params_section = pattern.sub(_replace, params_section, count=1) - - return nodeid[: bracket_pos + 1] + params_section + "]" + assert len(fixture_format_id_marker.args) == 1 + fixture_id = fixture_format_id_marker.args[0] + if fixture_id not in nodeid: + return nodeid + return nodeid.replace(fixture_id, "") diff --git a/packages/testing/src/execution_testing/fixtures/blockchain.py b/packages/testing/src/execution_testing/fixtures/blockchain.py index fa85d4b60b..f9b5f48f7b 100644 --- a/packages/testing/src/execution_testing/fixtures/blockchain.py +++ b/packages/testing/src/execution_testing/fixtures/blockchain.py @@ -745,6 +745,7 @@ class BlockchainFixture(BlockchainFixtureCommon): genesis_rlp: Bytes = Field(..., alias="genesisRLP") blocks: List[FixtureBlock | InvalidFixtureBlock] seal_engine: Literal["NoProof"] = Field("NoProof") + transition_tool_cache_key: ClassVar[str] = "blockchain_test" @post_state_validator() @@ -789,6 +790,7 @@ class BlockchainEngineFixture(BlockchainEngineFixtureCommon): payloads: List[FixtureEngineNewPayload] = Field( ..., alias="engineNewPayloads" ) + transition_tool_cache_key: ClassVar[str] = "blockchain_test" @post_state_validator(alternate_field="post_state_diff") @@ -812,6 +814,7 @@ class BlockchainEngineXFixture(BlockchainEngineFixtureCommon): FixtureFillingPhase.FILL, FixtureFillingPhase.PRE_ALLOC_GENERATION, } + can_use_cache: ClassVar[bool] = False pre_hash: str """Hash of the pre-allocation group this test belongs to.""" @@ -843,6 +846,7 @@ class BlockchainEngineSyncFixture(BlockchainEngineFixture): "Tests that generate a blockchain test fixture for Engine API " "testing with client sync." ) + can_use_cache: ClassVar[bool] = False sync_payload: FixtureEngineNewPayload | None = None @classmethod diff --git a/packages/testing/src/execution_testing/specs/base.py b/packages/testing/src/execution_testing/specs/base.py index c06ee44c91..53edd4247f 100644 --- a/packages/testing/src/execution_testing/specs/base.py +++ b/packages/testing/src/execution_testing/specs/base.py @@ -6,10 +6,7 @@ from abc import abstractmethod from enum import StrEnum, unique from functools import reduce -from os import path -from pathlib import Path from typing import ( - TYPE_CHECKING, Any, Callable, ClassVar, @@ -17,23 +14,15 @@ Generator, List, Sequence, - Tuple, Type, ) -if TYPE_CHECKING: - # Guarded import to avoid circular dependency: filler.py imports BaseTest - from execution_testing.cli.pytest_commands.plugins.filler.filler import ( - FillingSession, - ) - import pytest -from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +from pydantic import BaseModel, ConfigDict, PrivateAttr from typing_extensions import Self from execution_testing.base_types import to_hex from execution_testing.client_clis import Result, TransitionTool -from execution_testing.client_clis.cli_types import OpcodeCount from execution_testing.execution import ( BaseExecute, ExecuteFormat, @@ -44,7 +33,6 @@ FixtureFormat, LabeledFixtureFormat, PreAllocGroupBuilders, - strip_fixture_format_from_nodeid, ) from execution_testing.forks import Fork from execution_testing.forks.base_fork import BaseFork @@ -113,17 +101,12 @@ class BaseTest(BaseModel): _operation_mode: OpMode | None = PrivateAttr(None) _gas_optimization: int | None = PrivateAttr(None) _gas_optimization_max_gas_limit: int | None = PrivateAttr(None) - _opcode_count: OpcodeCount | None = PrivateAttr(None) expected_benchmark_gas_used: int | None = None skip_gas_used_validation: bool = False spec_types: ClassVar[Dict[str, Type["BaseTest"]]] = {} - # Transition tool specific fields - t8n_dump_dir: Path | None = Field(None, exclude=True) - t8n_call_counter: int = Field(0, exclude=True) - supported_fixture_formats: ClassVar[ Sequence[FixtureFormat | LabeledFixtureFormat] ] = [] @@ -176,14 +159,12 @@ def from_test( new_instance = cls( tag=base_test.tag, fork=base_test.fork, - t8n_dump_dir=base_test.t8n_dump_dir, expected_benchmark_gas_used=base_test.expected_benchmark_gas_used, skip_gas_used_validation=base_test.skip_gas_used_validation, **kwargs, ) new_instance._request = base_test._request new_instance._operation_mode = base_test._operation_mode - new_instance._opcode_count = base_test._opcode_count return new_instance @classmethod @@ -232,17 +213,6 @@ def pytest_parameter_name(cls) -> str: lambda x, y: x + ("_" if y.isupper() else "") + y, cls.__name__ ).lower() - def get_next_transition_tool_output_path(self) -> str: - """Return path to the next transition tool output file.""" - if not self.t8n_dump_dir: - return "" - current_value = self.t8n_call_counter - self.t8n_call_counter += 1 - return path.join( - self.t8n_dump_dir, - str(current_value), - ) - def is_tx_gas_heavy_test(self) -> bool: """Check if the test is gas-heavy for transaction execution.""" if self._request is not None and hasattr(self._request, "node"): @@ -269,11 +239,11 @@ def is_exception_test(self) -> bool | None: ) return None - def node_id(self) -> str: + def node(self) -> pytest.Item | pytest.Function | None: """Return the node ID of the test.""" if self._request is not None and hasattr(self._request, "node"): - return self._request.node.nodeid - return "" + return self._request.node + return None def check_exception_test( self, @@ -366,21 +336,5 @@ def compute_pre_alloc_group_hash(self) -> str: return f"0x{combined_hash:016x}" - def _get_base_nodeid(self) -> str: - """Get base nodeid without fixture_format for cache key.""" - return strip_fixture_format_from_nodeid(self.node_id()) - - def _get_t8n_cache_key( - self, fork_name: str, block_index: int = 0 - ) -> Tuple[str, str, int]: - """Get cache key for t8n output.""" - return (self._get_base_nodeid(), fork_name, block_index) - - def _get_filling_session(self) -> "FillingSession | None": - """Get filling session if available.""" - if self._request is not None and hasattr(self._request, "config"): - return getattr(self._request.config, "filling_session", None) - return None - TestSpec = Callable[[Fork], Generator[BaseTest, None, None]] diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py index 0777d69b7c..3c6d09d1d6 100644 --- a/packages/testing/src/execution_testing/specs/benchmark.py +++ b/packages/testing/src/execution_testing/specs/benchmark.py @@ -499,7 +499,10 @@ def generate( self.target_opcode is not None and self.fixed_opcode_count is not None ): - self._verify_target_opcode_count(blockchain_test._opcode_count) + opcode_count = t8n.opcode_count + if opcode_count is None: + raise Exception("Opcode count is not available") + self._verify_target_opcode_count(opcode_count) return fixture else: raise Exception(f"Unsupported fixture format: {fixture_format}") diff --git a/packages/testing/src/execution_testing/specs/blockchain.py b/packages/testing/src/execution_testing/specs/blockchain.py index e798911b13..35eb705e41 100644 --- a/packages/testing/src/execution_testing/specs/blockchain.py +++ b/packages/testing/src/execution_testing/specs/blockchain.py @@ -601,52 +601,20 @@ def generate_block_data( "exception must be the last transaction in the block" ) - # Try cache lookup (blockchain_test -> blockchain_test_engine reuse). - # On cache hit, the t8n call is skipped. - # Skip cache for engine_x/engine_sync variants (different execution). - filling_session = self._get_filling_session() - nodeid = self.node_id() - cache_key = self._get_t8n_cache_key( - self.fork.name(), block_index=int(env.number) - ) - - transition_tool_output = None - cache = None - if filling_session is not None: - cache = filling_session.t8n_output_cache - can_use_cache = ( - "engine_x" not in nodeid and "engine_sync" not in nodeid - ) - - if cache is not None and can_use_cache: - transition_tool_output = cache.get(cache_key) - - if transition_tool_output is None: - transition_tool_output = t8n.evaluate( - transition_tool_data=TransitionTool.TransitionToolData( - alloc=previous_alloc, - txs=txs, - env=env, - fork=self.fork, - chain_id=self.chain_id, - reward=self.fork.get_reward( - block_number=env.number, timestamp=env.timestamp - ), - blob_schedule=self.fork.blob_schedule(), + transition_tool_output = t8n.evaluate( + transition_tool_data=TransitionTool.TransitionToolData( + alloc=previous_alloc, + txs=txs, + env=env, + fork=self.fork, + chain_id=self.chain_id, + reward=self.fork.get_reward( + block_number=env.number, timestamp=env.timestamp ), - debug_output_path=self.get_next_transition_tool_output_path(), - slow_request=self.is_tx_gas_heavy_test(), - ) - if cache is not None and can_use_cache: - cache.set(cache_key, transition_tool_output) - - if transition_tool_output.result.opcode_count is not None: - if self._opcode_count is None: - self._opcode_count = transition_tool_output.result.opcode_count - else: - self._opcode_count += ( - transition_tool_output.result.opcode_count - ) + blob_schedule=self.fork.blob_schedule(), + ), + slow_request=self.is_tx_gas_heavy_test(), + ) # One special case of the invalid transactions is the blob gas used, # since this value is not included in the transition tool result, but @@ -905,8 +873,8 @@ def make_fixture( alloc = alloc.get() if isinstance(alloc, LazyAlloc) else alloc self.verify_post_state(t8n, t8n_state=alloc) info = {} - if self._opcode_count is not None: - info["opcode_count"] = self._opcode_count.model_dump() + if t8n.opcode_count is not None: + info["opcode_count"] = t8n.opcode_count.model_dump() return BlockchainFixture( fork=self.fork, genesis=genesis.header, @@ -993,8 +961,8 @@ def make_hive_fixture( # Create base fixture data, common to all fixture formats info = {} - if self._opcode_count is not None: - info["opcode_count"] = self._opcode_count.model_dump() + if t8n.opcode_count is not None: + info["opcode_count"] = t8n.opcode_count.model_dump() fixture_data = { "fork": self.fork, "genesis": genesis.header, @@ -1072,7 +1040,6 @@ def generate( fixture_format: FixtureFormat, ) -> BaseFixture: """Generate the BlockchainTest fixture.""" - t8n.reset_traces() if fixture_format in [ BlockchainEngineFixture, BlockchainEngineXFixture, diff --git a/packages/testing/src/execution_testing/specs/state.py b/packages/testing/src/execution_testing/specs/state.py index e4d5f9c61c..50c0ae9a0f 100644 --- a/packages/testing/src/execution_testing/specs/state.py +++ b/packages/testing/src/execution_testing/specs/state.py @@ -152,7 +152,6 @@ def verify_modified_gas_limit( blob_schedule=fork.blob_schedule(), state_test=True, ), - debug_output_path=self.get_next_transition_tool_output_path(), slow_request=self.is_tx_gas_heavy_test(), ) modified_traces = modified_tool_output.result.traces @@ -366,7 +365,6 @@ def make_state_test_fixture( blob_schedule=fork.blob_schedule(), state_test=True, ), - debug_output_path=self.get_next_transition_tool_output_path(), slow_request=self.is_tx_gas_heavy_test(), ) output_alloc = transition_tool_output.alloc.get()