diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py
index 1bdde10539..9736480f44 100644
--- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py
+++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/execute/execute.py
@@ -649,7 +649,6 @@ def base_test_parametrizer_func(
 
         class BaseTestWrapper(cls):  # type: ignore
             def __init__(self, *args: Any, **kwargs: Any) -> None:
-                kwargs["t8n_dump_dir"] = None
                 if "pre" not in kwargs:
                     kwargs["pre"] = pre
                 elif kwargs["pre"] != pre:
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py
index df35ffa019..099754eef2 100644
--- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py
+++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/filler.py
@@ -8,14 +8,12 @@
 
 import configparser
 import datetime
-import hashlib
 import json
 import os
 import warnings
-from collections import OrderedDict
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, Generator, List, Self, Set, Tuple, Type
+from typing import Any, Dict, Generator, List, Self, Set, Type
 
 import pytest
 import xdist
@@ -33,7 +31,7 @@
 from execution_testing.cli.gen_index import (
     generate_fixtures_index,
 )
-from execution_testing.client_clis import TransitionTool, TransitionToolOutput
+from execution_testing.client_clis import TransitionTool
 from execution_testing.client_clis.clis.geth import FixtureConsumerTool
 from execution_testing.fixtures import (
     BaseFixture,
@@ -46,7 +44,7 @@
     PreAllocGroupBuilders,
     PreAllocGroups,
     TestInfo,
-    strip_fixture_format_from_nodeid,
+    strip_fixture_format_from_node,
 )
 from execution_testing.forks import (
     Fork,
@@ -73,51 +71,6 @@
 from .fixture_output import FixtureOutput
 
 
-class T8nOutputCache:
-    """
-    Bounded LRU cache for t8n outputs.
-
-    With xdist_group ensuring related formats run consecutively on the same
-    worker, we only need to hold entries for 1-2 test cases at a time.
-    """
-
-    def __init__(self, maxsize: int = 3):
-        """Initialize the cache with a maximum size."""
-        self._cache: OrderedDict[
-            Tuple[str, str, int], TransitionToolOutput
-        ] = OrderedDict()
-        self._maxsize = maxsize
-        self.hits = 0
-        self.misses = 0
-
-    def get(self, key: Tuple[str, str, int]) -> TransitionToolOutput | None:
-        """Get a value from the cache, marking it as recently used."""
-        if key in self._cache:
-            self._cache.move_to_end(key)
-            self.hits += 1
-            return self._cache[key]
-        self.misses += 1
-        return None
-
-    def set(
-        self, key: Tuple[str, str, int], value: TransitionToolOutput
-    ) -> None:
-        """Set a value in the cache, evicting oldest if at capacity."""
-        if key in self._cache:
-            self._cache.move_to_end(key)
-            self._cache[key] = value
-        else:
-            if len(self._cache) >= self._maxsize:
-                self._cache.popitem(last=False)
-            self._cache[key] = value
-
-    def stats(self) -> str:
-        """Return cache statistics."""
-        total = self.hits + self.misses
-        rate = (self.hits / total * 100) if total > 0 else 0
-        return f"hits={self.hits}, misses={self.misses}, rate={rate:.1f}%"
-
-
 @dataclass(kw_only=True)
 class PhaseManager:
     """
@@ -283,7 +236,6 @@ class FillingSession:
     format_selector: FormatSelector
     pre_alloc_groups: PreAllocGroups | None = None
     pre_alloc_group_builders: PreAllocGroupBuilders | None = None
-    t8n_output_cache: T8nOutputCache = field(default_factory=T8nOutputCache)
 
     @classmethod
     def from_config(cls, config: pytest.Config) -> "Self":
@@ -413,6 +365,16 @@ def save_pre_alloc_groups(self) -> None:
         self.pre_alloc_group_builders.to_folder(pre_alloc_folder)
 
 
+@dataclass(kw_only=True)
+class TransitionToolCacheStats:
+    """Stats for caching of the transition tool requests."""
+
+    key_test_hits: int = 0
+    key_test_miss: int = 0
+    subkey_test_hits: int = 0
+    subkey_test_miss: int = 0
+
+
 def calculate_post_state_diff(
     post_state: Alloc, genesis_state: Alloc
 ) -> Alloc:
@@ -881,6 +843,20 @@ def pytest_terminal_summary(
     yield
     if config.fixture_output.is_stdout or hasattr(config, "workerinput"):  # type: ignore[attr-defined]
         return
+    t8n_cache_stats: TransitionToolCacheStats | None = getattr(
+        config, "transition_tool_cache_stats", None
+    )
+    if t8n_cache_stats is not None:
+        terminalreporter.write_sep(
+            "=",
+            f" Transition tool cache stats:"
+            f"- Key test hits: {t8n_cache_stats.key_test_hits}"
+            f"- Key test misses: {t8n_cache_stats.key_test_miss}"
+            f"- Subkey test hits: {t8n_cache_stats.subkey_test_hits}"
+            f"- Subkey test misses: {t8n_cache_stats.subkey_test_miss}",
+            bold=True,
+            green=True,
+        )
     stats = terminalreporter.stats
     if "passed" in stats and stats["passed"]:
         # Custom message for Phase 1 (pre-allocation group generation)
@@ -1042,7 +1018,7 @@ def verify_fixtures_bin(request: pytest.FixtureRequest) -> Path | None:
 
 
 @pytest.fixture(autouse=True, scope="session")
-def t8n(
+def session_t8n(
     request: pytest.FixtureRequest,
 ) -> Generator[TransitionTool, None, None]:
     """Return configured transition tool."""
@@ -1060,6 +1036,59 @@ def t8n(
     t8n.shutdown()
 
 
+def get_t8n_cache_key(request: pytest.FixtureRequest) -> str | None:
+    """Get the cache key to be used for the current test, if any."""
+    mark: pytest.Mark = request.node.get_closest_marker(
+        "transition_tool_cache_key"
+    )
+    if mark is not None and len(mark.args) == 1:
+        return f"{strip_fixture_format_from_node(request.node)}-{mark.args[0]}"
+    return None
+
+
+@pytest.fixture(autouse=True, scope="session")
+def transition_tool_cache_stats(
+    request: pytest.FixtureRequest,
+) -> Generator[TransitionToolCacheStats, None, None]:
+    """Get the transition tool cache stats."""
+    stats = TransitionToolCacheStats()
+    yield stats
+    request.config.transition_tool_cache_stats = stats  # type: ignore[attr-defined]
+
+
+@pytest.fixture(autouse=True, scope="function")
+def t8n(
+    request: pytest.FixtureRequest,
+    session_t8n: TransitionTool,
+    dump_dir_parameter_level: Path | None,
+    transition_tool_cache_stats: TransitionToolCacheStats,
+) -> Generator[TransitionTool, None, None]:
+    """Set the transition tool up for the current test."""
+    if transition_tool_cache_key := get_t8n_cache_key(request):
+        # This test is allowed to cache results
+        if session_t8n.set_cache(key=transition_tool_cache_key):
+            transition_tool_cache_stats.key_test_hits += 1
+        else:
+            transition_tool_cache_stats.key_test_miss += 1
+    else:
+        # Test cannot use output cache, remove it
+        session_t8n.remove_cache()
+    # Reset the traces
+    session_t8n.reset_traces()
+    session_t8n.call_counter = 0
+    session_t8n.debug_dump_dir = dump_dir_parameter_level
+    # TODO: Configure the transition tool to count opcodes only when required.
+    session_t8n.reset_opcode_count()
+    yield session_t8n
+    if session_t8n.output_cache is not None:
+        transition_tool_cache_stats.subkey_test_hits += (
+            session_t8n.output_cache.hits
+        )
+        transition_tool_cache_stats.subkey_test_miss += (
+            session_t8n.output_cache.misses
+        )
+
+
 @pytest.fixture(scope="session")
 def do_fixture_verification(
     request: pytest.FixtureRequest, verify_fixtures_bin: Path | None
@@ -1381,7 +1410,6 @@ def base_test_parametrizer_func(
         reference_spec: ReferenceSpec,
         pre: Alloc,
         output_dir: Path,
-        dump_dir_parameter_level: Path | None,
         fixture_collector: FixtureCollector,
         test_case_description: str,
         fixture_source_url: str,
@@ -1412,7 +1440,6 @@ def base_test_parametrizer_func(
 
         class BaseTestWrapper(cls):  # type: ignore
             def __init__(self, *args: Any, **kwargs: Any) -> None:
-                kwargs["t8n_dump_dir"] = dump_dir_parameter_level
                 if "pre" not in kwargs:
                     kwargs["pre"] = pre
                 if "expected_benchmark_gas_used" not in kwargs:
@@ -1653,13 +1680,14 @@ def pytest_collection_modifyitems(
         # Skip if test already has an xdist_group marker (e.g., bigmem).
         for item in items:
             if not item.get_closest_marker("xdist_group"):
-                base_nodeid = strip_fixture_format_from_nodeid(item.nodeid)
-                h = hashlib.md5(base_nodeid.encode()).hexdigest()[:8]
-                item.add_marker(pytest.mark.xdist_group(name=f"t8n-cache-{h}"))
+                base_nodeid = strip_fixture_format_from_node(item)
+                item.add_marker(
+                    pytest.mark.xdist_group(name=f"t8n-cache-{base_nodeid}")
+                )
 
     # Sort items so related formats run consecutively for cache hits.
     # This ensures deterministic execution order within xdist workers.
-    items.sort(key=lambda item: strip_fixture_format_from_nodeid(item.nodeid))
+    items.sort(key=lambda item: strip_fixture_format_from_node(item))
 
 
 def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py
index 991d00117e..5f519ff29c 100644
--- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py
+++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/filler/tests/test_t8n_cache.py
@@ -1,266 +1,206 @@
 """Unit tests for the t8n output cache functionality."""
 
 import hashlib
-from dataclasses import dataclass, field
 from typing import Any
 
 import pytest
 
 from execution_testing.fixtures import (
-    get_all_fixture_format_names,
-    strip_fixture_format_from_nodeid,
+    BlockchainEngineFixture,
+    BlockchainFixture,
+    FixtureFormat,
+    LabeledFixtureFormat,
+    StateFixture,
+    strip_fixture_format_from_node,
 )
 
-from ..filler import T8nOutputCache, _strip_xdist_group_suffix
+from ...shared.helpers import labeled_format_parameter_set
+from ..filler import _strip_xdist_group_suffix
 
 
-class TestT8nOutputCache:
-    """Test cases for the T8nOutputCache LRU cache."""
-
-    def test_cache_basic_operations(self) -> None:
-        """Test basic get/set operations."""
-        cache = T8nOutputCache(maxsize=3)
-
-        key = ("test.py::test[fork_Osaka]", "Osaka", 0)
-        value = object()  # Mock TransitionToolOutput
-
-        assert cache.get(key) is None
-        cache.set(key, value)  # type: ignore[arg-type]
-        assert cache.get(key) is value
-
-    def test_cache_lru_eviction(self) -> None:
-        """Test LRU eviction when cache exceeds maxsize."""
-        cache = T8nOutputCache(maxsize=2)
-
-        key1 = ("test1", "Osaka", 0)
-        key2 = ("test2", "Osaka", 0)
-        key3 = ("test3", "Osaka", 0)
-
-        cache.set(key1, "value1")  # type: ignore[arg-type]
-        cache.set(key2, "value2")  # type: ignore[arg-type]
-
-        # Both should be present.
-        assert cache.get(key1) == "value1"
-        assert cache.get(key2) == "value2"
-
-        # Adding key3 should evict key1 (oldest after key2 access).
-        cache.set(key3, "value3")  # type: ignore[arg-type]
-        assert cache.get(key1) is None  # Evicted
-        assert cache.get(key2) == "value2"
-        assert cache.get(key3) == "value3"
-
-    def test_cache_lru_access_updates_order(self) -> None:
-        """Test that get() updates LRU order."""
-        cache = T8nOutputCache(maxsize=2)
-
-        key1 = ("test1", "Osaka", 0)
-        key2 = ("test2", "Osaka", 0)
-        key3 = ("test3", "Osaka", 0)
-
-        cache.set(key1, "value1")  # type: ignore[arg-type]
-        cache.set(key2, "value2")  # type: ignore[arg-type]
-
-        # Access key1 to make it recently used.
-        cache.get(key1)
-
-        # Adding key3 should evict key2 (now oldest).
-        cache.set(key3, "value3")  # type: ignore[arg-type]
-        assert cache.get(key1) == "value1"
-        assert cache.get(key2) is None  # Evicted
-        assert cache.get(key3) == "value3"
-
-    def test_cache_hit_miss_tracking(self) -> None:
-        """Test that hits and misses are tracked correctly."""
-        cache = T8nOutputCache(maxsize=3)
-
-        key = ("test", "Osaka", 0)
-
-        # Miss.
-        cache.get(key)
-        assert cache.hits == 0
-        assert cache.misses == 1
-
-        # Set and hit.
-        cache.set(key, "value")  # type: ignore[arg-type]
-        cache.get(key)
-        assert cache.hits == 1
-        assert cache.misses == 1
-
-        # Another hit.
-        cache.get(key)
-        assert cache.hits == 2
-        assert cache.misses == 1
-
-    def test_cache_stats(self) -> None:
-        """Test stats() output format."""
-        cache = T8nOutputCache(maxsize=3)
-
-        key = ("test", "Osaka", 0)
-        cache.set(key, "value")  # type: ignore[arg-type]
-        cache.get(key)  # Hit
-        cache.get(("other", "Osaka", 0))  # Miss
-
-        stats = cache.stats()
-        assert "hits=1" in stats
-        assert "misses=1" in stats
-        assert "rate=50.0%" in stats
-
-    def test_cache_stats_empty(self) -> None:
-        """Test stats() with no operations."""
-        cache = T8nOutputCache(maxsize=3)
-        stats = cache.stats()
-        assert "hits=0" in stats
-        assert "misses=0" in stats
-        assert "rate=0.0%" in stats
-
-    def test_cache_update_existing_key(self) -> None:
-        """Test that setting an existing key updates value and LRU order."""
-        cache = T8nOutputCache(maxsize=2)
-
-        key1 = ("test1", "Osaka", 0)
-        key2 = ("test2", "Osaka", 0)
-        key3 = ("test3", "Osaka", 0)
-
-        cache.set(key1, "old")  # type: ignore[arg-type]
-        cache.set(key2, "value2")  # type: ignore[arg-type]
+class MockItem:
+    """Mock pytest.Item for testing collection sorting behavior."""
 
-        # Update key1 (moves to end).
-        cache.set(key1, "new")  # type: ignore[arg-type]
-        assert cache.get(key1) == "new"
+    nodeid: str
+    name: str
+    _markers: list[pytest.Mark]
+
+    def __init__(
+        self,
+        nodeid: str,
+        fixture_format: LabeledFixtureFormat | FixtureFormat | None,
+        name: str | None = None,
+    ) -> None:
+        """Initialize name from nodeid if not provided."""
+        self.nodeid = nodeid
+        if not name:
+            parts = nodeid.split("::")
+            name = parts[-1] if "::" in nodeid else nodeid
+        self.name = name
+        self._markers = []
+        if fixture_format is not None:
+            param = labeled_format_parameter_set(fixture_format)
+            for mark in param.marks:
+                self._markers.append(mark)  # type: ignore[arg-type]
+
+    def get_closest_marker(self, name: str) -> pytest.Mark | None:
+        """Return marker by name if present."""
+        for marker in self._markers:
+            if marker.name == name:
+                return marker
+        return None
 
-        # Adding key3 should evict key2 (now oldest).
-        cache.set(key3, "value3")  # type: ignore[arg-type]
-        assert cache.get(key1) == "new"
-        assert cache.get(key2) is None
+    def add_marker(self, marker: Any) -> None:
+        """Add a marker to the item."""
+        self._markers.append(marker)
 
 
 class TestStripFixtureFormatFromNodeid:
-    """Test cases for strip_fixture_format_from_nodeid function."""
+    """Test cases for strip_fixture_format_from_node function."""
 
     def test_strip_blockchain_test(self) -> None:
         """Test stripping blockchain_test format."""
-        nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test]"
-        expected = "tests/test.py::test_foo[fork_Osaka]"
-        assert strip_fixture_format_from_nodeid(nodeid) == expected
+        item = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
+            BlockchainFixture,
+        )
+        expected = "tests/test.py::test_foo[fork_Osaka-]"
+        assert strip_fixture_format_from_node(item) == expected
 
     def test_strip_blockchain_test_engine(self) -> None:
         """Test stripping blockchain_test_engine format."""
-        nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]"
-        expected = "tests/test.py::test_foo[fork_Osaka]"
-        assert strip_fixture_format_from_nodeid(nodeid) == expected
+        item = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
+            BlockchainEngineFixture,
+        )
+        expected = "tests/test.py::test_foo[fork_Osaka-]"
+        assert strip_fixture_format_from_node(item) == expected
 
     def test_strip_state_test(self) -> None:
         """Test stripping state_test format."""
-        nodeid = "tests/test.py::test_foo[fork_Osaka-state_test]"
-        expected = "tests/test.py::test_foo[fork_Osaka]"
-        assert strip_fixture_format_from_nodeid(nodeid) == expected
+        item = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-state_test]",
+            StateFixture,
+        )
+        expected = "tests/test.py::test_foo[fork_Osaka-]"
+        assert strip_fixture_format_from_node(item) == expected
 
     def test_strip_format_in_middle(self) -> None:
         """Test stripping format when it's in the middle of params."""
-        nodeid = "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]"
-        expected = "tests/test.py::test_foo[fork_Osaka-param1]"
-        assert strip_fixture_format_from_nodeid(nodeid) == expected
+        item = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]",
+            BlockchainFixture,
+        )
+        expected = "tests/test.py::test_foo[fork_Osaka--param1]"
+        assert strip_fixture_format_from_node(item) == expected
 
     def test_no_format_unchanged(self) -> None:
         """Test that nodeids without fixture format are unchanged."""
-        nodeid = "tests/test.py::test_foo[fork_Osaka-some_param]"
-        assert strip_fixture_format_from_nodeid(nodeid) == nodeid
+        item = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-some_param]",
+            None,
+        )
+        assert strip_fixture_format_from_node(item) == item.nodeid
 
     def test_no_params_unchanged(self) -> None:
         """Test that nodeids without parameters are unchanged."""
-        nodeid = "tests/test.py::test_foo"
-        assert strip_fixture_format_from_nodeid(nodeid) == nodeid
+        item = MockItem(
+            "tests/test.py::test_foo",
+            None,
+        )
+        assert strip_fixture_format_from_node(item) == item.nodeid
 
     def test_empty_params_unchanged(self) -> None:
         """Test that nodeids with empty params are unchanged."""
-        nodeid = "tests/test.py::test_foo[]"
-        assert strip_fixture_format_from_nodeid(nodeid) == nodeid
+        item = MockItem(
+            "tests/test.py::test_foo[]",
+            None,
+        )
+        assert strip_fixture_format_from_node(item) == item.nodeid
 
     def test_format_at_start(self) -> None:
         """Test stripping format at start of params."""
-        nodeid = "tests/test.py::test_foo[blockchain_test-fork_Osaka]"
-        expected = "tests/test.py::test_foo[fork_Osaka]"
-        assert strip_fixture_format_from_nodeid(nodeid) == expected
+        item = MockItem(
+            "tests/test.py::test_foo[blockchain_test-fork_Osaka]",
+            BlockchainFixture,
+        )
+        expected = "tests/test.py::test_foo[-fork_Osaka]"
+        assert strip_fixture_format_from_node(item) == expected
+
+    def test_only_format(self) -> None:
+        """Test stripping format at start of params."""
+        item = MockItem(
+            "tests/test.py::test_foo[blockchain_test]",
+            BlockchainFixture,
+        )
+        expected = "tests/test.py::test_foo[]"
+        assert strip_fixture_format_from_node(item) == expected
 
     def test_related_formats_same_base(self) -> None:
         """Test that related formats produce the same base nodeid."""
-        base_nodeid = "tests/test.py::test_foo[fork_Osaka-param1]"
+        base_nodeid = "tests/test.py::test_foo[fork_Osaka--param1]"
 
-        nodeid_bt = (
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]"
+        node_bt = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-blockchain_test-param1]",
+            BlockchainFixture,
         )
-        nodeid_bte = (
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine-param1]"
+        node_bte = MockItem(
+            "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine-param1]",
+            BlockchainEngineFixture,
         )
 
         # Both should strip to the same base.
-        assert strip_fixture_format_from_nodeid(nodeid_bt) == base_nodeid
-        assert strip_fixture_format_from_nodeid(nodeid_bte) == base_nodeid
+        assert strip_fixture_format_from_node(node_bt) == base_nodeid
+        assert strip_fixture_format_from_node(node_bte) == base_nodeid
 
     def test_longer_format_matched_first(self) -> None:
         """Test that longer format names are matched before shorter ones."""
         # blockchain_test_engine should match before blockchain_test.
-        nodeid = "tests/test.py::test[fork_Osaka-blockchain_test_engine]"
-        expected = "tests/test.py::test[fork_Osaka]"
-        result = strip_fixture_format_from_nodeid(nodeid)
+        node = MockItem(
+            "tests/test.py::test[fork_Osaka-blockchain_test_engine]",
+            BlockchainEngineFixture,
+        )
+        expected = "tests/test.py::test[fork_Osaka-]"
+        result = strip_fixture_format_from_node(node)
         assert result == expected
         # Verify it didn't partially match blockchain_test.
         assert "blockchain_test" not in result
 
 
-class TestGetAllFixtureFormatNames:
-    """Test cases for get_all_fixture_format_names function."""
-
-    def test_returns_tuple(self) -> None:
-        """Test that function returns a tuple (hashable for lru_cache)."""
-        result = get_all_fixture_format_names()
-        assert isinstance(result, tuple)
-
-    def test_contains_known_formats(self) -> None:
-        """Test that common fixture formats are included."""
-        formats = get_all_fixture_format_names()
-        assert "blockchain_test" in formats
-        assert "state_test" in formats
-
-    def test_sorted_by_length_descending(self) -> None:
-        """Test that formats are sorted by length (longest first)."""
-        formats = get_all_fixture_format_names()
-        lengths = [len(f) for f in formats]
-        assert lengths == sorted(lengths, reverse=True)
-
-    def test_blockchain_test_engine_before_blockchain_test(self) -> None:
-        """Test that longer names come before their prefixes."""
-        formats = get_all_fixture_format_names()
-        # blockchain_test_engine is longer and should come first.
-        has_engine = "blockchain_test_engine" in formats
-        has_bt = "blockchain_test" in formats
-        if has_engine and has_bt:
-            idx_engine = formats.index("blockchain_test_engine")
-            idx_bt = formats.index("blockchain_test")
-            assert idx_engine < idx_bt
-
-
 class TestCacheKeyConsistency:
     """Test that cache keys are consistent across fixture formats."""
 
     @pytest.mark.parametrize(
-        "format_name",
+        "labeled_fixture_format,format_name",
         [
-            "blockchain_test",
-            "blockchain_test_engine",
-            "state_test",
-            "blockchain_test_from_state_test",
-            "blockchain_test_engine_from_state_test",
+            (BlockchainFixture, "blockchain_test"),
+            (BlockchainEngineFixture, "blockchain_test_engine"),
+            (StateFixture, "state_test"),
+            (
+                LabeledFixtureFormat(
+                    BlockchainFixture, "blockchain_test_from_state_test", ""
+                ),
+                "blockchain_test_from_state_test",
+            ),
+            (
+                LabeledFixtureFormat(
+                    BlockchainEngineFixture,
+                    "blockchain_test_engine_from_state_test",
+                    "",
+                ),
+                "blockchain_test_engine_from_state_test",
+            ),
         ],
     )
     def test_format_stripping_produces_consistent_key(
-        self, format_name: str
+        self, labeled_fixture_format: LabeledFixtureFormat, format_name: str
     ) -> None:
         """Test that all format variants produce the same base key."""
-        base = "tests/test.py::test_case[fork_Osaka-param1]"
+        base = "tests/test.py::test_case[fork_Osaka--param1]"
         nodeid = f"tests/test.py::test_case[fork_Osaka-{format_name}-param1]"
+        node = MockItem(nodeid, labeled_fixture_format)
 
-        result = strip_fixture_format_from_nodeid(nodeid)
+        result = strip_fixture_format_from_node(node)
         assert result == base, f"Format {format_name} did not strip correctly"
 
 
@@ -306,22 +246,34 @@ def test_blockchain_test_sorts_before_blockchain_test_engine(self) -> None:
 
     def test_related_formats_group_together_when_sorted(self) -> None:
         """Test that sorting by base nodeid groups related formats together."""
-        nodeids = [
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_bar[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
-            "tests/test.py::test_bar[fork_Osaka-blockchain_test_engine]",
+        nodes = [
+            MockItem(
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_bar[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_bar[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
+            ),
         ]
 
         # Sort by base nodeid (as the collection hook does).
-        sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid)
+        sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node)
 
         # Related formats should be adjacent after sorting.
         test_bar_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_bar" in n
+            i for i, n in enumerate(sorted_nodes) if "test_bar" in n.nodeid
         ]
         test_foo_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_foo" in n
+            i for i, n in enumerate(sorted_nodes) if "test_foo" in n.nodeid
         ]
 
         # Check adjacency: indices should be consecutive.
@@ -330,21 +282,30 @@ def test_related_formats_group_together_when_sorted(self) -> None:
 
     def test_related_formats_grouped_when_sorted(self) -> None:
         """Test sorting groups related formats together (same base nodeid)."""
-        nodeids = [
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_bar[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
+        nodes = [
+            MockItem(
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_bar[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
+            ),
         ]
 
         # Sort by base nodeid.
-        sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid)
+        sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node)
 
         # test_bar items should be adjacent, test_foo items should be adjacent.
         foo_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_foo" in n
+            i for i, n in enumerate(sorted_nodes) if "test_foo" in n.nodeid
         ]
         bar_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_bar" in n
+            i for i, n in enumerate(sorted_nodes) if "test_bar" in n.nodeid
         ]
 
         # Check foo items are adjacent (difference is 1).
@@ -354,23 +315,35 @@ def test_related_formats_grouped_when_sorted(self) -> None:
 
     def test_sorting_groups_multiple_tests_by_base_nodeid(self) -> None:
         """Test sorting groups items by base nodeid."""
-        nodeids = [
+        nodes = [
             # Deliberately interleaved: test_a and test_b formats mixed.
-            "tests/test.py::test_b[fork_Osaka-blockchain_test_engine]",
-            "tests/test.py::test_a[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_b[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_a[fork_Osaka-blockchain_test_engine]",
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
+            ),
         ]
 
         # Sort by base nodeid.
-        sorted_nodeids = sorted(nodeids, key=strip_fixture_format_from_nodeid)
+        sorted_nodes = sorted(nodes, key=strip_fixture_format_from_node)
 
         # After sorting, test_a formats should be adjacent, test_b adjacent.
         test_a_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_a" in n
+            i for i, n in enumerate(sorted_nodes) if "test_a" in n.nodeid
         ]
         test_b_indices = [
-            i for i, n in enumerate(sorted_nodeids) if "test_b" in n
+            i for i, n in enumerate(sorted_nodes) if "test_b" in n.nodeid
         ]
 
         # Check test_a items are adjacent.
@@ -379,45 +352,17 @@ def test_sorting_groups_multiple_tests_by_base_nodeid(self) -> None:
         assert max(test_b_indices) - min(test_b_indices) == 1
 
 
-@dataclass
-class MockItem:
-    """Mock pytest.Item for testing collection sorting behavior."""
-
-    nodeid: str
-    name: str = ""
-    _markers: list[Any] = field(default_factory=list)
-
-    def __post_init__(self) -> None:
-        """Initialize name from nodeid if not provided."""
-        if not self.name:
-            parts = self.nodeid.split("::")
-            self.name = parts[-1] if "::" in self.nodeid else self.nodeid
-
-    def get_closest_marker(self, name: str) -> Any:
-        """Return marker by name if present."""
-        for marker in self._markers:
-            if hasattr(marker, "name") and marker.name == name:
-                return marker
-        return None
-
-    def add_marker(self, marker: Any) -> None:
-        """Add a marker to the item."""
-        self._markers.append(marker)
-
-
 class TestCollectionSortingBehavior:
     """Test collection sorting behavior ensures cache hits."""
 
     def _sort_items_by_base_nodeid(self, items: list[MockItem]) -> None:
         """Sort items by base nodeid (cache-friendly order)."""
-        items.sort(
-            key=lambda item: strip_fixture_format_from_nodeid(item.nodeid)
-        )
+        items.sort(key=lambda item: strip_fixture_format_from_node(item))
 
     def _add_xdist_markers(self, items: list[MockItem]) -> None:
         """Add xdist_group markers based on base nodeid hash."""
         for item in items:
-            base_nodeid = strip_fixture_format_from_nodeid(item.nodeid)
+            base_nodeid = strip_fixture_format_from_node(item)
             h = hashlib.md5(base_nodeid.encode()).hexdigest()[:8]
             item.add_marker(pytest.mark.xdist_group(name=f"t8n-cache-{h}"))
 
@@ -446,8 +391,14 @@ def _simulate_collection_with_xdist_fixed(
     def test_items_sorted_without_xdist(self) -> None:
         """Test that items are sorted when xdist is NOT enabled."""
         items = [
-            MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"),
-            MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"),
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
         ]
 
         self._simulate_collection_without_xdist(items)
@@ -459,8 +410,14 @@ def test_items_sorted_without_xdist(self) -> None:
     def test_items_sorted_with_xdist_current_behavior(self) -> None:
         """Test current behavior: items ARE sorted with xdist (FIXED)."""
         items = [
-            MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"),
-            MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"),
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
         ]
 
         self._simulate_collection_with_xdist_current(items)
@@ -474,8 +431,14 @@ def test_items_sorted_with_xdist_current_behavior(self) -> None:
     def test_items_should_be_sorted_with_xdist(self) -> None:
         """Test items SHOULD be sorted with xdist (expected fix)."""
         items = [
-            MockItem("tests/test.py::test_b[fork_Osaka-blockchain_test]"),
-            MockItem("tests/test.py::test_a[fork_Osaka-blockchain_test]"),
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
         ]
 
         self._simulate_collection_with_xdist_fixed(items)
@@ -487,9 +450,13 @@ def test_items_should_be_sorted_with_xdist(self) -> None:
     def test_xdist_groups_have_consistent_hash(self) -> None:
         """Test xdist_group markers use consistent hashes."""
         items = [
-            MockItem("tests/test.py::test_foo[fork_Osaka-blockchain_test]"),
             MockItem(
-                "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]"
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_foo[fork_Osaka-blockchain_test_engine]",
+                BlockchainEngineFixture,
             ),
         ]
 
@@ -512,18 +479,24 @@ def test_xdist_groups_have_consistent_hash(self) -> None:
 
     def test_current_matches_expected_behavior_xdist(self) -> None:
         """Test CURRENT xdist behavior matches EXPECTED (FIXED)."""
-        nodeids = [
-            "tests/test.py::test_b[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+        items = [
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
         ]
 
         # Simulate current (fixed) behavior.
-        current_items = [MockItem(n) for n in nodeids]
+        current_items = items[:]
         self._simulate_collection_with_xdist_current(current_items)
         current_order = [i.nodeid for i in current_items]
 
         # Simulate expected behavior.
-        expected_items = [MockItem(n) for n in nodeids]
+        expected_items = items[:]
         self._simulate_collection_with_xdist_fixed(expected_items)
         expected_order = [i.nodeid for i in expected_items]
 
@@ -534,13 +507,17 @@ def test_current_matches_expected_behavior_xdist(self) -> None:
 
     def test_xdist_sorting_required_for_cache_hits(self) -> None:
         """Test xdist collection sorts items for deterministic cache hits."""
-        nodeids = [
-            "tests/test.py::test_b[fork_Osaka-blockchain_test]",
-            "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+        items = [
+            MockItem(
+                "tests/test.py::test_b[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
+            MockItem(
+                "tests/test.py::test_a[fork_Osaka-blockchain_test]",
+                BlockchainFixture,
+            ),
         ]
-
         # Simulate current behavior with xdist.
-        items = [MockItem(n) for n in nodeids]
         self._simulate_collection_with_xdist_current(items)
 
         # FIXED: Items are sorted (test_a before test_b).
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py
index b059420838..a497517a16 100644
--- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py
+++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/execute_fill.py
@@ -183,6 +183,15 @@ def pytest_configure(config: pytest.Config) -> None:
         "markers",
         "fully_tagged: Marks a static test as fully tagged with all metadata.",
     )
+    config.addinivalue_line(
+        "markers",
+        "fixture_format_id: ID used to describe the fixture format.",
+    )
+    config.addinivalue_line(
+        "markers",
+        "transition_tool_cache_key: Key used to match the transition tool "
+        "cache for the test during fill.",
+    )
 
 
 @pytest.fixture(scope="function")
diff --git a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py
index 1da0a41fd9..8b37e01909 100644
--- a/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py
+++ b/packages/testing/src/execution_testing/cli/pytest_commands/plugins/shared/helpers.py
@@ -46,12 +46,22 @@ def labeled_format_parameter_set(
     The label will be used in the test id and also will be added as a marker to
     the generated test case when filling/executing the test.
     """
+    transition_tool_cache_key = getattr(
+        format_with_or_without_label, "transition_tool_cache_key", ""
+    )
+    if transition_tool_cache_key:
+        marks = [
+            pytest.mark.transition_tool_cache_key(transition_tool_cache_key),
+        ]
+    else:
+        marks = []
     if isinstance(
         format_with_or_without_label, LabeledExecuteFormat
     ) or isinstance(format_with_or_without_label, LabeledFixtureFormat):
+        parameter_id = format_with_or_without_label.label
         return pytest.param(
             format_with_or_without_label.format,
-            id=format_with_or_without_label.label,
+            id=parameter_id,
             marks=[
                 getattr(
                     pytest.mark,
@@ -59,20 +69,25 @@ def labeled_format_parameter_set(
                 ),
                 getattr(
                     pytest.mark,
-                    format_with_or_without_label.label.lower(),
+                    parameter_id.lower(),
                 ),
-            ],
+                pytest.mark.fixture_format_id(parameter_id),
+            ]
+            + marks,
         )
     else:
+        parameter_id = format_with_or_without_label.format_name.lower()
         return pytest.param(
             format_with_or_without_label,
-            id=format_with_or_without_label.format_name.lower(),
+            id=parameter_id,
             marks=[
                 getattr(
                     pytest.mark,
-                    format_with_or_without_label.format_name.lower(),
-                )
-            ],
+                    parameter_id,
+                ),
+                pytest.mark.fixture_format_id(parameter_id),
+            ]
+            + marks,
         )
 
 
diff --git a/packages/testing/src/execution_testing/client_clis/clis/besu.py b/packages/testing/src/execution_testing/client_clis/clis/besu.py
index 3a75c202d4..72690da7da 100644
--- a/packages/testing/src/execution_testing/client_clis/clis/besu.py
+++ b/packages/testing/src/execution_testing/client_clis/clis/besu.py
@@ -21,6 +21,7 @@
 
 from ..cli_types import TransitionToolOutput
 from ..transition_tool import (
+    Profiler,
     TransitionTool,
     dump_files_to_directory,
     model_dump_config,
@@ -113,15 +114,16 @@ def shutdown(self) -> None:
         if self.besu_trace_dir:
             self.besu_trace_dir.cleanup()
 
-    def evaluate(
+    def _evaluate(
         self,
         *,
         transition_tool_data: TransitionTool.TransitionToolData,
-        debug_output_path: str = "",
-        slow_request: bool = False,
+        debug_output_path: Path | None,
+        slow_request: bool,
+        profiler: Profiler,
     ) -> TransitionToolOutput:
         """Execute `evm t8n` with the specified arguments."""
-        del slow_request
+        del slow_request, profiler
 
         if not self.process:
             self.start_server()
diff --git a/packages/testing/src/execution_testing/client_clis/clis/evmone.py b/packages/testing/src/execution_testing/client_clis/clis/evmone.py
index e474e752aa..8157d29f93 100644
--- a/packages/testing/src/execution_testing/client_clis/clis/evmone.py
+++ b/packages/testing/src/execution_testing/client_clis/clis/evmone.py
@@ -132,7 +132,7 @@ def _consume_debug_dump(
             """
         )
         dump_files_to_directory(
-            str(debug_output_path),
+            debug_output_path,
             {
                 "consume_direct_args.py": command,
                 "consume_direct_returncode.txt": result.returncode,
diff --git a/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py b/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py
index 7f181df9cc..0d3e091037 100644
--- a/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py
+++ b/packages/testing/src/execution_testing/client_clis/clis/execution_specs.py
@@ -19,6 +19,7 @@
     dump_files_to_directory,
 )
 from execution_testing.client_clis.transition_tool import (
+    Profiler,
     TransitionTool,
     model_dump_config,
 )
@@ -62,17 +63,18 @@ def is_fork_supported(self, fork: Fork) -> bool:
         """Return True if the fork is supported by the tool."""
         return fork.transition_tool_name() in get_supported_forks()
 
-    def evaluate(
+    def _evaluate(
         self,
         *,
         transition_tool_data: TransitionTool.TransitionToolData,
-        debug_output_path: str = "",
-        slow_request: bool = False,
+        debug_output_path: Path | None,
+        slow_request: bool,
+        profiler: Profiler,
     ) -> TransitionToolOutput:
         """
         Evaluate using the EELS T8N entry point.
         """
-        del slow_request
+        del slow_request, profiler
         request_data = transition_tool_data.get_request_data()
         request_data_json = request_data.model_dump(
             mode="json", **model_dump_config
diff --git a/packages/testing/src/execution_testing/client_clis/clis/geth.py b/packages/testing/src/execution_testing/client_clis/clis/geth.py
index e2e2c439d6..813b74736e 100644
--- a/packages/testing/src/execution_testing/client_clis/clis/geth.py
+++ b/packages/testing/src/execution_testing/client_clis/clis/geth.py
@@ -231,7 +231,7 @@ def _consume_debug_dump(
             """
         )
         dump_files_to_directory(
-            str(debug_output_path),
+            debug_output_path,
             {
                 "consume_direct_args.py": command,
                 "consume_direct_returncode.txt": result.returncode,
diff --git a/packages/testing/src/execution_testing/client_clis/clis/nethermind.py b/packages/testing/src/execution_testing/client_clis/clis/nethermind.py
index bb23b04aa0..b3465af955 100644
--- a/packages/testing/src/execution_testing/client_clis/clis/nethermind.py
+++ b/packages/testing/src/execution_testing/client_clis/clis/nethermind.py
@@ -83,7 +83,7 @@ def _consume_debug_dump(
         )
 
         dump_files_to_directory(
-            str(debug_output_path),
+            debug_output_path,
             {
                 "consume_direct_args.py": command,
                 "consume_direct_returncode.txt": result.returncode,
diff --git a/packages/testing/src/execution_testing/client_clis/file_utils.py b/packages/testing/src/execution_testing/client_clis/file_utils.py
index 8e53f1729e..5b2047a6e7 100644
--- a/packages/testing/src/execution_testing/client_clis/file_utils.py
+++ b/packages/testing/src/execution_testing/client_clis/file_utils.py
@@ -3,6 +3,7 @@
 import os
 import stat
 from json import dump
+from pathlib import Path
 from typing import Any, Dict
 
 from pydantic import BaseModel, RootModel
@@ -14,9 +15,9 @@
 )
 
 
-def dump_files_to_directory(output_path: str, files: Dict[str, Any]) -> None:
+def dump_files_to_directory(output_path: Path, files: Dict[str, Any]) -> None:
     """Dump the files to the given directory."""
-    os.makedirs(output_path, exist_ok=True)
+    output_path.mkdir(parents=True, exist_ok=True)
     for file_rel_path_flags, file_contents in files.items():
         file_rel_path, flags = (
             file_rel_path_flags.split("+")
@@ -25,8 +26,8 @@ def dump_files_to_directory(output_path: str, files: Dict[str, Any]) -> None:
         )
         rel_path = os.path.dirname(file_rel_path)
         if rel_path:
-            os.makedirs(os.path.join(output_path, rel_path), exist_ok=True)
-        file_path = os.path.join(output_path, file_rel_path)
+            os.makedirs(output_path / rel_path, exist_ok=True)
+        file_path = output_path / file_rel_path
         with open(file_path, "w") as f:
             if isinstance(file_contents, (LazyAllocStr, LazyAllocJson)):
                 if isinstance(file_contents, LazyAllocJson):
diff --git a/packages/testing/src/execution_testing/client_clis/transition_tool.py b/packages/testing/src/execution_testing/client_clis/transition_tool.py
index 223a0cda71..0231ff86a7 100644
--- a/packages/testing/src/execution_testing/client_clis/transition_tool.py
+++ b/packages/testing/src/execution_testing/client_clis/transition_tool.py
@@ -122,6 +122,40 @@ def get_valid_transition_tool_names() -> set[str]:
     return {fork.transition_tool_name() for fork in all_available_forks}
 
 
+class OutputCache:
+    """
+    Unbounded cache for t8n outputs.
+
+    With xdist_group ensuring related formats run consecutively on the same
+    worker.
+    """
+
+    def __init__(self, *, key: str):
+        """Initialize the cache with a maximum size."""
+        self._cache: Dict[int, TransitionToolOutput] = {}
+        self.key = key
+        self.hits = 0
+        self.misses = 0
+
+    def get(self, subkey: int) -> TransitionToolOutput | None:
+        """Get a value from the cache, marking it as recently used."""
+        if subkey in self._cache:
+            self.hits += 1
+            return self._cache[subkey]
+        self.misses += 1
+        return None
+
+    def set(self, subkey: int, value: TransitionToolOutput) -> None:
+        """Set a value in the cache, evicting oldest if at capacity."""
+        self._cache[subkey] = value
+
+    def stats(self) -> str:
+        """Return cache statistics."""
+        total = self.hits + self.misses
+        rate = (self.hits / total * 100) if total > 0 else 0
+        return f"hits={self.hits}, misses={self.misses}, rate={rate:.1f}%"
+
+
 class TransitionTool(EthereumCLI):
     """
     Transition tool abstract base class which should be inherited by all
@@ -141,8 +175,12 @@ class TransitionTool(EthereumCLI):
     t8n_use_server: bool = False
     server_url: str | None = None
     process: Optional[subprocess.Popen] = None
-    supports_opcode_count: ClassVar[bool] = False
+    output_cache: OutputCache | None = None
+    debug_dump_dir: Path | None = None
+    call_counter: int = 0
+    opcode_count: OpcodeCount | None = None
 
+    supports_opcode_count: ClassVar[bool] = False
     supports_xdist: ClassVar[bool] = True
     supports_blob_params: ClassVar[bool] = False
     fork_name_map: ClassVar[Dict[str, str]] = {}
@@ -186,15 +224,14 @@ def shutdown(self) -> None:
 
     def reset_traces(self) -> None:
         """Reset the internal trace storage for a new test to begin."""
-        self.traces = None
+        self.traces = []
 
     def append_traces(self, new_traces: Traces) -> None:
         """
         Append a list of traces of a state transition to the current
         list.
         """
-        if self.traces is None:
-            self.traces = []
+        assert self.traces is not None
         self.traces.append(new_traces)
 
     def get_traces(self) -> List[Traces] | None:
@@ -205,7 +242,7 @@ def collect_traces(
         self,
         receipts: List[TransactionReceipt],
         temp_dir: tempfile.TemporaryDirectory,
-        debug_output_path: str = "",
+        debug_output_path: Path | None,
     ) -> Traces:
         """
         Collect the traces from the t8n tool output and store them in the
@@ -225,6 +262,34 @@ def collect_traces(
         self.append_traces(traces)
         return traces
 
+    def set_cache(self, *, key: str) -> bool:
+        """Sets the output cache."""
+        if self.output_cache is None or key != self.output_cache.key:
+            self.output_cache = OutputCache(key=key)
+            return False
+        self.output_cache.hits = 0
+        self.output_cache.misses = 0
+        return True
+
+    def remove_cache(self) -> None:
+        """
+        Remove the output cache to be garbage collected since it
+        will no longer be used.
+        """
+        self.output_cache = None
+
+    def reset_opcode_count(self) -> None:
+        """
+        Reset the opcode count to zero.
+        """
+        self.opcode_count = OpcodeCount({})
+
+    def remove_opcode_count(self) -> None:
+        """
+        Remove the opcode counter.
+        """
+        self.opcode_count = None
+
     @dataclass
     class TransitionToolData:
         """Transition tool files and data to pass between methods."""
@@ -305,7 +370,7 @@ def _evaluate_filesystem(
         self,
         *,
         t8n_data: TransitionToolData,
-        debug_output_path: str = "",
+        debug_output_path: Path | None,
         profiler: Profiler,
     ) -> TransitionToolOutput:
         """
@@ -359,7 +424,7 @@ def _evaluate_filesystem(
             "--state.chainid",
             str(t8n_data.chain_id),
         ]
-        if self.supports_opcode_count:
+        if self.supports_opcode_count and self.opcode_count is not None:
             args.extend(
                 [
                     "--opcode.count",
@@ -429,7 +494,7 @@ def _evaluate_filesystem(
             temp_dir_path / "output",
             context={"exception_mapper": self.exception_mapper},
         )
-        if self.supports_opcode_count:
+        if self.supports_opcode_count and self.opcode_count is not None:
             opcode_count_file_path = Path(temp_dir.name) / "opcodes.json"
             if opcode_count_file_path.exists():
                 opcode_count = OpcodeCount.model_validate_json(
@@ -507,7 +572,7 @@ def _evaluate_server(
         self,
         *,
         t8n_data: TransitionToolData,
-        debug_output_path: str = "",
+        debug_output_path: Path | None,
         timeout: int,
         profiler: Profiler,
     ) -> TransitionToolOutput:
@@ -595,7 +660,7 @@ def _evaluate_stream(
         self,
         *,
         t8n_data: TransitionToolData,
-        debug_output_path: str = "",
+        debug_output_path: Path | None,
         profiler: Profiler,
     ) -> TransitionToolOutput:
         """
@@ -617,10 +682,11 @@ def _evaluate_stream(
             stderr=subprocess.PIPE,
         )
 
-        with profiler.pause():
-            self.dump_debug_stream(
-                debug_output_path, temp_dir, stdin, args, result
-            )
+        if debug_output_path:
+            with profiler.pause():
+                self.dump_debug_stream(
+                    debug_output_path, temp_dir, stdin, args, result
+                )
 
         if result.returncode != 0:
             raise Exception("failed to evaluate: " + result.stderr.decode())
@@ -716,7 +782,7 @@ def construct_args_stream(
 
     def dump_debug_stream(
         self,
-        debug_output_path: str,
+        debug_output_path: Path,
         temp_dir: tempfile.TemporaryDirectory,
         stdin: TransitionToolInput,
         args: List[str],
@@ -725,9 +791,6 @@ def dump_debug_stream(
         """
         Export debug files if requested when interacting with t8n via streams.
         """
-        if not debug_output_path:
-            return
-
         t8n_call = " ".join(args)
         t8n_output_base_dir = os.path.join(debug_output_path, "t8n.sh.out")
         if self.trace:
@@ -761,11 +824,79 @@ def dump_debug_stream(
             },
         )
 
+    def _evaluate(
+        self,
+        *,
+        transition_tool_data: TransitionToolData,
+        debug_output_path: Path | None,
+        slow_request: bool,
+        profiler: Profiler,
+    ) -> TransitionToolOutput:
+        """
+        Execute the relevant evaluate method as required by the `t8n` tool.
+
+        If a client's `t8n` tool varies from the default behavior, this method
+        can be overridden.
+        """
+        if self.t8n_use_server:
+            if not self.server_url:
+                self.start_server()
+            return self._evaluate_server(
+                t8n_data=transition_tool_data,
+                debug_output_path=debug_output_path,
+                timeout=SLOW_REQUEST_TIMEOUT
+                if slow_request
+                else NORMAL_SERVER_TIMEOUT,
+                profiler=profiler,
+            )
+
+        elif self.t8n_use_stream:
+            return self._evaluate_stream(
+                t8n_data=transition_tool_data,
+                debug_output_path=debug_output_path,
+                profiler=profiler,
+            )
+        else:
+            return self._evaluate_filesystem(
+                t8n_data=transition_tool_data,
+                debug_output_path=debug_output_path,
+                profiler=profiler,
+            )
+
+    def get_next_transition_tool_output_path(
+        self, call_id: int
+    ) -> Path | None:
+        """Return path to the next transition tool output file."""
+        debug_dump_dir = self.debug_dump_dir
+        if debug_dump_dir is None:
+            return None
+        return debug_dump_dir / str(call_id)
+
+    def increment_call_counter(self) -> int:
+        """Increment the call counter by one and return the previous value."""
+        previous_value = self.call_counter
+        self.call_counter += 1
+        return previous_value
+
+    def process_result(
+        self, result: TransitionToolOutput
+    ) -> TransitionToolOutput:
+        """
+        Process the result of the transition tool evaluation performing the
+        following operations:
+        - Add opcode count to the result if available.
+        """
+        if (
+            result.result.opcode_count is not None
+            and self.opcode_count is not None
+        ):
+            self.opcode_count += result.result.opcode_count
+        return result
+
     def evaluate(
         self,
         *,
         transition_tool_data: TransitionToolData,
-        debug_output_path: str = "",
         slow_request: bool = False,
     ) -> TransitionToolOutput:
         """
@@ -774,33 +905,26 @@ def evaluate(
         If a client's `t8n` tool varies from the default behavior, this method
         can be overridden.
         """
+        current_call_id = self.increment_call_counter()
+        if self.output_cache is not None:
+            cached_result = self.output_cache.get(current_call_id)
+            if cached_result:
+                return self.process_result(cached_result)
+        debug_output_path = self.get_next_transition_tool_output_path(
+            current_call_id
+        )
         with Profiler(
-            enabled=debug_output_path != "",
-            filename=Path(debug_output_path) / "profile.out"
+            enabled=debug_output_path is not None,
+            filename=debug_output_path / "profile.out"
             if debug_output_path
             else None,
         ) as profiler:
-            if self.t8n_use_server:
-                if not self.server_url:
-                    self.start_server()
-                return self._evaluate_server(
-                    t8n_data=transition_tool_data,
-                    debug_output_path=debug_output_path,
-                    timeout=SLOW_REQUEST_TIMEOUT
-                    if slow_request
-                    else NORMAL_SERVER_TIMEOUT,
-                    profiler=profiler,
-                )
-
-            elif self.t8n_use_stream:
-                return self._evaluate_stream(
-                    t8n_data=transition_tool_data,
-                    debug_output_path=debug_output_path,
-                    profiler=profiler,
-                )
-            else:
-                return self._evaluate_filesystem(
-                    t8n_data=transition_tool_data,
-                    debug_output_path=debug_output_path,
-                    profiler=profiler,
-                )
+            result = self._evaluate(
+                transition_tool_data=transition_tool_data,
+                debug_output_path=debug_output_path,
+                slow_request=slow_request,
+                profiler=profiler,
+            )
+        if self.output_cache is not None:
+            self.output_cache.set(current_call_id, result)
+        return self.process_result(result)
diff --git a/packages/testing/src/execution_testing/fixtures/__init__.py b/packages/testing/src/execution_testing/fixtures/__init__.py
index a549c2d77b..60c6f9b340 100644
--- a/packages/testing/src/execution_testing/fixtures/__init__.py
+++ b/packages/testing/src/execution_testing/fixtures/__init__.py
@@ -5,8 +5,7 @@
     FixtureFillingPhase,
     FixtureFormat,
     LabeledFixtureFormat,
-    get_all_fixture_format_names,
-    strip_fixture_format_from_nodeid,
+    strip_fixture_format_from_node,
 )
 from .blockchain import (
     BlockchainEngineFixture,
@@ -39,14 +38,13 @@
     "FixtureConsumer",
     "FixtureFillingPhase",
     "FixtureFormat",
-    "get_all_fixture_format_names",
     "LabeledFixtureFormat",
     "PreAllocGroup",
     "PreAllocGroupBuilder",
     "PreAllocGroupBuilders",
     "PreAllocGroups",
     "StateFixture",
-    "strip_fixture_format_from_nodeid",
+    "strip_fixture_format_from_node",
     "TestInfo",
     "TransactionFixture",
 ]
diff --git a/packages/testing/src/execution_testing/fixtures/base.py b/packages/testing/src/execution_testing/fixtures/base.py
index cb38348593..ce0395dc0a 100644
--- a/packages/testing/src/execution_testing/fixtures/base.py
+++ b/packages/testing/src/execution_testing/fixtures/base.py
@@ -2,10 +2,19 @@
 
 import hashlib
 import json
-import re
 from enum import Enum, auto
-from functools import cached_property, lru_cache
-from typing import Annotated, Any, ClassVar, Dict, List, Set, Type, Union
+from functools import cached_property
+from typing import (
+    Annotated,
+    Any,
+    ClassVar,
+    Dict,
+    List,
+    Protocol,
+    Set,
+    Type,
+    Union,
+)
 
 import pytest
 from pydantic import (
@@ -72,6 +81,7 @@ class BaseFixture(CamelModel):
     format_phases: ClassVar[Set[FixtureFillingPhase]] = {
         FixtureFillingPhase.FILL
     }
+    transition_tool_cache_key: ClassVar[str] = ""
 
     @classmethod
     def output_base_dir_name(cls) -> str:
@@ -231,6 +241,11 @@ def format_phases(self) -> Set[FixtureFillingPhase]:
         """Get the filling format phases where it should be included."""
         return self.format.format_phases
 
+    @property
+    def transition_tool_cache_key(self) -> str:
+        """Get the transition tool cache key."""
+        return self.format.transition_tool_cache_key
+
     def __eq__(self, other: Any) -> bool:
         """
         Check if two labeled fixture formats are equal.
@@ -255,29 +270,22 @@ def __eq__(self, other: Any) -> bool:
 ]
 
 
-@lru_cache(maxsize=1)
-def get_all_fixture_format_names() -> tuple[str, ...]:
-    """
-    Get all fixture format names (base formats + labeled formats).
-
-    Returns names sorted by length descending so longer patterns match first
-    when used for nodeid parsing.
-    """
-    all_names: set[str] = set()
-    all_names.update(BaseFixture.formats.keys())
-    all_names.update(LabeledFixtureFormat.registered_labels.keys())
-    return tuple(sorted(all_names, key=len, reverse=True))
+class PytestItemProtocol(Protocol):
+    """Protocol that resembles pytest.Item."""
 
+    @property
+    def nodeid(self) -> str:
+        """Return the nodeid of the item."""
+        ...
 
-@lru_cache(maxsize=1)
-def _get_fixture_format_pattern() -> re.Pattern[str]:
-    """Compile regex pattern for matching fixture format names in nodeids."""
-    formats = "|".join(re.escape(f) for f in get_all_fixture_format_names())
-    # Match format name with hyphen boundaries or at string edges
-    return re.compile(rf"(^|-)({formats})(-|$)")
+    def get_closest_marker(self, name: str) -> pytest.Mark | None:
+        """Return the closest marker with the given name."""
+        ...
 
 
-def strip_fixture_format_from_nodeid(nodeid: str) -> str:
+def strip_fixture_format_from_node(
+    item: PytestItemProtocol,
+) -> str:
     """
     Remove fixture format suffix from a test nodeid.
 
@@ -288,20 +296,12 @@ def strip_fixture_format_from_nodeid(nodeid: str) -> str:
         'test.py::test[fork_Osaka-state_test]' -> 'test.py::test[fork_Osaka]'
 
     """
-    if "[" not in nodeid:
+    fixture_format_id_marker = item.get_closest_marker("fixture_format_id")
+    nodeid = item.nodeid
+    if fixture_format_id_marker is None:
         return nodeid
-
-    bracket_pos = nodeid.rfind("[")
-    params_section = nodeid[bracket_pos + 1 : -1]
-
-    def _replace(match: re.Match[str]) -> str:
-        prefix, _, suffix = match.groups()
-        # If both boundaries are hyphens, keep one hyphen
-        if prefix == "-" and suffix == "-":
-            return "-"
-        return ""
-
-    pattern = _get_fixture_format_pattern()
-    params_section = pattern.sub(_replace, params_section, count=1)
-
-    return nodeid[: bracket_pos + 1] + params_section + "]"
+    assert len(fixture_format_id_marker.args) == 1
+    fixture_id = fixture_format_id_marker.args[0]
+    if fixture_id not in nodeid:
+        return nodeid
+    return nodeid.replace(fixture_id, "")
diff --git a/packages/testing/src/execution_testing/fixtures/blockchain.py b/packages/testing/src/execution_testing/fixtures/blockchain.py
index fa85d4b60b..f9b5f48f7b 100644
--- a/packages/testing/src/execution_testing/fixtures/blockchain.py
+++ b/packages/testing/src/execution_testing/fixtures/blockchain.py
@@ -745,6 +745,7 @@ class BlockchainFixture(BlockchainFixtureCommon):
     genesis_rlp: Bytes = Field(..., alias="genesisRLP")
     blocks: List[FixtureBlock | InvalidFixtureBlock]
     seal_engine: Literal["NoProof"] = Field("NoProof")
+    transition_tool_cache_key: ClassVar[str] = "blockchain_test"
 
 
 @post_state_validator()
@@ -789,6 +790,7 @@ class BlockchainEngineFixture(BlockchainEngineFixtureCommon):
     payloads: List[FixtureEngineNewPayload] = Field(
         ..., alias="engineNewPayloads"
     )
+    transition_tool_cache_key: ClassVar[str] = "blockchain_test"
 
 
 @post_state_validator(alternate_field="post_state_diff")
@@ -812,6 +814,7 @@ class BlockchainEngineXFixture(BlockchainEngineFixtureCommon):
         FixtureFillingPhase.FILL,
         FixtureFillingPhase.PRE_ALLOC_GENERATION,
     }
+    can_use_cache: ClassVar[bool] = False
 
     pre_hash: str
     """Hash of the pre-allocation group this test belongs to."""
@@ -843,6 +846,7 @@ class BlockchainEngineSyncFixture(BlockchainEngineFixture):
         "Tests that generate a blockchain test fixture for Engine API "
         "testing with client sync."
     )
+    can_use_cache: ClassVar[bool] = False
     sync_payload: FixtureEngineNewPayload | None = None
 
     @classmethod
diff --git a/packages/testing/src/execution_testing/specs/base.py b/packages/testing/src/execution_testing/specs/base.py
index c06ee44c91..53edd4247f 100644
--- a/packages/testing/src/execution_testing/specs/base.py
+++ b/packages/testing/src/execution_testing/specs/base.py
@@ -6,10 +6,7 @@
 from abc import abstractmethod
 from enum import StrEnum, unique
 from functools import reduce
-from os import path
-from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     ClassVar,
@@ -17,23 +14,15 @@
     Generator,
     List,
     Sequence,
-    Tuple,
     Type,
 )
 
-if TYPE_CHECKING:
-    # Guarded import to avoid circular dependency: filler.py imports BaseTest
-    from execution_testing.cli.pytest_commands.plugins.filler.filler import (
-        FillingSession,
-    )
-
 import pytest
-from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
+from pydantic import BaseModel, ConfigDict, PrivateAttr
 from typing_extensions import Self
 
 from execution_testing.base_types import to_hex
 from execution_testing.client_clis import Result, TransitionTool
-from execution_testing.client_clis.cli_types import OpcodeCount
 from execution_testing.execution import (
     BaseExecute,
     ExecuteFormat,
@@ -44,7 +33,6 @@
     FixtureFormat,
     LabeledFixtureFormat,
     PreAllocGroupBuilders,
-    strip_fixture_format_from_nodeid,
 )
 from execution_testing.forks import Fork
 from execution_testing.forks.base_fork import BaseFork
@@ -113,17 +101,12 @@ class BaseTest(BaseModel):
     _operation_mode: OpMode | None = PrivateAttr(None)
     _gas_optimization: int | None = PrivateAttr(None)
     _gas_optimization_max_gas_limit: int | None = PrivateAttr(None)
-    _opcode_count: OpcodeCount | None = PrivateAttr(None)
 
     expected_benchmark_gas_used: int | None = None
     skip_gas_used_validation: bool = False
 
     spec_types: ClassVar[Dict[str, Type["BaseTest"]]] = {}
 
-    # Transition tool specific fields
-    t8n_dump_dir: Path | None = Field(None, exclude=True)
-    t8n_call_counter: int = Field(0, exclude=True)
-
     supported_fixture_formats: ClassVar[
         Sequence[FixtureFormat | LabeledFixtureFormat]
     ] = []
@@ -176,14 +159,12 @@ def from_test(
         new_instance = cls(
             tag=base_test.tag,
             fork=base_test.fork,
-            t8n_dump_dir=base_test.t8n_dump_dir,
             expected_benchmark_gas_used=base_test.expected_benchmark_gas_used,
             skip_gas_used_validation=base_test.skip_gas_used_validation,
             **kwargs,
         )
         new_instance._request = base_test._request
         new_instance._operation_mode = base_test._operation_mode
-        new_instance._opcode_count = base_test._opcode_count
         return new_instance
 
     @classmethod
@@ -232,17 +213,6 @@ def pytest_parameter_name(cls) -> str:
             lambda x, y: x + ("_" if y.isupper() else "") + y, cls.__name__
         ).lower()
 
-    def get_next_transition_tool_output_path(self) -> str:
-        """Return path to the next transition tool output file."""
-        if not self.t8n_dump_dir:
-            return ""
-        current_value = self.t8n_call_counter
-        self.t8n_call_counter += 1
-        return path.join(
-            self.t8n_dump_dir,
-            str(current_value),
-        )
-
     def is_tx_gas_heavy_test(self) -> bool:
         """Check if the test is gas-heavy for transaction execution."""
         if self._request is not None and hasattr(self._request, "node"):
@@ -269,11 +239,11 @@ def is_exception_test(self) -> bool | None:
             )
         return None
 
-    def node_id(self) -> str:
+    def node(self) -> pytest.Item | pytest.Function | None:
         """Return the node ID of the test."""
         if self._request is not None and hasattr(self._request, "node"):
-            return self._request.node.nodeid
-        return ""
+            return self._request.node
+        return None
 
     def check_exception_test(
         self,
@@ -366,21 +336,5 @@ def compute_pre_alloc_group_hash(self) -> str:
 
         return f"0x{combined_hash:016x}"
 
-    def _get_base_nodeid(self) -> str:
-        """Get base nodeid without fixture_format for cache key."""
-        return strip_fixture_format_from_nodeid(self.node_id())
-
-    def _get_t8n_cache_key(
-        self, fork_name: str, block_index: int = 0
-    ) -> Tuple[str, str, int]:
-        """Get cache key for t8n output."""
-        return (self._get_base_nodeid(), fork_name, block_index)
-
-    def _get_filling_session(self) -> "FillingSession | None":
-        """Get filling session if available."""
-        if self._request is not None and hasattr(self._request, "config"):
-            return getattr(self._request.config, "filling_session", None)
-        return None
-
 
 TestSpec = Callable[[Fork], Generator[BaseTest, None, None]]
diff --git a/packages/testing/src/execution_testing/specs/benchmark.py b/packages/testing/src/execution_testing/specs/benchmark.py
index 0777d69b7c..3c6d09d1d6 100644
--- a/packages/testing/src/execution_testing/specs/benchmark.py
+++ b/packages/testing/src/execution_testing/specs/benchmark.py
@@ -499,7 +499,10 @@ def generate(
                 self.target_opcode is not None
                 and self.fixed_opcode_count is not None
             ):
-                self._verify_target_opcode_count(blockchain_test._opcode_count)
+                opcode_count = t8n.opcode_count
+                if opcode_count is None:
+                    raise Exception("Opcode count is not available")
+                self._verify_target_opcode_count(opcode_count)
             return fixture
         else:
             raise Exception(f"Unsupported fixture format: {fixture_format}")
diff --git a/packages/testing/src/execution_testing/specs/blockchain.py b/packages/testing/src/execution_testing/specs/blockchain.py
index e798911b13..35eb705e41 100644
--- a/packages/testing/src/execution_testing/specs/blockchain.py
+++ b/packages/testing/src/execution_testing/specs/blockchain.py
@@ -601,52 +601,20 @@ def generate_block_data(
                     "exception must be the last transaction in the block"
                 )
 
-        # Try cache lookup (blockchain_test -> blockchain_test_engine reuse).
-        # On cache hit, the t8n call is skipped.
-        # Skip cache for engine_x/engine_sync variants (different execution).
-        filling_session = self._get_filling_session()
-        nodeid = self.node_id()
-        cache_key = self._get_t8n_cache_key(
-            self.fork.name(), block_index=int(env.number)
-        )
-
-        transition_tool_output = None
-        cache = None
-        if filling_session is not None:
-            cache = filling_session.t8n_output_cache
-        can_use_cache = (
-            "engine_x" not in nodeid and "engine_sync" not in nodeid
-        )
-
-        if cache is not None and can_use_cache:
-            transition_tool_output = cache.get(cache_key)
-
-        if transition_tool_output is None:
-            transition_tool_output = t8n.evaluate(
-                transition_tool_data=TransitionTool.TransitionToolData(
-                    alloc=previous_alloc,
-                    txs=txs,
-                    env=env,
-                    fork=self.fork,
-                    chain_id=self.chain_id,
-                    reward=self.fork.get_reward(
-                        block_number=env.number, timestamp=env.timestamp
-                    ),
-                    blob_schedule=self.fork.blob_schedule(),
+        transition_tool_output = t8n.evaluate(
+            transition_tool_data=TransitionTool.TransitionToolData(
+                alloc=previous_alloc,
+                txs=txs,
+                env=env,
+                fork=self.fork,
+                chain_id=self.chain_id,
+                reward=self.fork.get_reward(
+                    block_number=env.number, timestamp=env.timestamp
                 ),
-                debug_output_path=self.get_next_transition_tool_output_path(),
-                slow_request=self.is_tx_gas_heavy_test(),
-            )
-            if cache is not None and can_use_cache:
-                cache.set(cache_key, transition_tool_output)
-
-        if transition_tool_output.result.opcode_count is not None:
-            if self._opcode_count is None:
-                self._opcode_count = transition_tool_output.result.opcode_count
-            else:
-                self._opcode_count += (
-                    transition_tool_output.result.opcode_count
-                )
+                blob_schedule=self.fork.blob_schedule(),
+            ),
+            slow_request=self.is_tx_gas_heavy_test(),
+        )
 
         # One special case of the invalid transactions is the blob gas used,
         # since this value is not included in the transition tool result, but
@@ -905,8 +873,8 @@ def make_fixture(
         alloc = alloc.get() if isinstance(alloc, LazyAlloc) else alloc
         self.verify_post_state(t8n, t8n_state=alloc)
         info = {}
-        if self._opcode_count is not None:
-            info["opcode_count"] = self._opcode_count.model_dump()
+        if t8n.opcode_count is not None:
+            info["opcode_count"] = t8n.opcode_count.model_dump()
         return BlockchainFixture(
             fork=self.fork,
             genesis=genesis.header,
@@ -993,8 +961,8 @@ def make_hive_fixture(
 
         # Create base fixture data, common to all fixture formats
         info = {}
-        if self._opcode_count is not None:
-            info["opcode_count"] = self._opcode_count.model_dump()
+        if t8n.opcode_count is not None:
+            info["opcode_count"] = t8n.opcode_count.model_dump()
         fixture_data = {
             "fork": self.fork,
             "genesis": genesis.header,
@@ -1072,7 +1040,6 @@ def generate(
         fixture_format: FixtureFormat,
     ) -> BaseFixture:
         """Generate the BlockchainTest fixture."""
-        t8n.reset_traces()
         if fixture_format in [
             BlockchainEngineFixture,
             BlockchainEngineXFixture,
diff --git a/packages/testing/src/execution_testing/specs/state.py b/packages/testing/src/execution_testing/specs/state.py
index e4d5f9c61c..50c0ae9a0f 100644
--- a/packages/testing/src/execution_testing/specs/state.py
+++ b/packages/testing/src/execution_testing/specs/state.py
@@ -152,7 +152,6 @@ def verify_modified_gas_limit(
                 blob_schedule=fork.blob_schedule(),
                 state_test=True,
             ),
-            debug_output_path=self.get_next_transition_tool_output_path(),
             slow_request=self.is_tx_gas_heavy_test(),
         )
         modified_traces = modified_tool_output.result.traces
@@ -366,7 +365,6 @@ def make_state_test_fixture(
                 blob_schedule=fork.blob_schedule(),
                 state_test=True,
             ),
-            debug_output_path=self.get_next_transition_tool_output_path(),
             slow_request=self.is_tx_gas_heavy_test(),
         )
         output_alloc = transition_tool_output.alloc.get()