From 05f80f9850cba1c7a0c73d4484306e58f5e0bbf9 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 22 Dec 2025 14:56:30 -0700 Subject: [PATCH 01/18] Add test to check end of channel fallback Asana task: https://app.asana.com/1/1208976916964769/project/1209976130071762/task/1211892212379885?focus=true We need a test to check the fallback Batcher behavior in the event that the Espresso Batcher is able to submit a partial Channel that is im progress. The specific scenario we want to test for is one concerning a multi-frame channel that has had at least part of the full channel submitted to the L1 by the Espresso Batcher, then no more. After which we swap to the Fallback Batcher, and we should be able to pick up the missed / incomplete channel, and complete the transactions. --- .../environment/14_batcher_fallback_test.go | 191 ++++++++++++++++++ espresso/environment/e2e_helpers.go | 87 ++++++++ espresso/environment/tx_helpers.go | 37 ++++ 3 files changed, 315 insertions(+) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 769bc5a7034..c3c1ac3afd8 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -2,6 +2,7 @@ package environment_test import ( "context" + "errors" "math/big" "testing" "time" @@ -13,7 +14,9 @@ import ( "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait" "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum-optimism/optimism/op-node/config" + "github.com/ethereum-optimism/optimism/op-service/txmgr" "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/core/types" "github.com/stretchr/testify/require" ) @@ -133,3 +136,191 @@ Loop: require.Equal(t, verifBlock.Hash(), caffBlock.Hash()) } + +// TxManagerIntercept is a txmgr.TxManager that wraps another txmgr.TxManager +// and intercepts calls to Send and SendAsync. 
+// +// The purpose of this intercept is to simulate a failure in the tx submission +// process such that when activated a single frame of a multi-frame channel is +// sent to L1, and the remaining frames fail to be sent, triggering the fallback +// batcher to take over. +type TxManagerIntercept struct { + txmgr.TxManager + + // shouldFail indicates whether to simulate a failure on Send/SendAsync. + shouldFail bool + + // triggerAfterOne indicates whether to start failing after a single + // successful Send/SendAsync. + triggerAfterOne bool + + // failureCount tracks the number of failures that have occurred. + failureCount int +} + +// ErrSimulatedTxSubmissionFailure is the sentinel error returned when a +// simulated tx submission failure is triggered. +// +// We utilize this as a placeholder error to indicate that the tx submission +// failure was intentional for testing purposes. +var ErrSimulatedTxSubmissionFailure = errors.New("simulated tx submission failure") + +// Send implements txmgr.TxManager. +// +// Send is overridden to simulate a failure when shouldFail is true, and to +// allow for one final transaction to be sent before failures begin when +// triggerAfterOne is true. +func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandidate) (*types.Receipt, error) { + if t.shouldFail { + t.failureCount++ + time.Sleep(50 * time.Millisecond) // Simulate some delay + return nil, ErrSimulatedTxSubmissionFailure + } + + if t.triggerAfterOne { + t.shouldFail = true + } + + return t.TxManager.Send(ctx, candidate) +} + +// SendAsync implements txmgr.TxManager. +// +// SendAsync is overridden to simulate a failure when shouldFail is true, and +// to allow for one final transaction to be sent before failures begin when +// triggerAfterOne is true. 
+func (t *TxManagerIntercept) SendAsync(ctx context.Context, candidate txmgr.TxCandidate, ch chan txmgr.SendResponse) { + if t.shouldFail { + t.failureCount++ + time.Sleep(50 * time.Millisecond) // Simulate some delay + ch <- txmgr.SendResponse{Err: ErrSimulatedTxSubmissionFailure} + return + } + + if t.triggerAfterOne { + t.shouldFail = true + } + + t.TxManager.SendAsync(ctx, candidate, ch) +} + +// Compile time assertion to ensure TxManagerIntercept implements +// txmgr.TxManager. +var _ txmgr.TxManager = (*TxManagerIntercept)(nil) + +// TestFallbackMechanismIntegrationTestChannelNotClosed is a test case that is +// meant to verify the correct expected behavior in the event that the Espresso +// Batcher encounters an error mid L1 Batch submission that prevents the full +// channel from being submitted to the L1. +// +// In this scenario this issue is expected to send a single frame of a +// multi-frame channel to the contract. At this point the batch should be +// switched to the fallback and the fallback batcher should continue +// submitting the remaining frames of the channel without any issues. +func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + launcher := new(env.EspressoDevNodeLauncherDocker) + + system, espressoDevNode, err := launcher.StartE2eDevnet( + ctx, + t, + // We want a Max L1 Number of frames larger than 1 to ensure we can + // trigger the multi-frame channel scenario. + env.WithBatcherMaxL1NumFrames(3), + + // We want a small Max L1 Tx Size to ensure that even a small L2 + // transaction will result in multiple L1 Transactions. + env.WithBatcherMaxL1TxSize(5000), + ) + + require.NoError(t, err) + + // We create an intercept around the existing tx manager so we have + // control over when our failures start to occur. 
+ + interceptTxManager := &TxManagerIntercept{ + TxManager: system.BatchSubmitter.TxManager, + } + system.BatchSubmitter.TxManager = interceptTxManager + system.BatchSubmitter.TestDriver().DriverSetup.Txmgr = interceptTxManager + + l1Client := system.NodeClient(e2esys.RoleL1) + + defer env.Stop(t, system) + defer env.Stop(t, espressoDevNode) + + // Send Transaction on L1, and wait for verification on the L2 Verifier + env.RunSimpleL1TransferAndVerifier(ctx, t, system) + + // Verify everything works + env.RunSimpleL2Burn(ctx, t, system) + + // We want to trigger the failure mode now. + interceptTxManager.triggerAfterOne = true + + // Now we need to submit a multi-frame channel to L1 to trigger the + // failure. We can do this by adjusting the batcher config to use a very + // small MaxL1TxSize such that even a small L2 transaction will result in + // multiple frames. + + // We want enough L2 Transactions to ensure we have multiple frames. + n := 10 + + receipts := env.RunSimpleMultiTransactions(ctx, t, system, n) + + // We want to wait until we know that the intercept tx manager has + // trigger the failure mode successfully, and that all n transactions + // have been attempted. + + wait.For(ctx, 10*time.Second, func() (bool, error) { + return interceptTxManager.failureCount >= 1, nil + }) + + if have, want := interceptTxManager.failureCount, 1; have < want { + t.Fatalf("tx submission failure not triggered enough times:\nhave:\n\t%d\nwant at least:\n\t%d", have, want) + } + + // Make sure that the verifier doesn't see any of the transactions. 
+ + l2Verif := system.NodeClient(e2esys.RoleVerif) + + for _, receipt := range receipts { + _, err := l2Verif.TransactionReceipt(ctx, receipt.TxHash) + if have, doNotWant := err, error(nil); have == doNotWant { + t.Errorf("receipt for tx %s found on L2 Verifier when not expected:\nhave:\n\t%v\nwant:\n\t%v", receipt.TxHash, have, doNotWant) + } + } + + // Stop the "TEE" batcher + err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) + require.NoError(t, err) + + // Switch active batcher + options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) + require.NoError(t, err) + + batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) + require.NoError(t, err) + + tx, err := batchAuthenticator.SwitchBatcher(options) + require.NoError(t, err) + _, err = wait.ForReceiptOK(ctx, l1Client, tx.Hash()) + require.NoError(t, err) + + // Start the fallback batcher + err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() + require.NoError(t, err) + + // Everything should still work + env.RunSimpleL2Burn(ctx, t, system) + + // Verify that our previous receipts were also recorded on L1 + for i, receipt := range receipts { + _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) + if have, want := err, error(nil); have != want { + t.Errorf("receipt %d for tx %s not found on L2 Verifier:\nhave:\n\t%v\nwant:\n\t%v", i, receipt.TxHash, have, want) + } + } +} diff --git a/espresso/environment/e2e_helpers.go b/espresso/environment/e2e_helpers.go index 1bcd7cfa5cd..45e9d14317a 100644 --- a/espresso/environment/e2e_helpers.go +++ b/espresso/environment/e2e_helpers.go @@ -4,6 +4,7 @@ import ( "math/big" "time" + bss "github.com/ethereum-optimism/optimism/op-batcher/batcher" "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum-optimism/optimism/op-e2e/system/helpers" "github.com/ethereum/go-ethereum/common" @@ -131,3 
+132,89 @@ func WithL2BlockTime(blockTime time.Duration) E2eDevnetLauncherOption { } } } + +// WithBatcherMaxL1NumFrames is a E2eDevnetLauncherOption that configures the +// batcher's `MaxL1FrameSize` option to the provided value. +// +// This governs how many frames the batcher will attempt to utilize when +// submitting a channel to the L1. +func WithBatcherMaxL1NumFrames(size int) E2eDevnetLauncherOption { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "maxL1NumFrames", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.TargetNumFrames = size + }, + }, + }, + } + } +} + +// WithBatcherMaxPendingTransactions is a E2eDevnetLauncherOption that +// configures the batcher's `MaxPendingTransactions` option to the provided +// value. +// +// This governs how many pending L1 transactions the batcher will allow +// before pausing new submissions. +func WithBatcherMaxPendingTransactions(pendingTransactions uint64) E2eDevnetLauncherOption { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "maxPendingTransactions", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.MaxPendingTransactions = pendingTransactions + }, + }, + }, + } + } +} + +// WithBatcherMaxL1TxSize is a E2eDevnetLauncherOption that configures the +// batcher's `MaxL1TxSize` option to the provided value. +// +// This governs the maximum L1 transaction size that the batcher will attempt +// to submit when submitting a channel to L1. 
+func WithBatcherMaxL1TxSize(maxL1TxSize uint64) E2eDevnetLauncherOption { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "maxL1TxSize", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.MaxL1TxSize = maxL1TxSize + }, + }, + }, + } + } +} + +// WithBatcherMaxBlocksPerSpanBatch is a E2eDevnetLauncherOption that +// configures the batcher's `MaxBlocksPerSpanBatch` option to the provided +// value. +// +// This governs how many blocks the batcher will include in a single span +// when creating batches to submit to L1. +func WithBatcherMaxBlocksPerSpanBatch(maxBlocksPerSpanBatch int) E2eDevnetLauncherOption { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "maxBlocksPerSpanBatch", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.MaxBlocksPerSpanBatch = maxBlocksPerSpanBatch + }, + }, + }, + } + } +} diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index fb114fbbd08..a1103cb6f8c 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -6,6 +6,7 @@ import ( "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" "github.com/stretchr/testify/require" "math/big" @@ -129,3 +130,39 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { cancel() } + +// RunSimpleMultiTransactions sends numTransactions simple L2 transactions +// from Bob's account with a bunch of random data applied to each transaction. 
+func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) []*types.Receipt { + senderKey := system.Cfg.Secrets.Bob + senderAddress := system.Cfg.Secrets.Addresses().Bob + l2Seq := system.NodeClient(e2esys.RoleSeq) + nonce, err := l2Seq.NonceAt(ctx, senderAddress, nil) + if err != nil { + require.NoError(t, err, "failed to get nonce for account %s", senderAddress) + } + + ch := make(chan *types.Receipt, numTransactions) + for i := 0; i < numTransactions; i++ { + go (func(ch chan *types.Receipt, i int, nonce uint64) { + receipt := helpers.SendL2Tx(t, system.Cfg, l2Seq, senderKey, func(opts *helpers.TxOpts) { + opts.Nonce = nonce + uint64(i) + opts.Gas = 100_000 + + // opts.Data = make([]byte, 256) + // Fill with random data + // rand.Read(opts.Data) + }) + ch <- receipt + })(ch, i, nonce) + } + + var receipts []*types.Receipt + for i := 0; i < numTransactions; i++ { + receipt := <-ch + receipts = append(receipts, receipt) + } + close(ch) + + return receipts +} From 19a3bd33d5c81740856d29ff1a3281ec4e64f6d3 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 22 Dec 2025 15:31:51 -0700 Subject: [PATCH 02/18] Rename helper function to match naming pattern --- espresso/environment/14_batcher_fallback_test.go | 2 +- espresso/environment/e2e_helpers.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index c3c1ac3afd8..5d770aa2b01 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -228,7 +228,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { t, // We want a Max L1 Number of frames larger than 1 to ensure we can // trigger the multi-frame channel scenario. 
- env.WithBatcherMaxL1NumFrames(3), + env.WithBatcherTargetNumFrames(3), // We want a small Max L1 Tx Size to ensure that even a small L2 // transaction will result in multiple L1 Transactions. diff --git a/espresso/environment/e2e_helpers.go b/espresso/environment/e2e_helpers.go index 45e9d14317a..4980828f487 100644 --- a/espresso/environment/e2e_helpers.go +++ b/espresso/environment/e2e_helpers.go @@ -133,12 +133,12 @@ func WithL2BlockTime(blockTime time.Duration) E2eDevnetLauncherOption { } } -// WithBatcherMaxL1NumFrames is a E2eDevnetLauncherOption that configures the -// batcher's `MaxL1FrameSize` option to the provided value. +// WithBatcherTargetNumFrames is a E2eDevnetLauncherOption that configures the +// batcher's `TargetNumFrames` option to the provided value. // // This governs how many frames the batcher will attempt to utilize when // submitting a channel to the L1. -func WithBatcherMaxL1NumFrames(size int) E2eDevnetLauncherOption { +func WithBatcherTargetNumFrames(size int) E2eDevnetLauncherOption { return func(c *E2eDevnetLauncherContext) E2eSystemOption { return E2eSystemOption{ StartOptions: []e2esys.StartOption{ From c111de2d4700b7064945d68f96dd5aa9853c170f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 22 Dec 2025 15:32:55 -0700 Subject: [PATCH 03/18] Fix lint issue with not checking error result of wait.For --- espresso/environment/14_batcher_fallback_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 5d770aa2b01..5f3be8845a9 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -274,9 +274,10 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // trigger the failure mode successfully, and that all n transactions // have been attempted. 
- wait.For(ctx, 10*time.Second, func() (bool, error) { + err = wait.For(ctx, 10*time.Second, func() (bool, error) { return interceptTxManager.failureCount >= 1, nil }) + require.NoError(t, err) if have, want := interceptTxManager.failureCount, 1; have < want { t.Fatalf("tx submission failure not triggered enough times:\nhave:\n\t%d\nwant at least:\n\t%d", have, want) From 6f3c50b16190f8ae6154a9dac1e2bc787011980e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 26 Dec 2025 13:06:08 -0700 Subject: [PATCH 04/18] Commit work in progress multi frame channel efforts --- .../environment/14_batcher_fallback_test.go | 304 +++++++++++++++--- espresso/environment/e2e_helpers.go | 69 ++++ espresso/environment/tx_helpers.go | 10 +- 3 files changed, 327 insertions(+), 56 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 5f3be8845a9..e3c4c329160 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -1,9 +1,13 @@ package environment_test import ( + "bytes" "context" "errors" + "fmt" + "io" "math/big" + "sync" "testing" "time" @@ -14,6 +18,8 @@ import ( "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait" "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum-optimism/optimism/op-node/config" + "github.com/ethereum-optimism/optimism/op-node/rollup/derive" + "github.com/ethereum-optimism/optimism/op-node/rollup/derive/params" "github.com/ethereum-optimism/optimism/op-service/txmgr" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/core/types" @@ -146,6 +152,7 @@ Loop: // batcher to take over. type TxManagerIntercept struct { txmgr.TxManager + sync.Mutex // shouldFail indicates whether to simulate a failure on Send/SendAsync. 
shouldFail bool @@ -156,6 +163,17 @@ type TxManagerIntercept struct { // failureCount tracks the number of failures that have occurred. failureCount int + + successfulFrames map[derive.ChannelID][]derive.Frame + unsuccessfulFrames map[derive.ChannelID][]derive.Frame +} + +func NewTxManagerIntercept(base txmgr.TxManager) *TxManagerIntercept { + return &TxManagerIntercept{ + TxManager: base, + successfulFrames: make(map[derive.ChannelID][]derive.Frame), + unsuccessfulFrames: make(map[derive.ChannelID][]derive.Frame), + } } // ErrSimulatedTxSubmissionFailure is the sentinel error returned when a @@ -165,14 +183,117 @@ type TxManagerIntercept struct { // failure was intentional for testing purposes. var ErrSimulatedTxSubmissionFailure = errors.New("simulated tx submission failure") +func decodeFrameInformation(candidate txmgr.TxCandidate) ([]derive.Frame, error) { + if len(candidate.TxData) > 0 { + // We have a CallData tx, so we can decode the frame information from + // the tx data. + return decodeFrameInformationFromTxData(candidate) + } + + if len(candidate.Blobs) > 0 { + // We have a Blob tx, so we can decode the frame information from + // the blobs. + return decodeFrameInformationFromBlobs(candidate) + } + + return nil, fmt.Errorf("tx candidate has neither tx data nor blobs to decode frame information from") +} + +func decodeFrameInformationFromTxData(candidate txmgr.TxCandidate) ([]derive.Frame, error) { + data := candidate.TxData + + if data[0] != params.DerivationVersion0 { + // Not a supported derivation version + return nil, fmt.Errorf("unsupported derivation version: %d", data[0]) + } + + var frames []derive.Frame + reader := bytes.NewBuffer(data[1:]) + for { + var frame derive.Frame + err := frame.UnmarshalBinary(reader) + if errors.Is(err, io.EOF) { + // We've consumed all of the frames. + break + } + + // If this is any other error, it indicates that there was an + // error decoding the frame. 
+ if err != nil { + return frames, fmt.Errorf("error decoding frame: %w", err) + } + + frames = append(frames, frame) + } + + return frames, nil +} + +func decodeFrameInformationFromBlobs(candidate txmgr.TxCandidate) ([]derive.Frame, error) { + var frames []derive.Frame + for _, blob := range candidate.Blobs { + data, err := blob.ToData() + if err != nil { + return frames, fmt.Errorf("error converting blob to data: %w", err) + } + + if data[0] != params.DerivationVersion0 { + // Not a supported derivation version + return frames, fmt.Errorf("unsupported derivation version: %d", data[0]) + } + + reader := bytes.NewBuffer(data[1:]) + for { + var frame derive.Frame + err := frame.UnmarshalBinary(reader) + if errors.Is(err, io.EOF) { + // We've consumed all of the frames in this blob. + break + } + + // If this is any other error, it indicates that there was an + // error decoding the frame. + if err != nil { + return frames, fmt.Errorf("error decoding frame from blob: %w", err) + } + + frames = append(frames, frame) + } + } + + return frames, nil +} + +func (t *TxManagerIntercept) markFramesAsSuccessful(frames []derive.Frame) { + t.Mutex.Lock() + defer t.Mutex.Unlock() + for _, frame := range frames { + t.successfulFrames[frame.ID] = append(t.successfulFrames[frame.ID], frame) + } +} + +func (t *TxManagerIntercept) markFramesAsUnsuccessful(frames []derive.Frame) { + t.Mutex.Lock() + defer t.Mutex.Unlock() + for _, frame := range frames { + t.unsuccessfulFrames[frame.ID] = append(t.successfulFrames[frame.ID], frame) + } +} + // Send implements txmgr.TxManager. // // Send is overridden to simulate a failure when shouldFail is true, and to // allow for one final transaction to be sent before failures begin when // triggerAfterOne is true. 
func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandidate) (*types.Receipt, error) { + frames, err := decodeFrameInformation(candidate) + if err != nil { + return nil, err + } + if t.shouldFail { t.failureCount++ + t.markFramesAsSuccessful(frames) time.Sleep(50 * time.Millisecond) // Simulate some delay return nil, ErrSimulatedTxSubmissionFailure } @@ -181,6 +302,8 @@ func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandida t.shouldFail = true } + t.markFramesAsUnsuccessful(frames) + return t.TxManager.Send(ctx, candidate) } @@ -190,8 +313,15 @@ func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandida // to allow for one final transaction to be sent before failures begin when // triggerAfterOne is true. func (t *TxManagerIntercept) SendAsync(ctx context.Context, candidate txmgr.TxCandidate, ch chan txmgr.SendResponse) { + frames, err := decodeFrameInformation(candidate) + if err != nil { + ch <- txmgr.SendResponse{Err: fmt.Errorf("failed to decode frame information: %w", err)} + return + } + if t.shouldFail { t.failureCount++ + t.markFramesAsUnsuccessful(frames) time.Sleep(50 * time.Millisecond) // Simulate some delay ch <- txmgr.SendResponse{Err: ErrSimulatedTxSubmissionFailure} return @@ -201,9 +331,35 @@ func (t *TxManagerIntercept) SendAsync(ctx context.Context, candidate txmgr.TxCa t.shouldFail = true } + t.markFramesAsSuccessful(frames) t.TxManager.SendAsync(ctx, candidate, ch) } +type partialFrameData struct { + channelID derive.ChannelID + successfulFrames []derive.Frame + unsuccessfulFrames []derive.Frame +} + +func (t *TxManagerIntercept) partialFrameData() []partialFrameData { + var partials []partialFrameData + + for channelID, unsuccessfulFrames := range t.unsuccessfulFrames { + successfulFrames, ok := t.successfulFrames[channelID] + if !ok { + continue + } + + partials = append(partials, partialFrameData{ + channelID: channelID, + successfulFrames: successfulFrames, + 
unsuccessfulFrames: unsuccessfulFrames, + }) + } + + return partials +} + // Compile time assertion to ensure TxManagerIntercept implements // txmgr.TxManager. var _ txmgr.TxManager = (*TxManagerIntercept)(nil) @@ -218,21 +374,32 @@ var _ txmgr.TxManager = (*TxManagerIntercept)(nil) // switched to the fallback and the fallback batcher should continue // submitting the remaining frames of the channel without any issues. func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) defer cancel() launcher := new(env.EspressoDevNodeLauncherDocker) + // Values of 1200 frames and 100 bytes max tx size were chosen to match + // the overall default configuration as seen in batcher/service.go when + // the flag is set to Auto. + system, espressoDevNode, err := launcher.StartE2eDevnet( ctx, t, - // We want a Max L1 Number of frames larger than 1 to ensure we can - // trigger the multi-frame channel scenario. - env.WithBatcherTargetNumFrames(3), + // Use Blobs DA to ensure multi-frame channels are possible. + // env.WithBatcherDataAvailabilityType(flags.BlobsType), + + // // We want a Max L1 Number of frames larger than 1 to ensure we can + // // trigger the multi-frame channel scenario. + // env.WithBatcherTargetNumFrames(20000), // We want a small Max L1 Tx Size to ensure that even a small L2 // transaction will result in multiple L1 Transactions. - env.WithBatcherMaxL1TxSize(5000), + env.WithBatcherMaxL1TxSize(50), + + // // We want to increase the maximum channel duration so we can be sure + // // that a channel isn't progressed every l1 block + env.WithBatcherMaxChannelDuration(10), ) require.NoError(t, err) @@ -240,25 +407,45 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // We create an intercept around the existing tx manager so we have // control over when our failures start to occur. 
- interceptTxManager := &TxManagerIntercept{ - TxManager: system.BatchSubmitter.TxManager, - } - system.BatchSubmitter.TxManager = interceptTxManager - system.BatchSubmitter.TestDriver().DriverSetup.Txmgr = interceptTxManager + // interceptTxManager := NewTxManagerIntercept( + // system.BatchSubmitter.TxManager, + // ) - l1Client := system.NodeClient(e2esys.RoleL1) + // { + // // We have to stop the Batch Submitter and restart it in order for the + // // intercept manager to be used in the queuing behavior + // err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) + // require.NoError(t, err) + + // // Replace the existing TxManager with our intercept + // system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager + + // // Start the Batcher again, so the publishingLoop picks up the TxManager + // // when creating its queue. + // err = system.BatchSubmitter.TestDriver().StartBatchSubmitting() + // require.NoError(t, err) + + // // Wait for the Next L2 Block to be verified by ensure everything is + // // working and progressing without issue + // err = wait.ForProcessingFullBatch(ctx, system.RollupClient(e2esys.RoleVerif)) + // require.NoError(t, err) + + // // Reset TxManager as we don't want to target or interfere with the + // // other aspects of the system. + // system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager.TxManager + // } defer env.Stop(t, system) defer env.Stop(t, espressoDevNode) // Send Transaction on L1, and wait for verification on the L2 Verifier - env.RunSimpleL1TransferAndVerifier(ctx, t, system) + // env.RunSimpleL1TransferAndVerifier(ctx, t, system) // Verify everything works env.RunSimpleL2Burn(ctx, t, system) // We want to trigger the failure mode now. - interceptTxManager.triggerAfterOne = true + // interceptTxManager.triggerAfterOne = true // Now we need to submit a multi-frame channel to L1 to trigger the // failure. 
We can do this by adjusting the batcher config to use a very @@ -266,62 +453,77 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // multiple frames. // We want enough L2 Transactions to ensure we have multiple frames. - n := 10 + const n = 10 - receipts := env.RunSimpleMultiTransactions(ctx, t, system, n) + // receipts := env.RunSimpleMultiTransactions(ctx, t, system, n, 0) // We want to wait until we know that the intercept tx manager has // trigger the failure mode successfully, and that all n transactions // have been attempted. - err = wait.For(ctx, 10*time.Second, func() (bool, error) { - return interceptTxManager.failureCount >= 1, nil - }) + // Wait until at least 2 L2 blocks have been mined (one for the + // a block with successful frames, and one for a block with failed frames). + l2Seq := system.NodeClient(e2esys.RoleSeq) + err = wait.ForNextBlock(ctx, l2Seq) + require.NoError(t, err) + err = wait.ForNextBlock(ctx, l2Seq) require.NoError(t, err) - if have, want := interceptTxManager.failureCount, 1; have < want { - t.Fatalf("tx submission failure not triggered enough times:\nhave:\n\t%d\nwant at least:\n\t%d", have, want) - } + // Make sure that at least one failure has occurred, as this should + // indicate that the submission process should have failed a multiframe + // channel submission. + // err = wait.For(ctx, 10*time.Second, func() (bool, error) { + // return interceptTxManager.failureCount >= 1, nil + // }) + // require.NoError(t, err) // Make sure that the verifier doesn't see any of the transactions. 
- l2Verif := system.NodeClient(e2esys.RoleVerif) + // l2Verif := system.NodeClient(e2esys.RoleVerif) - for _, receipt := range receipts { - _, err := l2Verif.TransactionReceipt(ctx, receipt.TxHash) - if have, doNotWant := err, error(nil); have == doNotWant { - t.Errorf("receipt for tx %s found on L2 Verifier when not expected:\nhave:\n\t%v\nwant:\n\t%v", receipt.TxHash, have, doNotWant) - } - } + // for _, receipt := range receipts { + // _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) + // // _, err := l2Verif.TransactionReceipt(ctx, receipt.TxHash) + // if have, doNotWant := err, error(nil); have == doNotWant { + // t.Errorf("receipt for tx %s found on L2 Verifier when not expected:\nhave:\n\t%v\nwant:\n\t%v", receipt.TxHash, have, doNotWant) + // } + // } - // Stop the "TEE" batcher - err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) - require.NoError(t, err) + // // Stop the "TEE" batcher + // err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) + // require.NoError(t, err) - // Switch active batcher - options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) - require.NoError(t, err) + // // Switch active batcher + // options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) + // require.NoError(t, err) - batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) - require.NoError(t, err) + // l1Client := system.NodeClient(e2esys.RoleL1) + // batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) + // require.NoError(t, err) - tx, err := batchAuthenticator.SwitchBatcher(options) - require.NoError(t, err) - _, err = wait.ForReceiptOK(ctx, l1Client, tx.Hash()) - require.NoError(t, err) + // tx, err := batchAuthenticator.SwitchBatcher(options) + // require.NoError(t, err) + // _, err = 
wait.ForReceiptOK(ctx, l1Client, tx.Hash()) + // require.NoError(t, err) - // Start the fallback batcher - err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() - require.NoError(t, err) + // // Start the fallback batcher + // err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() + // require.NoError(t, err) - // Everything should still work - env.RunSimpleL2Burn(ctx, t, system) + // // Everything should still work + // env.RunSimpleL2Burn(ctx, t, system) + + // There should be some failure recorded in the intercept tx manager that + // has a corresponding success in the intercept tx manager. + + // partialFrameData := interceptTxManager.partialFrameData() + // require.Greaterf(t, len(partialFrameData), 0, "expected to find at least one partially submitted frame") // Verify that our previous receipts were also recorded on L1 - for i, receipt := range receipts { - _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) - if have, want := err, error(nil); have != want { - t.Errorf("receipt %d for tx %s not found on L2 Verifier:\nhave:\n\t%v\nwant:\n\t%v", i, receipt.TxHash, have, want) - } - } + // for i, receipt := range receipts { + // _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) + // if have, want := err, error(nil); have != want { + // t.Errorf("receipt %d for tx %s not found on L2 Verifier:\nhave:\n\t%v\nwant:\n\t%v", i, receipt.TxHash, have, want) + // } + // } } diff --git a/espresso/environment/e2e_helpers.go b/espresso/environment/e2e_helpers.go index 4980828f487..76d6973d299 100644 --- a/espresso/environment/e2e_helpers.go +++ b/espresso/environment/e2e_helpers.go @@ -5,6 +5,7 @@ import ( "time" bss "github.com/ethereum-optimism/optimism/op-batcher/batcher" + "github.com/ethereum-optimism/optimism/op-batcher/flags" "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum-optimism/optimism/op-e2e/system/helpers" "github.com/ethereum/go-ethereum/common" @@ -190,6 +191,15 @@ func 
WithBatcherMaxL1TxSize(maxL1TxSize uint64) E2eDevnetLauncherOption { Role: e2esys.RoleSeq, BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { batchConfig.MaxL1TxSize = maxL1TxSize + + if batchConfig.DataAvailabilityType == flags.BlobsType { + // If we're setting the max data size for blobs, + // we need to also inform the batcher to use that + // setting when calculating blob sizes. + // + // Otherwise it will use the max blob size constant. + batchConfig.TestUseMaxTxSizeForBlobs = true + } }, }, }, @@ -218,3 +228,62 @@ func WithBatcherMaxBlocksPerSpanBatch(maxBlocksPerSpanBatch int) E2eDevnetLaunch } } } + +// WithBatcherDataAvailabilityType is a E2eDevnetLauncherOption that configures +// the batcher's `DataAvailabilityType` option to the provided value. +// +// This governs which data availability method the batcher will use when +// submitting frames to L1. +func WithBatcherDataAvailabilityType(daAvailabilityType flags.DataAvailabilityType) E2eDevnetLauncherOption { + { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "dataAvailabilityType", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.DataAvailabilityType = daAvailabilityType + }, + }, + }, + } + } + } +} + +func WithBatcherMaxChannelDuration(maxChannelDuration uint64) E2eDevnetLauncherOption { + { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "maxChannelDuration", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.MaxChannelDuration = maxChannelDuration + }, + }, + }, + } + } + } +} + +func WithBatcherMaxFrameSize(maxFrameSize uint64) E2eDevnetLauncherOption { + { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: 
"maxFrameSize", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.MaxChannelDuration = maxFrameSize + }, + }, + }, + } + } + } +} diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index a1103cb6f8c..af998676071 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -2,6 +2,7 @@ package environment import ( "context" + "crypto/rand" "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait" "github.com/ethereum/go-ethereum/accounts/abi/bind" @@ -133,7 +134,7 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { // RunSimpleMultiTransactions sends numTransactions simple L2 transactions // from Bob's account with a bunch of random data applied to each transaction. -func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) []*types.Receipt { +func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions, dataSize int) []*types.Receipt { senderKey := system.Cfg.Secrets.Bob senderAddress := system.Cfg.Secrets.Addresses().Bob l2Seq := system.NodeClient(e2esys.RoleSeq) @@ -148,10 +149,9 @@ func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esy receipt := helpers.SendL2Tx(t, system.Cfg, l2Seq, senderKey, func(opts *helpers.TxOpts) { opts.Nonce = nonce + uint64(i) opts.Gas = 100_000 - - // opts.Data = make([]byte, 256) - // Fill with random data - // rand.Read(opts.Data) + opts.Data = make([]byte, dataSize) // add some data to the tx + // Fill will random data + rand.Read(opts.Data) }) ch <- receipt })(ch, i, nonce) From 45687362e63ff6cba85738f0e46d1378dbc55d1c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Jan 2026 08:57:36 -0700 Subject: [PATCH 05/18] Adjust settings to successfully trigger multi-frame channels After a mob programming session @Quentinl was able to help identify 
a specific combination of parameters
The blobs will be +// converted back to txData, and the data will be decoded into frames. func decodeFrameInformationFromBlobs(candidate txmgr.TxCandidate) ([]derive.Frame, error) { var frames []derive.Frame for _, blob := range candidate.Blobs { @@ -374,32 +381,65 @@ var _ txmgr.TxManager = (*TxManagerIntercept)(nil) // switched to the fallback and the fallback batcher should continue // submitting the remaining frames of the channel without any issues. func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Minute*10) + ctx, cancel := context.WithTimeoutCause(context.Background(), time.Minute*10, fmt.Errorf("test did not complete within expected time allotment: %w", context.DeadlineExceeded)) defer cancel() launcher := new(env.EspressoDevNodeLauncherDocker) - // Values of 1200 frames and 100 bytes max tx size were chosen to match - // the overall default configuration as seen in batcher/service.go when - // the flag is set to Auto. + // In order to force a multi-frame channel with the e2e system setup, + // we need to multimately modify the channel config that will be utilized + // by the batcher. + // + // This may seem a bit convoluted, but we have to contend with a few + // different settings in order to ensure that the behavior we are + // targeting is achieved. + // + // All of the options given below are utilized with the specific purpose + // of triggering multi-frame channels. + // + // NOTE: Some of the configuration options pull double-duty. They are + // utilized in both the creation of the frames, and the sending of the + // frames. In both scenarios, they may behave differently. I will make + // an effor to note them where they occur. system, espressoDevNode, err := launcher.StartE2eDevnet( ctx, t, - // Use Blobs DA to ensure multi-frame channels are possible. 
- // env.WithBatcherDataAvailabilityType(flags.BlobsType), - // // We want a Max L1 Number of frames larger than 1 to ensure we can - // // trigger the multi-frame channel scenario. - // env.WithBatcherTargetNumFrames(20000), - - // We want a small Max L1 Tx Size to ensure that even a small L2 - // transaction will result in multiple L1 Transactions. - env.WithBatcherMaxL1TxSize(50), - - // // We want to increase the maximum channel duration so we can be sure - // // that a channel isn't progressed every l1 block - env.WithBatcherMaxChannelDuration(10), + // Explicitly disable using any sort of compression. This is + // necessary as we will be specifying that we will be targeting + // a specific frame size, and we don't want compression to indirectly + // interfere with this process. + env.WithBatcherCompressor(compressor.NoneKind), + + // This sets the Target Number of Frames that each channel is aiming + // to achieve. In this case we specify 3 so that we can ensure that + // our channel will always be a multi frame channel. + // + // Coupling this with the max channel duration helps us to ensure that + // each channel will always aim for the same number of channels. + // + // NOTE: This has different behavior when constructing the frames on + // the Batcher preparation than it does on the L1 submission. + // Specifically concerning the Da Type DaTypeCalldata. When utilizing + // call data, the L1 Submission will **ALWAYS** utilize 1 frame instead + // of this passed value. Yet channel construction will utilize this + // provided value as appropriate. + env.WithBatcherTargetNumFrames(3), + + // We set the MaxL1TxSize to some value that will hold our L2 + // Transaction size comfortably. + env.WithBatcherMaxL1TxSize(1200), + + // We set the MaxChannelDuration to 0 specifically to disable premature + // channel closing before we have enough frames. The default behavior + // is to create a new Channel with the specified window of L1 Blocks. 
+ // The idea is that you can prevent eager channel production when + // you are not producing blocks with transactions. + // + // Setting this to 0 explicitly disables the feature, and as a result + // it will only send the data when the previous conditions are met. + env.WithBatcherMaxChannelDuration(0), ) require.NoError(t, err) @@ -407,33 +447,33 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // We create an intercept around the existing tx manager so we have // control over when our failures start to occur. - // interceptTxManager := NewTxManagerIntercept( - // system.BatchSubmitter.TxManager, - // ) + interceptTxManager := NewTxManagerIntercept( + system.BatchSubmitter.TxManager, + ) - // { - // // We have to stop the Batch Submitter and restart it in order for the - // // intercept manager to be used in the queuing behavior - // err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) - // require.NoError(t, err) + { + // We have to stop the Batch Submitter and restart it in order for the + // intercept manager to be used in the queuing behavior + err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) + require.NoError(t, err) - // // Replace the existing TxManager with our intercept - // system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager + // Replace the existing TxManager with our intercept + system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager - // // Start the Batcher again, so the publishingLoop picks up the TxManager - // // when creating its queue. - // err = system.BatchSubmitter.TestDriver().StartBatchSubmitting() - // require.NoError(t, err) + // Start the Batcher again, so the publishingLoop picks up the TxManager + // when creating its queue. 
+ err = system.BatchSubmitter.TestDriver().StartBatchSubmitting() + require.NoError(t, err) - // // Wait for the Next L2 Block to be verified by ensure everything is - // // working and progressing without issue - // err = wait.ForProcessingFullBatch(ctx, system.RollupClient(e2esys.RoleVerif)) - // require.NoError(t, err) + // Wait for the Next L2 Block to be verified by ensure everything is + // working and progressing without issue + err = wait.ForProcessingFullBatch(ctx, system.RollupClient(e2esys.RoleVerif)) + require.NoError(t, err) - // // Reset TxManager as we don't want to target or interfere with the - // // other aspects of the system. - // system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager.TxManager - // } + // Reset TxManager as we don't want to target or interfere with the + // other aspects of the system. + system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager.TxManager + } defer env.Stop(t, system) defer env.Stop(t, espressoDevNode) @@ -445,7 +485,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { env.RunSimpleL2Burn(ctx, t, system) // We want to trigger the failure mode now. - // interceptTxManager.triggerAfterOne = true + interceptTxManager.triggerAfterOne = true // Now we need to submit a multi-frame channel to L1 to trigger the // failure. We can do this by adjusting the batcher config to use a very @@ -455,7 +495,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // We want enough L2 Transactions to ensure we have multiple frames. 
const n = 10 - // receipts := env.RunSimpleMultiTransactions(ctx, t, system, n, 0) + receipts := env.RunSimpleMultiTransactions(ctx, t, system, n, 0) // We want to wait until we know that the intercept tx manager has // trigger the failure mode successfully, and that all n transactions @@ -472,58 +512,46 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // Make sure that at least one failure has occurred, as this should // indicate that the submission process should have failed a multiframe // channel submission. - // err = wait.For(ctx, 10*time.Second, func() (bool, error) { - // return interceptTxManager.failureCount >= 1, nil - // }) - // require.NoError(t, err) - - // Make sure that the verifier doesn't see any of the transactions. - - // l2Verif := system.NodeClient(e2esys.RoleVerif) + err = wait.For(ctx, 10*time.Second, func() (bool, error) { + return interceptTxManager.failureCount >= 1, nil + }) + require.NoError(t, err) - // for _, receipt := range receipts { - // _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) - // // _, err := l2Verif.TransactionReceipt(ctx, receipt.TxHash) - // if have, doNotWant := err, error(nil); have == doNotWant { - // t.Errorf("receipt for tx %s found on L2 Verifier when not expected:\nhave:\n\t%v\nwant:\n\t%v", receipt.TxHash, have, doNotWant) - // } - // } + l2Verif := system.NodeClient(e2esys.RoleVerif) - // // Stop the "TEE" batcher - // err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) - // require.NoError(t, err) + // Stop the "TEE" batcher + err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) + require.NoError(t, err) - // // Switch active batcher - // options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) - // require.NoError(t, err) + // Switch active batcher + options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) + require.NoError(t, err) - 
// l1Client := system.NodeClient(e2esys.RoleL1) - // batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) - // require.NoError(t, err) + l1Client := system.NodeClient(e2esys.RoleL1) + batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) + require.NoError(t, err) - // tx, err := batchAuthenticator.SwitchBatcher(options) - // require.NoError(t, err) - // _, err = wait.ForReceiptOK(ctx, l1Client, tx.Hash()) - // require.NoError(t, err) + tx, err := batchAuthenticator.SwitchBatcher(options) + require.NoError(t, err) + _, err = wait.ForReceiptOK(ctx, l1Client, tx.Hash()) + require.NoError(t, err) // // Start the fallback batcher - // err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() - // require.NoError(t, err) - - // // Everything should still work - // env.RunSimpleL2Burn(ctx, t, system) + err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() + require.NoError(t, err) // There should be some failure recorded in the intercept tx manager that // has a corresponding success in the intercept tx manager. 
- // partialFrameData := interceptTxManager.partialFrameData() - // require.Greaterf(t, len(partialFrameData), 0, "expected to find at least one partially submitted frame") + partialFrameData := interceptTxManager.partialFrameData() + require.Greaterf(t, len(partialFrameData), 0, "expected to find at least one partially submitted frame") // Verify that our previous receipts were also recorded on L1 - // for i, receipt := range receipts { - // _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) - // if have, want := err, error(nil); have != want { - // t.Errorf("receipt %d for tx %s not found on L2 Verifier:\nhave:\n\t%v\nwant:\n\t%v", i, receipt.TxHash, have, want) - // } - // } + for i, receipt := range receipts { + _, err := wait.ForReceiptOK(ctx, l2Verif, receipt.TxHash) + require.NoError(t, err, "failed to find receipt %d for tx %s on L2 Verifier", i, receipt.TxHash) + } + + // Everything should still work + env.RunSimpleL2Burn(ctx, t, system) } diff --git a/espresso/environment/e2e_helpers.go b/espresso/environment/e2e_helpers.go index 76d6973d299..4eab27150b3 100644 --- a/espresso/environment/e2e_helpers.go +++ b/espresso/environment/e2e_helpers.go @@ -252,6 +252,9 @@ func WithBatcherDataAvailabilityType(daAvailabilityType flags.DataAvailabilityTy } } +// WithBatcherMaxChannelDuration is a configuration option that modifies the +// MaxChannelDuration for the Batcher Config. This value will then be +// utilized by the Channels created by the batcher. func WithBatcherMaxChannelDuration(maxChannelDuration uint64) E2eDevnetLauncherOption { { return func(c *E2eDevnetLauncherContext) E2eSystemOption { @@ -270,6 +273,9 @@ func WithBatcherMaxChannelDuration(maxChannelDuration uint64) E2eDevnetLauncherO } } +// WithBatcherMaxFrameSize is a configuration option that modifies the +// MaxChannelDuration for the Batcher Config. This value will then be +// utilized by the channels created by the batcher. 
func WithBatcherMaxFrameSize(maxFrameSize uint64) E2eDevnetLauncherOption { { return func(c *E2eDevnetLauncherContext) E2eSystemOption { @@ -287,3 +293,24 @@ func WithBatcherMaxFrameSize(maxFrameSize uint64) E2eDevnetLauncherOption { } } } + +// WithBatcherCompressor is a configuration option that modifies the Compressor +// setting of the Batcher Config. This value will be utilzied to determine +// compression options for the channels created by the batcher. +func WithBatcherCompressor(compressor string) E2eDevnetLauncherOption { + { + return func(c *E2eDevnetLauncherContext) E2eSystemOption { + return E2eSystemOption{ + StartOptions: []e2esys.StartOption{ + { + Key: "compressor", + Role: e2esys.RoleSeq, + BatcherMod: func(batchConfig *bss.CLIConfig, sys *e2esys.System) { + batchConfig.Compressor = compressor + }, + }, + }, + } + } + } +} From 5a45aede1fc0151c0a4eef329bdfbb17af9a4cee Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 2 Jan 2026 10:00:30 -0700 Subject: [PATCH 06/18] Perform some code cleanup This change does a few things: - Address linting issue causing CI failure - Adjusts some golang forloop usage to be more modern - Adjust function call signatures to remove unused variables --- .../environment/14_batcher_fallback_test.go | 15 +++++-------- espresso/environment/tx_helpers.go | 22 +++++++------------ 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 971440656cb..443eb8b3f30 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -272,16 +272,16 @@ func decodeFrameInformationFromBlobs(candidate txmgr.TxCandidate) ([]derive.Fram } func (t *TxManagerIntercept) markFramesAsSuccessful(frames []derive.Frame) { - t.Mutex.Lock() - defer t.Mutex.Unlock() + t.Lock() + defer t.Unlock() for _, frame := range frames { t.successfulFrames[frame.ID] = 
append(t.successfulFrames[frame.ID], frame) } } func (t *TxManagerIntercept) markFramesAsUnsuccessful(frames []derive.Frame) { - t.Mutex.Lock() - defer t.Mutex.Unlock() + t.Lock() + defer t.Unlock() for _, frame := range frames { t.unsuccessfulFrames[frame.ID] = append(t.successfulFrames[frame.ID], frame) } @@ -478,9 +478,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { defer env.Stop(t, system) defer env.Stop(t, espressoDevNode) - // Send Transaction on L1, and wait for verification on the L2 Verifier - // env.RunSimpleL1TransferAndVerifier(ctx, t, system) - // Verify everything works env.RunSimpleL2Burn(ctx, t, system) @@ -495,7 +492,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // We want enough L2 Transactions to ensure we have multiple frames. const n = 10 - receipts := env.RunSimpleMultiTransactions(ctx, t, system, n, 0) + receipts := env.RunSimpleMultiTransactions(ctx, t, system, n) // We want to wait until we know that the intercept tx manager has // trigger the failure mode successfully, and that all n transactions @@ -536,7 +533,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { _, err = wait.ForReceiptOK(ctx, l1Client, tx.Hash()) require.NoError(t, err) - // // Start the fallback batcher + // Start the fallback batcher err = system.FallbackBatchSubmitter.TestDriver().StartBatchSubmitting() require.NoError(t, err) diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index af998676071..2d8f14151e0 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -2,7 +2,9 @@ package environment import ( "context" - "crypto/rand" + "math/big" + "testing" + "time" "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait" "github.com/ethereum/go-ethereum/accounts/abi/bind" @@ -10,10 +12,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/stretchr/testify/require" - "math/big" - "testing" - 
"time" - "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum-optimism/optimism/op-e2e/system/helpers" "github.com/ethereum/go-ethereum/ethclient" @@ -27,7 +25,8 @@ func RunSimpleL2Transfer( system *e2esys.System, nonce uint64, amount big.Int, - l2Seq *ethclient.Client) common.Hash { + l2Seq *ethclient.Client, +) common.Hash { _, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() @@ -48,7 +47,6 @@ func RunSimpleL2Transfer( txHash := receipt.TxHash return txHash - } // runSimpleL1TransferAndVerifier runs a simple L1 transfer and verifies it on @@ -134,7 +132,7 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { // RunSimpleMultiTransactions sends numTransactions simple L2 transactions // from Bob's account with a bunch of random data applied to each transaction. -func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions, dataSize int) []*types.Receipt { +func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) []*types.Receipt { senderKey := system.Cfg.Secrets.Bob senderAddress := system.Cfg.Secrets.Addresses().Bob l2Seq := system.NodeClient(e2esys.RoleSeq) @@ -144,21 +142,17 @@ func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esy } ch := make(chan *types.Receipt, numTransactions) - for i := 0; i < numTransactions; i++ { + for i := range numTransactions { go (func(ch chan *types.Receipt, i int, nonce uint64) { receipt := helpers.SendL2Tx(t, system.Cfg, l2Seq, senderKey, func(opts *helpers.TxOpts) { opts.Nonce = nonce + uint64(i) - opts.Gas = 100_000 - opts.Data = make([]byte, dataSize) // add some data to the tx - // Fill will random data - rand.Read(opts.Data) }) ch <- receipt })(ch, i, nonce) } var receipts []*types.Receipt - for i := 0; i < numTransactions; i++ { + for range numTransactions { receipt := <-ch receipts = append(receipts, receipt) } From 
7da0f4e4239eb2faf6a3bc62de4ec06be11f777e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Mon, 5 Jan 2026 09:18:50 -0700 Subject: [PATCH 07/18] Fix bug tracking unsuccessful frames in test In the `TxManagerIntercept` there is a bug that appends the successful frames to the unsuccessful ones. While this bug isn't great in the information that it taints, it doesn't actually have the large of an impact on the test as a whole, as the resulting failure condition would be triggered regardless. This bug does affect the accurate tracking of failed frames which could be valuable information for inspection. --- espresso/environment/14_batcher_fallback_test.go | 2 +- espresso/environment/tx_helpers.go | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 443eb8b3f30..a0caa47dbfe 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -283,7 +283,7 @@ func (t *TxManagerIntercept) markFramesAsUnsuccessful(frames []derive.Frame) { t.Lock() defer t.Unlock() for _, frame := range frames { - t.unsuccessfulFrames[frame.ID] = append(t.successfulFrames[frame.ID], frame) + t.unsuccessfulFrames[frame.ID] = append(t.unsuccessfulFrames[frame.ID], frame) } } diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index 2d8f14151e0..77a499ff303 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -131,7 +131,14 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { } // RunSimpleMultiTransactions sends numTransactions simple L2 transactions -// from Bob's account with a bunch of random data applied to each transaction. +// from Bob's account and returns the receipts. +// +// This is all attempted in porallel, as it will spawn a separate goroutine +// for each transaction submission. 
Each transaction will be provided its +// own nonce, based on the currently understood value of the nonce for +// Bob. +// +// This will return once all receipts have been returned. func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) []*types.Receipt { senderKey := system.Cfg.Secrets.Bob senderAddress := system.Cfg.Secrets.Addresses().Bob From 0ebf7139288f900d1c48a42a7c06f661da9ac01e Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Tue, 6 Jan 2026 11:16:13 -0700 Subject: [PATCH 08/18] Update espresso/environment/e2e_helpers.go Co-authored-by: Phil --- espresso/environment/e2e_helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espresso/environment/e2e_helpers.go b/espresso/environment/e2e_helpers.go index 4eab27150b3..7989016c9e4 100644 --- a/espresso/environment/e2e_helpers.go +++ b/espresso/environment/e2e_helpers.go @@ -295,7 +295,7 @@ func WithBatcherMaxFrameSize(maxFrameSize uint64) E2eDevnetLauncherOption { } // WithBatcherCompressor is a configuration option that modifies the Compressor -// setting of the Batcher Config. This value will be utilzied to determine +// setting of the Batcher Config. This value will be utilized to determine // compression options for the channels created by the batcher. func WithBatcherCompressor(compressor string) E2eDevnetLauncherOption { { From 9b446affb26ed5762b4ef3d715dca1b1ef70762c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:28:28 -0700 Subject: [PATCH 09/18] Replace Disable Batcher setting references There are a number of places in our testing setup where we are explicitly preventing the Batcher from starting on launch. Instead of rewriting this same option every time we want to use it, we should reference a built in option that we can reference continually. This allows for non-repeated code and improved documentation as to the point and purpose of this option. 
--- .../environment/14_batcher_fallback_test.go | 7 ++---- .../5_batch_authentication_test.go | 5 +--- espresso/environment/6_batch_inbox_test.go | 9 +++---- espresso/environment/enclave_helpers.go | 19 +++++++++++--- .../optitmism_espresso_test_helpers.go | 25 +++++++++++++++++++ 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index a0caa47dbfe..93d862331d3 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -405,6 +405,8 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { system, espressoDevNode, err := launcher.StartE2eDevnet( ctx, t, + // Make Sure that ther Batcher does not start Running + env.WithBatcherStoppedInitially(), // Explicitly disable using any sort of compression. This is // necessary as we will be specifying that we will be targeting @@ -452,11 +454,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { ) { - // We have to stop the Batch Submitter and restart it in order for the - // intercept manager to be used in the queuing behavior - err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) - require.NoError(t, err) - // Replace the existing TxManager with our intercept system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager diff --git a/espresso/environment/5_batch_authentication_test.go b/espresso/environment/5_batch_authentication_test.go index f9a457fded2..b0bf92aaaee 100644 --- a/espresso/environment/5_batch_authentication_test.go +++ b/espresso/environment/5_batch_authentication_test.go @@ -9,7 +9,6 @@ import ( env "github.com/ethereum-optimism/optimism/espresso/environment" "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/geth" - "github.com/ethereum-optimism/optimism/op-e2e/system/e2esys" "github.com/ethereum/go-ethereum/crypto" ) @@ -29,9 +28,7 @@ func 
TestE2eDevnetWithInvalidAttestation(t *testing.T) { system, _, err := launcher.StartE2eDevnet(ctx, t, env.SetBatcherKey(*privateKey), - env.Config(func(cfg *e2esys.SystemConfig) { - cfg.DisableBatcher = true - }), + env.WithBatcherStoppedInitially(), env.WithEspressoAttestationVerifierService(), ) diff --git a/espresso/environment/6_batch_inbox_test.go b/espresso/environment/6_batch_inbox_test.go index afe12971a9c..2cb623e2305 100644 --- a/espresso/environment/6_batch_inbox_test.go +++ b/espresso/environment/6_batch_inbox_test.go @@ -43,12 +43,9 @@ func TestE2eDevnetWithoutAuthenticatingBatches(t *testing.T) { launcher := new(env.EspressoDevNodeLauncherDocker) - system, _, err := - launcher.StartE2eDevnet(ctx, t, - env.Config(func(cfg *e2esys.SystemConfig) { - cfg.DisableBatcher = true - }), - ) + system, _, err := launcher.StartE2eDevnet(ctx, t, + env.WithBatcherStoppedInitially(), + ) if have, want := err, error(nil); have != want { t.Fatalf("failed to start dev environment with espresso dev node:\nhave:\n\t\"%v\"\nwant:\n\t\"%v\"\n", have, want) diff --git a/espresso/environment/enclave_helpers.go b/espresso/environment/enclave_helpers.go index 6b754a32ffa..d9af71511c9 100644 --- a/espresso/environment/enclave_helpers.go +++ b/espresso/environment/enclave_helpers.go @@ -85,13 +85,24 @@ func appendArg(args *[]string, flagName string, value any) { // an Enclave, as such, it is better to pre-configure the option instead of // allowing for the potential of an error to occur due to not including the // other Option. +// +// This LauncherOption explicitly creates a Batcher to run in the Enclave based +// on the configuration of the batcher that would be created and started +// locally. The locally created Batcher in the E2e System is never meant to +// actually run with this option, and instead the External Batcher is meant +// to be run instead. 
func LaunchBatcherInEnclave() E2eDevnetLauncherOption { return func(ct *E2eDevnetLauncherContext) E2eSystemOption { return E2eSystemOption{ - SystemConfigOption: func(cfg *e2esys.SystemConfig) { - cfg.DisableBatcher = true - }, - SystemConfigOpt: e2esys.WithAllocType(config.AllocTypeEspressoWithEnclave), + // | NOTE: while this option initially disables the batcher for + // the purposes of being started later, it is the intention of + // this Launchger Option to tie the Batcher as an external + // connection, rather than the local testing one. As a result + // The local Batcher should not be accessed / inspecting / + // interacted with for the purposes of any tests that are + // utilizing this Laucnher Option. + SystemConfigOption: SystemConfigOptionDisableBatcher, + SystemConfigOpt: e2esys.WithAllocType(config.AllocTypeEspressoWithEnclave), StartOptions: []e2esys.StartOption{ launchEspressoAttestationVerifierServiceDockerContainer(ct), { diff --git a/espresso/environment/optitmism_espresso_test_helpers.go b/espresso/environment/optitmism_espresso_test_helpers.go index 799ce6a5c53..cadb203c987 100644 --- a/espresso/environment/optitmism_espresso_test_helpers.go +++ b/espresso/environment/optitmism_espresso_test_helpers.go @@ -570,6 +570,22 @@ func SetEspressoUrls(numGood int, numBad int, badServerUrl string) E2eDevnetLaun } } +// SystemConfigOptionDisableBatcher is a SystemConfigOption that disables +// the Batcher. +// +// | NOTE: This doesn't actually stop the Batcher from being created entirely. +// +// Instead, it prevents the Batcher from "Starting". The Batcher still +// exists in the local context, it just won't be running initially. But +// it can still be started programatically via its API. This is most +// easily done by calling `StartBatchSubmitting` on the `TestDriver` of +// the system. 
+func SystemConfigOptionDisableBatcher(cfg *e2esys.SystemConfig) { + cfg.DisableBatcher = true +} + +// Config is a convenience function that allows for the initial modification +// of the SystemConfig only. func Config(fn func(*e2esys.SystemConfig)) E2eDevnetLauncherOption { return func(ct *E2eDevnetLauncherContext) E2eSystemOption { return E2eSystemOption{ @@ -578,6 +594,15 @@ func Config(fn func(*e2esys.SystemConfig)) E2eDevnetLauncherOption { } } +// WithBatcherStoppedInitially is an E2eDevNetLauncherOption that ensures that +// the locally created Batcher is not running initially. +// +// The Batcher can still be started locally with a call to the TestDriver's +// method: `StartBatchSubmitting`. +func WithBatcherStoppedInitially() E2eDevnetLauncherOption { + return Config(SystemConfigOptionDisableBatcher) +} + // getContainerRemappedHostPort is a helper function that takes the // containerListeningHostPort and returns the remapped host port // that the container is listening on. From 846b4b4a893fcd7ebbc7476df9ce7344017f3e1f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:31:26 -0700 Subject: [PATCH 10/18] Refactor custom wait in test There's a condition being waited on in the switch to fallback batcher test. This wait is useful, and can be reused between tests. But the wait itself is somewhat hiding its intention by being inline defined within the test itself. We should pull this wait out so it can be easily used, and its intention / purpose can be more easily documented. 
--- .../environment/14_batcher_fallback_test.go | 49 +++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 93d862331d3..71005f951e9 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -21,12 +21,39 @@ import ( "github.com/ethereum-optimism/optimism/op-node/config" "github.com/ethereum-optimism/optimism/op-node/rollup/derive" "github.com/ethereum-optimism/optimism/op-node/rollup/derive/params" + "github.com/ethereum-optimism/optimism/op-service/sources" "github.com/ethereum-optimism/optimism/op-service/txmgr" "github.com/ethereum/go-ethereum/accounts/abi/bind" "github.com/ethereum/go-ethereum/core/types" "github.com/stretchr/testify/require" ) +// waitForRollupToMovePastL1Block waits until the targeted rollup cli moves +// past the reference l1BlockNumber. This indicates that the rollupCli is +// still receiveing information that indicates that the L1 has progressed +// past the desired height. +// +// For convenience, this also returns the Local Safe L2 Height of the last +// call to the Sync Status on the Rollup Client. If this wait passes, it +// will be the LocalSafeL2 height of the SyncStatus that exceeded the +// referenced l1BlockNumber. 
+func waitForRollupToMovePastL1Block(ctx context.Context, rollupCli *sources.RollupClient, l1BlockNumber uint64) (uint64, error) { + timeoutCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + var localSafeL2Height uint64 + defer cancel() + err := wait.For(timeoutCtx, 100*time.Millisecond, func() (bool, error) { + status, err := rollupCli.SyncStatus(ctx) + if err != nil { + return false, err + } + + localSafeL2Height = status.LocalSafeL2.Number + return status.CurrentL1.Number > l1BlockNumber, nil + }) + + return localSafeL2Height, err +} + func TestBatcherSwitching(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -86,26 +113,8 @@ func TestBatcherSwitching(t *testing.T) { require.NoError(t, err) // Give things time to settle - var l2Height uint64 - - ticker := time.NewTicker(100 * time.Millisecond) - timeoutCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) - defer cancel() - -Loop: - for { - select { - case <-timeoutCtx.Done(): - panic("Timeout waiting for verifier derivation pipeline to advance past the fallback batcher switchoff point") - case <-ticker.C: - status, err := system.RollupClient(e2esys.RoleVerif).SyncStatus(ctx) - require.NoError(t, err) - if status.CurrentL1.Number > switchReceipt.BlockNumber.Uint64() { - l2Height = status.LocalSafeL2.Number - break Loop - } - } - } + l2Height, err := waitForRollupToMovePastL1Block(ctx, system.RollupClient(e2esys.RoleVerif), switchReceipt.BlockNumber.Uint64()) + require.NoError(t, err) espHeight, err := espClient.FetchLatestBlockHeight(ctx) require.NoError(t, err) From af14368ce23b623f9138c062a136941fc00c027a Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:33:29 -0700 Subject: [PATCH 11/18] Cleanup code reuse in frame decoding When decoding frame information for one of the Batcher fallback tests, there are similar code paths taken that result in most of the code being reused. 
We should clean up this code reuse so that we don't repeat ourselves in order to avoid diverging logic. Additionally, it allows us to reduce the amount of code needing to be maintained, and more clearly document the intention of the code, and the consistency with how we perform this frame decoding process. --- .../environment/14_batcher_fallback_test.go | 46 ++++++++----------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 71005f951e9..0eb42c2fd68 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -193,6 +193,8 @@ func NewTxManagerIntercept(base txmgr.TxManager) *TxManagerIntercept { // failure was intentional for testing purposes. var ErrSimulatedTxSubmissionFailure = errors.New("simulated tx submission failure") +// decodeFrameInformation takes a txmgr.TxCandidate and attempts to decode +// frames contained within either the Blob fields, or the TxData field. func decodeFrameInformation(candidate txmgr.TxCandidate) ([]derive.Frame, error) { if len(candidate.TxData) > 0 { // We have a CallData tx, so we can decode the frame information from @@ -209,12 +211,10 @@ func decodeFrameInformation(candidate txmgr.TxCandidate) ([]derive.Frame, error) return nil, fmt.Errorf("tx candidate has neither tx data nor blobs to decode frame information from") } -// decodeFrameInformationFromTxData takes a txmgr.TxCandidate and will assume -// that the frame data is encoded within the TxData. This data will be taken -// and decoded into frames and returned. -func decodeFrameInformationFromTxData(candidate txmgr.TxCandidate) ([]derive.Frame, error) { - data := candidate.TxData - +// decodeFrameInformationFromData takes a byte slice and decodes each frame +// until it can no longer decode any frames. It returns a slice of all +// decoded frames, and any error encountered. 
+func decodeFrameInformationFromData(data []byte) ([]derive.Frame, error) { if data[0] != params.DerivationVersion0 { // Not a supported derivation version return nil, fmt.Errorf("unsupported derivation version: %d", data[0]) @@ -242,6 +242,15 @@ func decodeFrameInformationFromTxData(candidate txmgr.TxCandidate) ([]derive.Fra return frames, nil } +// decodeFrameInformationFromTxData takes a txmgr.TxCandidate and will assume +// that the frame data is encoded within the TxData. This data will be taken +// and decoded into frames and returned. +func decodeFrameInformationFromTxData(candidate txmgr.TxCandidate) ([]derive.Frame, error) { + data := candidate.TxData + + return decodeFrameInformationFromData(data) +} + // decodeFrameInformationFromBlobs() takes a txmgr.TxCandidate and will assume // that the frame data is encoded within the Blobs. The blobs will be // converted back to txData, and the data will be decoded into frames. @@ -253,28 +262,11 @@ func decodeFrameInformationFromBlobs(candidate txmgr.TxCandidate) ([]derive.Fram return frames, fmt.Errorf("error converting blob to data: %w", err) } - if data[0] != params.DerivationVersion0 { - // Not a supported derivation version - return frames, fmt.Errorf("unsupported derivation version: %d", data[0]) - } - - reader := bytes.NewBuffer(data[1:]) - for { - var frame derive.Frame - err := frame.UnmarshalBinary(reader) - if errors.Is(err, io.EOF) { - // We've consumed all of the frames in this blob. - break - } - - // If this is any other error, it indicates that there was an - // error decoding the frame. - if err != nil { - return frames, fmt.Errorf("error decoding frame from blob: %w", err) - } - - frames = append(frames, frame) + newFrames, err := decodeFrameInformationFromData(data) + if err != nil { + return frames, err } + frames = append(frames, newFrames...) 
} } return frames, From 19883874f72fefd510e4b25ed319e110a0b21a7c Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:35:22 -0700 Subject: [PATCH 12/18] Relocate deferred stop calls The Stop calls should occur as close to the launch of the environment as possible. As a result, any deferred calls to Stop for the system or the Espresso Dev Node should occur as close to their occurrence as possible. --- espresso/environment/14_batcher_fallback_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 0eb42c2fd68..8fd56632c46 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -446,6 +446,8 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { ) require.NoError(t, err) + defer env.Stop(t, system) + defer env.Stop(t, espressoDevNode) // We create an intercept around the existing tx manager so we have // control over when our failures start to occur. @@ -473,8 +475,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager.TxManager } - defer env.Stop(t, system) - defer env.Stop(t, espressoDevNode) // Verify everything works env.RunSimpleL2Burn(ctx, t, system) From 1ff43e64152eb0bf01df692bc3719033d94c1bf4 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:38:44 -0700 Subject: [PATCH 13/18] Modify Initial L2Verif wait to be longer With the specific Frame and Channel settings being specified in the `TestFallbackMechanismIntegrationTestChannelNotClosed` test, the initial startup check for the L2 Verifier is failing. This is due to our settings requiring the Verifier process to take a bit longer than normal. In general, we want to give it more time, but the time frame for the failure is hard-coded in the `wait` function being utilized. 
While we **could** add a simple `time.Sleep`, and this would work, this is generally a bad approach as it just adds an unchecked delay. Instead, we opt to utilize a simple `retry` for up to `n` times. In this case, we only need to wait up to `3x` the normal time, so we ensure that we perform at least `3` times. --- .../environment/14_batcher_fallback_test.go | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 8fd56632c46..7716536cde8 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -372,6 +372,20 @@ func (t *TxManagerIntercept) partialFrameData() []partialFrameData { // txmgr.TxManager. var _ txmgr.TxManager = (*TxManagerIntercept)(nil) +// retryWaitNTimes retries the given function up to n times until it +// succeeds. +func retryWaitNTimes(fn func() error, n int) error { + var lastErr error + for range n { + lastErr = fn() + if lastErr == nil { + break + } + } + + return lastErr +} + // TestFallbackMechanismIntegrationTestChannelNotClosed is a test case that is // meant to verify the correct expected behavior in the event that the Espresso // Batcher encounters an error mid L1 Batch submission that prevents the full @@ -405,6 +419,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { system, espressoDevNode, err := launcher.StartE2eDevnet( ctx, t, - // Make Sure that ther Batcher does not start Running + // Make Sure that the Batcher does not start Running env.WithBatcherStoppedInitially(), // Explicitly disable using any sort of compression. This is @@ -475,6 +489,19 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { system.BatchSubmitter.TestDriver().Txmgr = interceptTxManager.TxManager } + l2Seq := system.NodeClient(e2esys.RoleSeq) + l1Client := system.NodeClient(e2esys.RoleL1) + l2Verif := system.NodeClient(e2esys.RoleVerif) + + // Let's make sure that the system is progressing initially for both + // the Sequencer, the Verifier, and the L1Node + // err = wait.ForNextBlock(ctx, l2Seq) + err = wait.ForBlock(ctx, l2Seq, 3) + require.NoError(t, err) + err = retryWaitNTimes(func() error { + return wait.ForNextBlock(ctx, l2Verif) + }, 3) + require.NoError(t, err) // 
Verify everything works env.RunSimpleL2Burn(ctx, t, system) @@ -498,7 +525,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // Wait until at least 2 L2 blocks have been mined (one for the // a block with successful frames, and one for a block with failed frames). - l2Seq := system.NodeClient(e2esys.RoleSeq) err = wait.ForNextBlock(ctx, l2Seq) require.NoError(t, err) err = wait.ForNextBlock(ctx, l2Seq) @@ -512,8 +538,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { }) require.NoError(t, err) - l2Verif := system.NodeClient(e2esys.RoleVerif) - // Stop the "TEE" batcher err = system.BatchSubmitter.TestDriver().StopBatchSubmitting(ctx) require.NoError(t, err) @@ -522,7 +546,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { options, err := bind.NewKeyedTransactorWithChainID(system.Config().Secrets.Deployer, system.Cfg.L1ChainIDBig()) require.NoError(t, err) - l1Client := system.NodeClient(e2esys.RoleL1) batchAuthenticator, err := bindings.NewBatchAuthenticator(system.RollupConfig.BatchAuthenticatorAddress, l1Client) require.NoError(t, err) From 939d00f50425ff5b1683c564953fc36edfe8fd75 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:43:28 -0700 Subject: [PATCH 14/18] Fix failure in Batcher Fallback test The TestFallbackMechanismIntegrationTestChannelNotClosed test fails locally without stopping, in spite of the overall time limit being specified on the test. After some troubleshooting and debugging, we were able to chase down the cause to be due to the `RunSimpleMultiTransactions`. It's unclear as to why this was causing the process to hang for as long as it was. It seemed to not be handling timeout errors well for some reason. Either way, we fortify this helper by setting an explicit time limit on it, and referencing the context whenever we're performing channel operations. This should allow the channel operations themselves not to block and hang the test. 
After this modification we were able to determine that this process was failing due to insufficient gas being provided. For some reason when running the transactions through this mechanism, we require even more gas than we normally need. This seems a bit odd, perhaps it has to do with the differences in the transaction construction. In any case, we up the gas being provided so that this becomes a non-issue. --- .../environment/14_batcher_fallback_test.go | 3 ++- espresso/environment/tx_helpers.go | 23 ++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 7716536cde8..e3e2c4b5cf0 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -517,7 +517,8 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // We want enough L2 Transactions to ensure we have multiple frames. const n = 10 - receipts := env.RunSimpleMultiTransactions(ctx, t, system, n) + receipts, err := env.RunSimpleMultiTransactions(ctx, t, system, n) + require.NoError(t, err) // We want to wait until we know that the intercept tx manager has // trigger the failure mode successfully, and that all n transactions diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index 77a499ff303..bbb2f36851a 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -2,6 +2,7 @@ package environment import ( "context" + "fmt" "math/big" "testing" "time" @@ -139,7 +140,10 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { // Bob. // // This will return once all receipts have been returned. 
-func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) []*types.Receipt { +func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esys.System, numTransactions int) ([]*types.Receipt, error) { + ctx, cancel := context.WithTimeoutCause(ctx, 2*time.Minute, fmt.Errorf("failed to submit all transactions within time frame: %w", context.DeadlineExceeded)) + defer cancel() + senderKey := system.Cfg.Secrets.Bob senderAddress := system.Cfg.Secrets.Addresses().Bob l2Seq := system.NodeClient(e2esys.RoleSeq) @@ -149,10 +153,15 @@ func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esy } ch := make(chan *types.Receipt, numTransactions) + defer close(ch) for i := range numTransactions { go (func(ch chan *types.Receipt, i int, nonce uint64) { receipt := helpers.SendL2Tx(t, system.Cfg, l2Seq, senderKey, func(opts *helpers.TxOpts) { opts.Nonce = nonce + uint64(i) + // We need to explicitly increase the gas beyond some threshold + // for an unknown reason. 
We'll set it high enough so that + // it hopefully won't cause a problem + opts.Gas = 100_000 }) ch <- receipt })(ch, i, nonce) @@ -160,10 +169,12 @@ func RunSimpleMultiTransactions(ctx context.Context, t *testing.T, system *e2esy var receipts []*types.Receipt for range numTransactions { - receipt := <-ch - receipts = append(receipts, receipt) + select { + case <-ctx.Done(): + return receipts, ctx.Err() + case receipt := <-ch: + receipts = append(receipts, receipt) + } } - close(ch) - - return receipts + return receipts, nil } From 2a23768a5d6a2629ae5b15b18b2705fd1cb65923 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 10:56:15 -0700 Subject: [PATCH 15/18] Fix linting issues --- espresso/environment/14_batcher_fallback_test.go | 2 +- espresso/environment/enclave_helpers.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index e3e2c4b5cf0..f8967f8cf87 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -420,7 +420,7 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { system, espressoDevNode, err := launcher.StartE2eDevnet( ctx, t, - // Make Sure that ther Batcher does not start Running + // Make Sure that the Batcher does not start Running env.WithBatcherStoppedInitially(), // Explicitly disable using any sort of compression. This is diff --git a/espresso/environment/enclave_helpers.go b/espresso/environment/enclave_helpers.go index d9af71511c9..e4a2d1b6087 100644 --- a/espresso/environment/enclave_helpers.go +++ b/espresso/environment/enclave_helpers.go @@ -100,7 +100,7 @@ func LaunchBatcherInEnclave() E2eDevnetLauncherOption { // connection, rather than the local testing one. 
As a result // The local Batcher should not be accessed / inspecting / // interacted with for the purposes of any tests that are - // utilizing this Laucnher Option. + // utilizing this Launcher Option. SystemConfigOption: SystemConfigOptionDisableBatcher, SystemConfigOpt: e2esys.WithAllocType(config.AllocTypeEspressoWithEnclave), StartOptions: []e2esys.StartOption{ From 000733b85ec6b89b8862918469faa0980e03920b Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 16:50:22 -0700 Subject: [PATCH 16/18] Update espresso/environment/tx_helpers.go Co-authored-by: Phil --- espresso/environment/tx_helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espresso/environment/tx_helpers.go b/espresso/environment/tx_helpers.go index bbb2f36851a..52c19f43e1b 100644 --- a/espresso/environment/tx_helpers.go +++ b/espresso/environment/tx_helpers.go @@ -134,7 +134,7 @@ func RunSimpleL2Burn(ctx context.Context, t *testing.T, system *e2esys.System) { // RunSimpleMultiTransactions sends numTransactions simple L2 transactions // from Bob's account and returns the receipts. // -// This is all attempted in porallel, as it will spawn a separate goroutine +// This is all attempted in parallel, as it will spawn a separate goroutine // for each transaction submission. Each transaction will be provided its // own nonce, based on the currently understood value of the nonce for // Bob. From a030ed3f96f0dfa42f778480d94da0a0a0eb260f Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Fri, 9 Jan 2026 16:52:07 -0700 Subject: [PATCH 17/18] Correct failure vs success in Send The triggered conditions for failures and successes are backwards in the `Send` method of `TxMangerIntercept`. Their specific frame markers should be switched. 
--- espresso/environment/14_batcher_fallback_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index f8967f8cf87..420fb3c6427 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -301,7 +301,7 @@ func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandida if t.shouldFail { t.failureCount++ - t.markFramesAsSuccessful(frames) + t.markFramesAsUnsuccessful(frames) time.Sleep(50 * time.Millisecond) // Simulate some delay return nil, ErrSimulatedTxSubmissionFailure } @@ -310,7 +310,7 @@ func (t *TxManagerIntercept) Send(ctx context.Context, candidate txmgr.TxCandida t.shouldFail = true } - t.markFramesAsUnsuccessful(frames) + t.markFramesAsSuccessful(frames) return t.TxManager.Send(ctx, candidate) } From 00db67a2000019feb2849a6b8c5dc1a03da55390 Mon Sep 17 00:00:00 2001 From: Theodore Schnepper Date: Sun, 11 Jan 2026 09:42:02 -0700 Subject: [PATCH 18/18] Update espresso/environment/14_batcher_fallback_test.go Co-authored-by: Phil --- espresso/environment/14_batcher_fallback_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/espresso/environment/14_batcher_fallback_test.go b/espresso/environment/14_batcher_fallback_test.go index 420fb3c6427..5cf2720c3c8 100644 --- a/espresso/environment/14_batcher_fallback_test.go +++ b/espresso/environment/14_batcher_fallback_test.go @@ -495,7 +495,6 @@ func TestFallbackMechanismIntegrationTestChannelNotClosed(t *testing.T) { // Let's make sure that the system is progressing initially for both // the Sequencer, the Verifier, and the L1Node - // err = wait.ForNextBlock(ctx, l2Seq) err = wait.ForBlock(ctx, l2Seq, 3) require.NoError(t, err) err = retryWaitNTimes(func() error {