diff --git a/tests/node/engine_test.go b/tests/node/engine_test.go index deab6d1e77..adaa8abd32 100644 --- a/tests/node/engine_test.go +++ b/tests/node/engine_test.go @@ -24,7 +24,7 @@ func TestEngine(gt *testing.T) { clRPC, err := GetNodeRPCEndpoint(t.Ctx(), &node) // See if the node supports the dev RPC. - if !supportsDevRPC(t, clName, clRPC) { + if !supportsDevRPC(clRPC) { t.Log("node does not support dev RPC, skipping engine test for", node.Escape().ID().Key()) continue } diff --git a/tests/node/mod.go b/tests/node/mod.go index 566420cf5a..bcd9097be4 100644 --- a/tests/node/mod.go +++ b/tests/node/mod.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/http" + "os/exec" "strings" "time" @@ -54,7 +55,7 @@ func websocketRPC(clRPC string) string { return strings.Replace(clRPC, "http", "ws", 1) } -func supportsDevRPC(t devtest.T, clName string, clRPC string) bool { +func supportsDevRPC(clRPC string) bool { // To see if the node supports the dev RPC, we try to send a request to the dev RPC to // get the last engine queue length. engineQueueLength := 0 @@ -123,6 +124,87 @@ func GetNodeRPCEndpoint(ctx context.Context, node *dsl.L2CLNode) (string, error) return rpcEndpoint(ctx, node.Escape().ID().Key()) } +// StopNode stops a specific l2cl node. +func StopNode(ctx context.Context, node *dsl.L2CLNode) error { + return StopKurtosisService(ctx, node.Escape().ID().Key()) +} + +// StopKurtosisService stops a specific service in a Kurtosis enclave +func StopKurtosisService(ctx context.Context, serviceName string) error { + kurtosisCtx, err := kurtosis_context.NewKurtosisContextFromLocalEngine() + if err != nil { + return fmt.Errorf("failed to create kurtosis context: %w", err) + } + + enclaves, err := kurtosisCtx.GetEnclaves(ctx) + if err != nil { + return err + } + + for enclave := range enclaves.GetEnclavesByName() { + enclaveCtx, err := kurtosisCtx.GetEnclaveContext(ctx, enclave) + if err != nil { + return fmt.Errorf("failed to get enclave context for %s: %w", enclave, err) + } + + // Check if the service exists in this enclave + _, err = enclaveCtx.GetServiceContext(serviceName) + if err != nil { + continue + } + + // Stop the service + if err := exec.Command("kurtosis", "service", "stop", + enclave, serviceName).Run(); err != nil { + return fmt.Errorf("failed to stop service %s in enclave %s: %w", serviceName, enclave, err) + } + + return nil + } + + return fmt.Errorf("service %s not found in any enclave", serviceName) +} + +// StartNode starts a specific l2cl node that was previously stopped +func StartNode(ctx context.Context, node *dsl.L2CLNode) error { + return StartKurtosisService(ctx, node.Escape().ID().Key()) +} + +// StartKurtosisService starts a specific service in a Kurtosis enclave +func StartKurtosisService(ctx context.Context, serviceName string) error { + kurtosisCtx, err := kurtosis_context.NewKurtosisContextFromLocalEngine() + if err != nil { + return fmt.Errorf("failed to create kurtosis context: %w", err) + } + + enclaves, err := kurtosisCtx.GetEnclaves(ctx) + if err != nil { + return err + } + + for enclave := range enclaves.GetEnclavesByName() { + enclaveCtx, err := kurtosisCtx.GetEnclaveContext(ctx, enclave) + if err != nil { + return fmt.Errorf("failed to get enclave context for %s: %w", enclave, err) + } + + // Check if the service exists in this enclave + _, err = enclaveCtx.GetServiceContext(serviceName) + if err != nil { + continue + } + + if err := exec.Command("kurtosis", "service", "start", + enclave, serviceName).Run(); err != nil { + return fmt.Errorf("failed to start service %s in enclave %s: %w", serviceName, enclave, err) + } + + return nil + } + + return fmt.Errorf("service %s not found in any enclave", serviceName) +} + func SendRPCRequest[T any](addr string, method string, resOutput *T, params ...any) error { // 1. Build the payload. s := rpcRequest{ @@ -255,7 +337,7 @@ outer_loop: return output } -func GetKonaWs[T any](t devtest.T, wsRPC string, method string, runUntil <-chan T) []eth.L2BlockRef { +func GetKonaWS[T any](t devtest.T, wsRPC string, method string, runUntil <-chan T) []eth.L2BlockRef { return GetPrefixedWs[T, eth.L2BlockRef](t, "ws", wsRPC, method, runUntil) } diff --git a/tests/node/restart_test.go b/tests/node/restart_test.go new file mode 100644 index 0000000000..b3f6417959 --- /dev/null +++ b/tests/node/restart_test.go @@ -0,0 +1,69 @@ +package node + +import ( + "sync" + "testing" + "time" + + "github.com/ethereum-optimism/optimism/op-devstack/devtest" + "github.com/ethereum-optimism/optimism/op-devstack/dsl" + "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/types" + "github.com/stretchr/testify/require" +) + +// Ensure that kona-nodes restart and sync properly when stopped for a while. +func TestRestartSync(gt *testing.T) { + t := devtest.SerialT(gt) + + out := NewMixedOpKona(t) + + nodes := out.L2CLNodes() + + t.Gate().Greater(len(nodes), 1, "expected at least two nodes") + + ref := nodes[0] + + var wg sync.WaitGroup + for _, node := range nodes { + if node == ref { + t.Logf("skipping reference node %s", node.Escape().ID().Key()) + continue + } + + t.Logf("testing restarts for node %s", node.Escape().ID().Key()) + wg.Add(1) + go func(node *dsl.L2CLNode) { + defer wg.Done() + clName := node.Escape().ID().Key() + + require.NoError(t, StopNode(t.Ctx(), node), "failed to stop node %s", clName) + t.Logf("stopped node %s", clName) + + // Wait for 2 minutes + time.Sleep(2 * time.Minute) + + require.NoError(t, StartNode(t.Ctx(), node), "failed to start node %s", clName) + t.Logf("restarted node %s", clName) + + // Check that the node is resyncing with the network + dsl.CheckAll(t, node.MatchedFn(&ref, types.LocalSafe, 50), node.MatchedFn(&ref, types.LocalUnsafe, 50)) + + // Check that the node is connected to the reference node + peers := node.Peers() + t.Require().Greater(len(peers.Peers), 0, "expected at least one peer") + + // Check that there is at least a peer with the same ID as the ref node + found := false + for _, peer := range peers.Peers { + if peer.PeerID == ref.PeerInfo().PeerID { + t.Logf("node %s is connected to reference node %s", clName, ref.Escape().ID().Key()) + found = true + break + } + } + + t.Require().True(found, "expected node %s to be connected to reference node %s", clName, ref.Escape().ID().Key()) + }(&node) + } + wg.Wait() +} diff --git a/tests/node/sync_ws_test.go b/tests/node/sync_ws_test.go index 0c7f2b3be9..094f32a776 100644 --- a/tests/node/sync_ws_test.go +++ b/tests/node/sync_ws_test.go @@ -44,9 +44,9 @@ func TestSyncUnsafeBecomesSafe(gt *testing.T) { wsRPC := websocketRPC(clRPC) t.Log("node supports ws endpoint, continuing sync test", clName, wsRPC) - unsafeBlocks := GetKonaWs(t, wsRPC, "unsafe_head", time.After(SECS_WAIT_FOR_UNSAFE_HEAD*time.Second)) + unsafeBlocks := GetKonaWS(t, wsRPC, "unsafe_head", time.After(SECS_WAIT_FOR_UNSAFE_HEAD*time.Second)) - safeBlocks := GetKonaWs(t, wsRPC, "safe_head", time.After(SECS_WAIT_FOR_SAFE_HEAD*time.Second)) + safeBlocks := GetKonaWS(t, wsRPC, "safe_head", time.After(SECS_WAIT_FOR_SAFE_HEAD*time.Second)) require.GreaterOrEqual(t, len(unsafeBlocks), 1, "we didn't receive enough unsafe gossip blocks!") require.GreaterOrEqual(t, len(safeBlocks), 1, "we didn't receive enough safe gossip blocks!") @@ -101,7 +101,7 @@ func TestSyncUnsafe(gt *testing.T) { wsRPC := websocketRPC(clRPC) t.Log("node supports ws endpoint, continuing sync test", clName, wsRPC) - output := GetKonaWs(t, wsRPC, "unsafe_head", time.After(10*time.Second)) + output := GetKonaWS(t, wsRPC, "unsafe_head", time.After(10*time.Second)) // For each block, we check that the block is actually in the chain of the other nodes. // That should always be the case unless there is a reorg or a long sync. @@ -160,7 +160,7 @@ func TestSyncSafe(gt *testing.T) { wsRPC := websocketRPC(clRPC) t.Log("node supports ws endpoint, continuing sync test", clName, wsRPC) - output := GetKonaWs(t, wsRPC, "safe_head", time.After(10*time.Second)) + output := GetKonaWS(t, wsRPC, "safe_head", time.After(10*time.Second)) // For each block, we check that the block is actually in the chain of the other nodes. // That should always be the case unless there is a reorg or a long sync. @@ -219,7 +219,7 @@ func TestSyncFinalized(gt *testing.T) { wsRPC := websocketRPC(clRPC) t.Log("node supports ws endpoint, continuing sync test", clName, wsRPC) - output := GetKonaWs(t, wsRPC, "finalized_head", time.After(4*time.Minute)) + output := GetKonaWS(t, wsRPC, "finalized_head", time.After(4*time.Minute)) // We should check that we received at least 2 finalized blocks within 4 minutes! require.Greater(t, len(output), 1, "we didn't receive enough finalized gossip blocks!")