-
Notifications
You must be signed in to change notification settings - Fork 121
subservers: fail LiT startup when integrated sub-server boot fails #1183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
7341bdb
19728cd
d787f25
66abec6
9884fdc
05b8da3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ import ( | |
| "github.com/lightninglabs/faraday/frdrpc" | ||
| "github.com/lightninglabs/lightning-node-connect/mailbox" | ||
| "github.com/lightninglabs/lightning-terminal/litrpc" | ||
| "github.com/lightninglabs/lightning-terminal/subservers" | ||
| "github.com/lightninglabs/loop/looprpc" | ||
| "github.com/lightninglabs/pool/poolrpc" | ||
| "github.com/lightninglabs/taproot-assets/taprpc" | ||
|
|
@@ -29,6 +30,7 @@ import ( | |
| "github.com/lightningnetwork/lnd/lnrpc/routerrpc" | ||
| "github.com/lightningnetwork/lnd/lnrpc/walletrpc" | ||
| "github.com/lightningnetwork/lnd/macaroons" | ||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| "golang.org/x/net/http2" | ||
| "google.golang.org/grpc" | ||
|
|
@@ -465,6 +467,170 @@ func testModeIntegrated(ctx context.Context, net *NetworkHarness, | |
| ) | ||
| } | ||
|
|
||
| // testCriticalTapStartupFailure ensures LiT exits quickly when a critical integrated sub-server (tapd) fails to start during boot. | ||
| // It first waits for a harness process error naming the invalid postgres host, then waits for the litd process-exit signal. | ||
| func testCriticalTapStartupFailure(ctx context.Context, net *NetworkHarness, | ||
| t *harnessTest) { | ||
|
|
||
| // Force tapd to use a postgres backend with an invalid host to | ||
| // guarantee a startup failure in integrated mode. This config will | ||
| // error during tapd's startup (after wallet unlock) rather than during | ||
| // litd's config validation phase, which is why NewNode itself is required to succeed below. | ||
| node, err := net.NewNode( | ||
| t.t, "FailFastTap", nil, false, false, | ||
| "--taproot-assets.databasebackend=postgres", | ||
| "--taproot-assets.postgres.host=tapd-postgres.invalid", | ||
| ) | ||
| require.NoError(t.t, err) | ||
|
|
||
| defer func() { | ||
| _ = net.ShutdownNode(node) | ||
| }() | ||
|
|
||
| select { | ||
| case procErr := <-net.ProcessErrors(): | ||
| require.ErrorContains(t.t, procErr, "tapd-postgres.invalid") | ||
| case <-time.After(15 * time.Second): | ||
| t.Fatalf("expected tapd startup failure to be reported") | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Depending on @ellemouton's response to my broader question in the main review comment, we likely want to expand the test code here, to ensure that the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue to track interceptor decoupling: #1201 |
||
| // LiT should terminate promptly after the critical startup failure: allow at most 5 seconds for a receive on node.processExit to succeed. | ||
| select { | ||
| case <-node.processExit: | ||
| case <-time.After(5 * time.Second): | ||
| t.Fatalf("litd did not exit after tapd startup failure") | ||
| } | ||
| } | ||
|
|
||
| // testNonCriticalLoopStartupFailure ensures LiT continues running when a non-critical integrated sub-server (loopd) fails to start during boot. | ||
| // It polls the status RPC until loop reports a non-running state with an error mentioning "maxl402cost", and until lnd and litd each report running. | ||
| func testNonCriticalLoopStartupFailure(ctx context.Context, | ||
| net *NetworkHarness, t *harnessTest) { | ||
|
|
||
| // Force loopd into an invalid config combination to guarantee a | ||
| // startup failure in integrated mode (NOTE(review): presumably loopd rejects maxl402cost and maxlsatcost being set together; confirm). | ||
| node, err := net.NewNode( | ||
| t.t, "NonCriticalLoop", nil, false, false, | ||
| "--loop.maxl402cost=1", | ||
| "--loop.maxlsatcost=1", | ||
| ) | ||
| require.NoError(t.t, err) | ||
|
|
||
| stopNodeGracefully := func() error { | ||
| conn, err := node.ConnectRPC(true) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| defer conn.Close() | ||
|
|
||
| stopCtx, cancel := context.WithTimeout(ctx, defaultTimeout) | ||
| defer cancel() | ||
|
|
||
| lndConn := lnrpc.NewLightningClient(conn) | ||
| _, err = lndConn.StopDaemon(stopCtx, &lnrpc.StopRequest{}) | ||
| return err | ||
| } | ||
|
|
||
| defer func() { | ||
| t.Logf("Shutting down node: %s", node.Cfg.LitDir) | ||
| if err := stopNodeGracefully(); err != nil { | ||
| t.Logf("Graceful shutdown failed, killing node: %v", | ||
| err) | ||
| _ = net.KillNode(node) | ||
| } | ||
| select { | ||
| case <-node.processExit: | ||
| case <-time.After(5 * time.Second): | ||
| _ = net.KillNode(node) | ||
| } | ||
| _ = node.cleanup() | ||
| }() | ||
|
|
||
| // Wait for the TLS cert so we can connect to the status service; the path is polled once per second for up to 10 seconds. | ||
| t.Logf("Waiting for lit TLS cert to be created: %s", | ||
| node.Cfg.LitTLSCertPath) | ||
| require.Eventually(t.t, func() bool { | ||
| _, err := os.Stat(node.Cfg.LitTLSCertPath) | ||
| return err == nil | ||
| }, 10*time.Second, 1*time.Second, | ||
| "expected lit TLS cert to be created", | ||
| ) | ||
|
|
||
| t.Logf("Connecting to litd service") | ||
| rawConn, err := connectLitRPC( | ||
| ctx, node.Cfg.LitAddr(), node.Cfg.LitTLSCertPath, "", | ||
| ) | ||
| require.NoError(t.t, err) | ||
| defer rawConn.Close() | ||
|
|
||
| statusConn := litrpc.NewStatusClient(rawConn) | ||
|
|
||
| fetchStatus := func() (*litrpc.SubServerStatusResp, error) { | ||
| callCtx, cancel := context.WithTimeout(ctx, 2*time.Second) | ||
| defer cancel() | ||
|
|
||
| return statusConn.SubServerStatus( | ||
| callCtx, &litrpc.SubServerStatusReq{}, | ||
| ) | ||
| } | ||
|
|
||
| waitForStatus := func(desc string, | ||
| predicate func(*litrpc.SubServerStatusResp) bool) { | ||
|
|
||
| t.t.Helper() | ||
|
|
||
| var lastErr error | ||
| var lastResp *litrpc.SubServerStatusResp | ||
|
|
||
| ok := assert.Eventually(t.t, func() bool { | ||
| lastResp, lastErr = fetchStatus() | ||
| if lastErr != nil || lastResp == nil { | ||
| return false | ||
| } | ||
|
|
||
| return predicate(lastResp) | ||
| }, 20*time.Second, 200*time.Millisecond) | ||
| if ok { | ||
| return | ||
| } | ||
|
|
||
| if lastErr != nil { | ||
| t.t.Fatalf("%s: last error: %v", desc, lastErr) | ||
| } | ||
|
|
||
| t.t.Fatalf("%s: last status: %#v", desc, lastResp.SubServers) | ||
| } | ||
|
|
||
| t.Logf("Waiting for loop to report non-critical loop startup failure") | ||
| waitForStatus("expected loop startup error to be reported", | ||
| func(resp *litrpc.SubServerStatusResp) bool { | ||
| status, ok := resp.SubServers[subservers.LOOP] | ||
| if !ok || status.Running || status.Error == "" { | ||
| return false | ||
| } | ||
|
|
||
| res := strings.Contains(status.Error, "maxl402cost") | ||
| return res | ||
| }, | ||
| ) | ||
|
|
||
| t.Logf("Waiting for lnd to report running state") | ||
| waitForStatus("expected lnd to be running", | ||
| func(resp *litrpc.SubServerStatusResp) bool { | ||
| status, ok := resp.SubServers[subservers.LND] | ||
| return ok && status.Running | ||
| }, | ||
| ) | ||
|
|
||
| t.Logf("Waiting for litd to report running state") | ||
| waitForStatus("expected litd to be running", | ||
| func(resp *litrpc.SubServerStatusResp) bool { | ||
| status, ok := resp.SubServers[subservers.LIT] | ||
| return ok && status.Running | ||
| }, | ||
| ) | ||
| } | ||
|
|
||
| // integratedTestSuite makes sure that in integrated mode all daemons work | ||
| // correctly. | ||
| func integratedTestSuite(ctx context.Context, net *NetworkHarness, t *testing.T, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.