From 817994ef0ca7931b8fbd5b22446bb3b98e1322bd Mon Sep 17 00:00:00 2001 From: Ralf Dittmer <10587619+Antisys@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:20:53 +0100 Subject: [PATCH 1/3] perms: fix build-tag-gated Config struct field references in mocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mock FetchConfig implementations were setting fields (Manager, ChainNotifier, Chain, Signer, FeeEstimator, Wallet, KeyRing, Sweeper) that only exist when the corresponding RPC build tags are active. Without those tags, the Config structs are empty, causing compilation failures on plain `go build ./...`. Remove the field initializers since CreateSubServer only needs a non-nil Config to extract permissions — the field values are unused. Co-Authored-By: Claude Opus 4.6 (1M context) --- perms/mock.go | 25 ++++--------------------- perms/mock_dev.go | 25 ++++--------------------- 2 files changed, 8 insertions(+), 42 deletions(-) diff --git a/perms/mock.go b/perms/mock.go index 43b82cb41..c07897c2d 100644 --- a/perms/mock.go +++ b/perms/mock.go @@ -5,8 +5,6 @@ package perms import ( "net" - "github.com/lightningnetwork/lnd/autopilot" - "github.com/lightningnetwork/lnd/chainreg" "github.com/lightningnetwork/lnd/lnrpc" "github.com/lightningnetwork/lnd/lnrpc/autopilotrpc" "github.com/lightningnetwork/lnd/lnrpc/chainrpc" @@ -19,9 +17,7 @@ import ( "github.com/lightningnetwork/lnd/lnrpc/walletrpc" "github.com/lightningnetwork/lnd/lnrpc/watchtowerrpc" "github.com/lightningnetwork/lnd/lnrpc/wtclientrpc" - "github.com/lightningnetwork/lnd/lntest/mock" "github.com/lightningnetwork/lnd/routing" - "github.com/lightningnetwork/lnd/sweep" ) // mockConfig implements lnrpc.SubServerConfigDispatcher. It provides the @@ -50,14 +46,9 @@ func (t *mockConfig) FetchConfig(subServerName string) (interface{}, bool) { }, }, true case "AutopilotRPC": - return &autopilotrpc.Config{ - Manager: &autopilot.Manager{}, - }, true + return &autopilotrpc.Config{}, true case "ChainRPC": - return &chainrpc.Config{ - ChainNotifier: &chainreg.NoChainBackend{}, - Chain: &mock.ChainIO{}, - }, true + return &chainrpc.Config{}, true case "DevRPC": return &devrpc.Config{}, true case "NeutrinoKitRPC": @@ -69,17 +60,9 @@ func (t *mockConfig) FetchConfig(subServerName string) (interface{}, bool) { Router: &routing.ChannelRouter{}, }, true case "SignRPC": - return &signrpc.Config{ - Signer: &mock.DummySigner{}, - }, true + return &signrpc.Config{}, true case "WalletKitRPC": - return &walletrpc.Config{ - FeeEstimator: &chainreg.NoChainBackend{}, - Wallet: &mock.WalletController{}, - KeyRing: &mock.SecretKeyRing{}, - Sweeper: &sweep.UtxoSweeper{}, - Chain: &mock.ChainIO{}, - }, true + return &walletrpc.Config{}, true case "WatchtowerRPC": return &watchtowerrpc.Config{}, true default: diff --git a/perms/mock_dev.go b/perms/mock_dev.go index f609a5e2f..74d3f4789 100644 --- a/perms/mock_dev.go +++ b/perms/mock_dev.go @@ -6,8 +6,6 @@ import ( "net" "github.com/btcsuite/btcd/chaincfg" - "github.com/lightningnetwork/lnd/autopilot" - "github.com/lightningnetwork/lnd/chainreg" graphdb "github.com/lightningnetwork/lnd/graph/db" "github.com/lightningnetwork/lnd/lnrpc" "github.com/lightningnetwork/lnd/lnrpc/autopilotrpc" @@ -21,9 +19,7 @@ import ( "github.com/lightningnetwork/lnd/lnrpc/walletrpc" "github.com/lightningnetwork/lnd/lnrpc/watchtowerrpc" "github.com/lightningnetwork/lnd/lnrpc/wtclientrpc" - "github.com/lightningnetwork/lnd/lntest/mock" "github.com/lightningnetwork/lnd/routing" - "github.com/lightningnetwork/lnd/sweep" ) // mockConfig implements lnrpc.SubServerConfigDispatcher. It provides the @@ -52,14 +48,9 @@ func (t *mockConfig) FetchConfig(subServerName string) (interface{}, bool) { }, }, true case "AutopilotRPC": - return &autopilotrpc.Config{ - Manager: &autopilot.Manager{}, - }, true + return &autopilotrpc.Config{}, true case "ChainRPC": - return &chainrpc.Config{ - ChainNotifier: &chainreg.NoChainBackend{}, - Chain: &mock.ChainIO{}, - }, true + return &chainrpc.Config{}, true case "DevRPC": return &devrpc.Config{ ActiveNetParams: &chaincfg.RegressionNetParams, @@ -74,17 +65,9 @@ func (t *mockConfig) FetchConfig(subServerName string) (interface{}, bool) { Router: &routing.ChannelRouter{}, }, true case "SignRPC": - return &signrpc.Config{ - Signer: &mock.DummySigner{}, - }, true + return &signrpc.Config{}, true case "WalletKitRPC": - return &walletrpc.Config{ - FeeEstimator: &chainreg.NoChainBackend{}, - Wallet: &mock.WalletController{}, - KeyRing: &mock.SecretKeyRing{}, - Sweeper: &sweep.UtxoSweeper{}, - Chain: &mock.ChainIO{}, - }, true + return &walletrpc.Config{}, true case "WatchtowerRPC": return &watchtowerrpc.Config{}, true default: From 72783b81b6fa7748fadbe61f549962d885f8bb42 Mon Sep 17 00:00:00 2001 From: Ralf Dittmer <10587619+Antisys@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:21:01 +0100 Subject: [PATCH 2/3] subservers+terminal: fail litd startup on critical sub-server error When taproot-assets-mode is not disabled, lnd calls into tapd at runtime. If tapd fails to start, litd previously continued in a broken state where those calls would silently fail. Mark tapd as a critical sub-server so that its startup failure is fatal to litd. The implementation uses a two-pass approach: all sub-servers are attempted regardless of individual failures, then an error is returned if any critical server failed. This avoids non-determinism from Go's random map iteration order. Fixes #1181. Co-Authored-By: Claude Opus 4.6 (1M context) --- subservers/manager.go | 55 ++++++++- subservers/manager_test.go | 239 +++++++++++++++++++++++++++++++++++++ terminal.go | 29 +++-- 3 files changed, 311 insertions(+), 12 deletions(-) create mode 100644 subservers/manager_test.go diff --git a/subservers/manager.go b/subservers/manager.go index 8d9f6b1b3..b34805886 100644 --- a/subservers/manager.go +++ b/subservers/manager.go @@ -36,7 +36,12 @@ type Manager struct { servers map[string]*subServerWrapper permsMgr *perms.Manager statusServer *status.Manager - mu sync.RWMutex + + // critical is a set of sub-server names whose startup failure should + // be treated as fatal for litd. + critical map[string]bool + + mu sync.RWMutex } // NewManager constructs a new Manager. @@ -47,6 +52,7 @@ func NewManager(permsMgr *perms.Manager, servers: make(map[string]*subServerWrapper), permsMgr: permsMgr, statusServer: statusServer, + critical: make(map[string]bool), } } @@ -103,14 +109,31 @@ func (s *Manager) GetServer(name string) (SubServer, bool) { return ss.SubServer, true } +// SetCritical marks the given sub-server as critical. If a critical sub-server +// fails to start, litd will treat the failure as fatal and will not continue +// starting up. This must be called before StartIntegratedServers or +// ConnectRemoteSubServers. +func (s *Manager) SetCritical(name string) { + s.mu.Lock() + defer s.mu.Unlock() + + s.critical[name] = true +} + // StartIntegratedServers starts all the manager's sub-servers that should be -// started in integrated mode. +// started in integrated mode. All sub-servers are attempted regardless of +// individual failures. After all attempts, if any sub-server that has been +// marked as critical via SetCritical failed to start, an error is returned. +// Non-critical sub-server failures are logged but do not prevent litd from +// continuing. func (s *Manager) StartIntegratedServers(lndClient lnrpc.LightningClient, - lndGrpc *lndclient.GrpcLndServices, withMacaroonService bool) { + lndGrpc *lndclient.GrpcLndServices, withMacaroonService bool) error { s.mu.Lock() defer s.mu.Unlock() + var criticalErr error + for _, ss := range s.servers { if ss.Remote() { continue @@ -126,19 +149,32 @@ func (s *Manager) StartIntegratedServers(lndClient lnrpc.LightningClient, ) if err != nil { s.statusServer.SetErrored(ss.Name(), err.Error()) + + if s.critical[ss.Name()] { + criticalErr = fmt.Errorf("critical "+ + "sub-server %s failed to "+ + "start: %w", ss.Name(), err) + } + continue } s.statusServer.SetRunning(ss.Name()) } + + return criticalErr } // ConnectRemoteSubServers creates connections to all the manager's sub-servers -// that are running remotely. -func (s *Manager) ConnectRemoteSubServers() { +// that are running remotely. All sub-servers are attempted regardless of +// individual failures. After all attempts, if any sub-server that has been +// marked as critical via SetCritical failed to connect, an error is returned. +func (s *Manager) ConnectRemoteSubServers() error { s.mu.Lock() defer s.mu.Unlock() + var criticalErr error + for _, ss := range s.servers { if !ss.Remote() { continue @@ -147,11 +183,20 @@ func (s *Manager) ConnectRemoteSubServers() { err := ss.connectRemote() if err != nil { s.statusServer.SetErrored(ss.Name(), err.Error()) + + if s.critical[ss.Name()] { + criticalErr = fmt.Errorf("critical "+ + "sub-server %s failed to "+ + "connect: %w", ss.Name(), err) + } + continue } s.statusServer.SetRunning(ss.Name()) } + + return criticalErr } // RegisterRPCServices registers all the manager's sub-servers with the given diff --git a/subservers/manager_test.go b/subservers/manager_test.go new file mode 100644 index 000000000..714c7ee11 --- /dev/null +++ b/subservers/manager_test.go @@ -0,0 +1,239 @@ +package subservers + +import ( + "context" + "errors" + "fmt" + "testing" + + restProxy "github.com/grpc-ecosystem/grpc-gateway/v2/runtime" + "github.com/lightninglabs/lightning-terminal/status" + "github.com/lightninglabs/lndclient" + "github.com/lightninglabs/taproot-assets/fn" + "github.com/lightningnetwork/lnd/lnrpc" + "google.golang.org/grpc" + "gopkg.in/macaroon-bakery.v2/bakery" +) + +// mockSubServer is a minimal implementation of the SubServer interface for +// testing. +type mockSubServer struct { + name string + remote bool + startErr error +} + +var _ SubServer = (*mockSubServer)(nil) + +func (m *mockSubServer) Name() string { return m.name } +func (m *mockSubServer) Remote() bool { return m.remote } +func (m *mockSubServer) MacPath() string { return "" } + +func (m *mockSubServer) RemoteConfig() *RemoteDaemonConfig { + return &RemoteDaemonConfig{} +} + +func (m *mockSubServer) Start(_ lnrpc.LightningClient, + _ *lndclient.GrpcLndServices, _ bool) error { + + return m.startErr +} + +func (m *mockSubServer) Stop() error { return nil } + +func (m *mockSubServer) RegisterGrpcService(_ grpc.ServiceRegistrar) {} + +func (m *mockSubServer) RegisterRestService(_ context.Context, + _ *restProxy.ServeMux, _ string, _ []grpc.DialOption) error { + + return nil +} + +func (m *mockSubServer) ServerErrChan() chan error { return nil } + +func (m *mockSubServer) Permissions() map[string][]bakery.Op { + return nil +} + +func (m *mockSubServer) WhiteListedURLs() map[string]struct{} { + return nil +} + +func (m *mockSubServer) Impl() fn.Option[any] { + return fn.None[any]() +} + +func (m *mockSubServer) ValidateMacaroon(_ context.Context, + _ []bakery.Op, _ string) error { + + return nil +} + +func newTestManager() *Manager { + return &Manager{ + servers: make(map[string]*subServerWrapper), + statusServer: status.NewStatusManager(), + critical: make(map[string]bool), + } +} + +// addMockServer registers a mock sub-server directly in the manager's internal +// map, bypassing AddServer (which requires a functional perms.Manager). The +// status manager is also updated to track the sub-server. +func addMockServer(t *testing.T, mgr *Manager, mock *mockSubServer) { + t.Helper() + + err := mgr.statusServer.RegisterSubServer(mock.Name()) + if err != nil { + t.Fatalf("failed to register sub-server %s with status "+ + "manager: %v", mock.Name(), err) + } + + mgr.statusServer.SetEnabled(mock.Name()) + + mgr.servers[mock.Name()] = &subServerWrapper{ + SubServer: mock, + quit: make(chan struct{}), + } +} + +// TestStartIntegratedServersCriticalFailure verifies that +// StartIntegratedServers returns an error when a critical sub-server fails to +// start, while still attempting to start all other sub-servers. +func TestStartIntegratedServersCriticalFailure(t *testing.T) { + mgr := newTestManager() + + failErr := errors.New("tapd init failed") + tapMock := &mockSubServer{name: TAP, startErr: failErr} + loopMock := &mockSubServer{name: LOOP} + faradayMock := &mockSubServer{name: FARADAY} + + addMockServer(t, mgr, tapMock) + addMockServer(t, mgr, loopMock) + addMockServer(t, mgr, faradayMock) + + mgr.SetCritical(TAP) + + err := mgr.StartIntegratedServers(nil, nil, false) + if err == nil { + t.Fatal("expected error from StartIntegratedServers when " + + "critical sub-server fails, got nil") + } + + if !errors.Is(err, failErr) { + t.Fatalf("expected wrapped error to contain %v, got: %v", + failErr, err) + } + + // Verify non-critical servers were still started despite the critical + // failure (map iteration order is random, so we check both). + for _, name := range []string{LOOP, FARADAY} { + ss, ok := mgr.servers[name] + if !ok { + t.Fatalf("server %s not found", name) + } + if !ss.started() { + t.Errorf("non-critical server %s should have been "+ + "started even though critical server failed", + name) + } + } +} + +// TestStartIntegratedServersNonCriticalFailure verifies that +// StartIntegratedServers does NOT return an error when a non-critical +// sub-server fails. +func TestStartIntegratedServersNonCriticalFailure(t *testing.T) { + mgr := newTestManager() + + loopMock := &mockSubServer{ + name: LOOP, + startErr: errors.New("loop init failed"), + } + tapMock := &mockSubServer{name: TAP} + + addMockServer(t, mgr, loopMock) + addMockServer(t, mgr, tapMock) + + mgr.SetCritical(TAP) + + err := mgr.StartIntegratedServers(nil, nil, false) + if err != nil { + t.Fatalf("non-critical failure should not cause error, "+ + "got: %v", err) + } +} + +// TestStartIntegratedServersNoCritical verifies the original behavior: when +// no servers are marked critical, all errors are non-fatal. +func TestStartIntegratedServersNoCritical(t *testing.T) { + mgr := newTestManager() + + tapMock := &mockSubServer{ + name: TAP, + startErr: errors.New("tapd init failed"), + } + addMockServer(t, mgr, tapMock) + + // No SetCritical call. + err := mgr.StartIntegratedServers(nil, nil, false) + if err != nil { + t.Fatalf("without critical servers, no error should be "+ + "returned, got: %v", err) + } +} + +// TestStartIntegratedServersCriticalDisabled verifies that marking a server +// as critical has no effect if the server was never added (i.e., disabled). +func TestStartIntegratedServersCriticalDisabled(t *testing.T) { + mgr := newTestManager() + + loopMock := &mockSubServer{name: LOOP} + addMockServer(t, mgr, loopMock) + + // Mark TAP as critical but don't add it — simulates + // taproot-assets-mode=disable where AddServer is called with + // enable=false. + mgr.SetCritical(TAP) + + err := mgr.StartIntegratedServers(nil, nil, false) + if err != nil { + t.Fatalf("critical server that is disabled should not "+ + "cause error, got: %v", err) + } +} + +// TestStatusSetOnCriticalFailure verifies that the status manager correctly +// records the error for a critical sub-server that fails. +func TestStatusSetOnCriticalFailure(t *testing.T) { + mgr := newTestManager() + + tapMock := &mockSubServer{ + name: TAP, + startErr: fmt.Errorf("database corrupt"), + } + addMockServer(t, mgr, tapMock) + mgr.SetCritical(TAP) + + _ = mgr.StartIntegratedServers(nil, nil, false) + + resp, err := mgr.statusServer.SubServerStatus( + context.Background(), nil, + ) + if err != nil { + t.Fatalf("failed to query status: %v", err) + } + + tapStatus, ok := resp.SubServers[TAP] + if !ok { + t.Fatal("TAP not found in status response") + } + + if tapStatus.Running { + t.Error("TAP should not be marked as running") + } + + if tapStatus.Error == "" { + t.Error("TAP should have an error message set") + } +} diff --git a/terminal.go b/terminal.go index 387ec3b12..024687f35 100644 --- a/terminal.go +++ b/terminal.go @@ -369,6 +369,14 @@ func (g *LightningTerminal) Run(ctx context.Context) error { return fmt.Errorf("could not initialise sub-servers: %w", err) } + // Since lnd may call into tapd when it is enabled, tapd must be + // started successfully before those calls are made. We therefore + // mark it as a critical sub-server so that litd will fail to start + // if tapd fails to start. + if g.cfg.TaprootAssetsMode != ModeDisable { + g.subServerMgr.SetCritical(subservers.TAP) + } + // Construct the rpcProxy. It must be initialised before the main web // server is started. g.rpcProxy = newRpcProxy( @@ -422,11 +430,11 @@ func (g *LightningTerminal) Run(ctx context.Context) error { return startErr } -// start attempts to start all the various components of Litd. Only Litd and -// LND errors are considered fatal and will result in an error being returned. -// If any of the sub-servers managed by the subServerMgr error while starting -// up, these are considered non-fatal and will not result in an error being -// returned. +// start attempts to start all the various components of Litd. Litd, LND, and +// critical sub-server errors are considered fatal and will result in an error +// being returned. If any non-critical sub-servers managed by the subServerMgr +// error while starting up, these are considered non-fatal and will not result +// in an error being returned. func (g *LightningTerminal) start(ctx context.Context) error { var err error @@ -626,7 +634,10 @@ func (g *LightningTerminal) start(ctx context.Context) error { // Initialise any connections to sub-servers that we are running in // remote mode. - g.subServerMgr.ConnectRemoteSubServers() + if err := g.subServerMgr.ConnectRemoteSubServers(); err != nil { + return fmt.Errorf("could not connect remote sub-servers: %w", + err) + } // bakeSuperMac is a closure that can be used to bake a new super // macaroon that contains all active permissions. @@ -769,9 +780,13 @@ func (g *LightningTerminal) start(ctx context.Context) error { // Both connection types are ready now, let's start our sub-servers if // they should be started locally as an integrated service. createDefaultMacaroons := !g.cfg.statelessInitMode - g.subServerMgr.StartIntegratedServers( + err = g.subServerMgr.StartIntegratedServers( g.basicClient, g.lndClient, createDefaultMacaroons, ) + if err != nil { + return fmt.Errorf("could not start integrated sub-servers: %w", + err) + } err = g.startInternalSubServers(ctx, !g.cfg.statelessInitMode) if err != nil { From fd3f0dd0a8437781dd1c3995ee756be01aa3819e Mon Sep 17 00:00:00 2001 From: Ralf Dittmer <10587619+Antisys@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:25:17 +0100 Subject: [PATCH 3/3] subservers: only capture first critical error for determinism If multiple critical sub-servers fail, the returned error should not depend on Go's random map iteration order. Capture only the first critical error encountered. Co-Authored-By: Claude Opus 4.6 (1M context) --- subservers/manager.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subservers/manager.go b/subservers/manager.go index b34805886..34c5ac89e 100644 --- a/subservers/manager.go +++ b/subservers/manager.go @@ -150,7 +150,7 @@ func (s *Manager) StartIntegratedServers(lndClient lnrpc.LightningClient, if err != nil { s.statusServer.SetErrored(ss.Name(), err.Error()) - if s.critical[ss.Name()] { + if s.critical[ss.Name()] && criticalErr == nil { criticalErr = fmt.Errorf("critical "+ "sub-server %s failed to "+ "start: %w", ss.Name(), err) @@ -184,7 +184,7 @@ func (s *Manager) ConnectRemoteSubServers() error { if err != nil { s.statusServer.SetErrored(ss.Name(), err.Error()) - if s.critical[ss.Name()] { + if s.critical[ss.Name()] && criticalErr == nil { criticalErr = fmt.Errorf("critical "+ "sub-server %s failed to "+ "connect: %w", ss.Name(), err)