diff --git a/pkg/server/admin_test.go b/pkg/server/admin_test.go index 76ac5ddad36f..effe83ef2939 100644 --- a/pkg/server/admin_test.go +++ b/pkg/server/admin_test.go @@ -2444,6 +2444,19 @@ func TestDecommissionEnqueueReplicas(t *testing.T) { // Ensure that the scratch range's replica was proactively enqueued. require.Equal(t, <-enqueuedRangeIDs, tc.LookupRangeOrFatal(t, scratchKey).RangeID) + + // Check that the node was marked as decommissioning in each of the nodes' + // decommissioningNodeMap. This needs to be wrapped in a SucceedsSoon to + // deal with gossip propagation delays. + testutils.SucceedsSoon(t, func() error { + for i := 0; i < tc.NumServers(); i++ { + srv := tc.Server(i) + if _, exists := srv.DecommissioningNodeMap()[decommissioningSrv.NodeID()]; !exists { + return errors.Newf("node %d not detected to be decommissioning", decommissioningSrv.NodeID()) + } + } + return nil + }) } decommissionAndCheck(2 /* decommissioningSrvIdx */) diff --git a/pkg/server/decommission.go b/pkg/server/decommission.go index 32e8aba1bc65..fdaf02ab3a75 100644 --- a/pkg/server/decommission.go +++ b/pkg/server/decommission.go @@ -54,6 +54,7 @@ func (t *decommissioningNodeMap) makeOnNodeDecommissioningCallback( // Nothing more to do. return } + t.nodes[decommissioningNodeID] = struct{}{} logLimiter := log.Every(5 * time.Second) // avoid log spam if err := stores.VisitStores(func(store *kvserver.Store) error { @@ -216,3 +217,15 @@ func (s *Server) Decommission( } return nil } + +// DecommissioningNodeMap returns the set of node IDs that are decommissioning +// from the perspective of the server. +func (s *Server) DecommissioningNodeMap() map[roachpb.NodeID]interface{} { + s.decomNodeMap.RLock() + defer s.decomNodeMap.RUnlock() + nodes := make(map[roachpb.NodeID]interface{}) + for key, val := range s.decomNodeMap.nodes { + nodes[key] = val + } + return nodes +} diff --git a/pkg/server/server.go b/pkg/server/server.go index 6a5a11d12668..e0d39a77d4b9 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -123,6 +123,7 @@ type Server struct { admin *adminServer status *statusServer drain *drainServer + decomNodeMap *decommissioningNodeMap authentication *authenticationServer migrationServer *migrationServer tsDB *ts.DB @@ -844,6 +845,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { admin: sAdmin, status: sStatus, drain: drain, + decomNodeMap: decomNodeMap, authentication: sAuth, tsDB: tsDB, tsServer: &sTS, diff --git a/pkg/testutils/serverutils/test_server_shim.go b/pkg/testutils/serverutils/test_server_shim.go index c40a86daacde..c2c8ea2dd22a 100644 --- a/pkg/testutils/serverutils/test_server_shim.go +++ b/pkg/testutils/serverutils/test_server_shim.go @@ -151,6 +151,10 @@ type TestServerInterface interface { // Decommission idempotently sets the decommissioning flag for specified nodes. Decommission(ctx context.Context, targetStatus livenesspb.MembershipStatus, nodeIDs []roachpb.NodeID) error + // DecommissioningNodeMap returns a map of nodeIDs that are known to the + // server to be decommissioning. + DecommissioningNodeMap() map[roachpb.NodeID]interface{} + // SplitRange splits the range containing splitKey. SplitRange(splitKey roachpb.Key) (left roachpb.RangeDescriptor, right roachpb.RangeDescriptor, err error)