diff --git a/internal/xds/bootstrap/bootstrap.go b/internal/xds/bootstrap/bootstrap.go index 142e803930e1..f409e4bd77b2 100644 --- a/internal/xds/bootstrap/bootstrap.go +++ b/internal/xds/bootstrap/bootstrap.go @@ -217,10 +217,16 @@ func (sc *ServerConfig) ServerFeaturesIgnoreResourceDeletion() bool { return false } +// SelectedCreds returns the selected credentials configuration for +// communicating with this server. +func (sc *ServerConfig) SelectedCreds() ChannelCreds { + return sc.selectedCreds +} + // DialOptions returns a slice of all the configured dial options for this -// server. +// server except grpc.WithCredentialsBundle(). func (sc *ServerConfig) DialOptions() []grpc.DialOption { - dopts := []grpc.DialOption{sc.credsDialOption} + var dopts []grpc.DialOption if sc.extraDialOptions != nil { dopts = append(dopts, sc.extraDialOptions...) } diff --git a/xds/internal/balancer/clusterimpl/balancer_test.go b/xds/internal/balancer/clusterimpl/balancer_test.go index 70c01d7b0b30..454ed9313c6f 100644 --- a/xds/internal/balancer/clusterimpl/balancer_test.go +++ b/xds/internal/balancer/clusterimpl/balancer_test.go @@ -23,12 +23,12 @@ import ( "encoding/json" "errors" "fmt" + "sort" "strings" + "sync" "testing" "time" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/base" "google.golang.org/grpc/balancer/roundrobin" @@ -43,11 +43,13 @@ import ( "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" xdsinternal "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/testutils/fakeclient" "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/load" v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" ) const ( @@ -63,11 +65,8 @@ const ( var ( testBackendEndpoints = 
[]resolver.Endpoint{{Addresses: []resolver.Address{{Addr: "1.1.1.1:1"}}}} - cmpOpts = cmp.Options{ - cmpopts.EquateEmpty(), - cmpopts.IgnoreFields(load.Data{}, "ReportInterval"), - } - toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5) + cmpOpts = cmp.Options{cmpopts.EquateEmpty(), cmp.AllowUnexported(loadData{}, localityData{}, requestData{}, serverLoadData{}), sortDataSlice} + toleranceCmpOpt = cmp.Options{cmpopts.EquateApprox(0, 1e-5), cmp.AllowUnexported(loadData{}, localityData{}, requestData{}, serverLoadData{})} ) type s struct { @@ -78,10 +77,232 @@ func Test(t *testing.T) { grpctest.RunSubTests(t, s{}) } +// testLoadReporter records load data pertaining to a single cluster. +// +// It implements loadReporter interface for the picker. Tests can use it to +// override the loadStore in the picker to verify load reporting. +type testLoadReporter struct { + cluster, service string + + mu sync.Mutex + drops map[string]uint64 + localityRPCCount map[clients.Locality]*rpcCountData +} + +// CallStarted records a call started for the clients.Locality. +func (lr *testLoadReporter) CallStarted(locality clients.Locality) { + lr.mu.Lock() + defer lr.mu.Unlock() + if _, ok := lr.localityRPCCount[locality]; !ok { + lr.localityRPCCount[locality] = &rpcCountData{} + } + lr.localityRPCCount[locality].inProgress++ + lr.localityRPCCount[locality].issued++ +} + +// CallFinished records a call finished for the clients.Locality. +func (lr *testLoadReporter) CallFinished(locality clients.Locality, err error) { + lr.mu.Lock() + defer lr.mu.Unlock() + if lr.localityRPCCount == nil { + return + } + lrc := lr.localityRPCCount[locality] + lrc.inProgress-- + if err == nil { + lrc.succeeded++ + } else { + lrc.errored++ + } +} + +// CallServerLoad records a server load for the clients.Locality. 
+func (lr *testLoadReporter) CallServerLoad(locality clients.Locality, name string, val float64) { + lr.mu.Lock() + defer lr.mu.Unlock() + if lr.localityRPCCount == nil { + return + } + lrc, ok := lr.localityRPCCount[locality] + if !ok { + return + } + if lrc.serverLoads == nil { + lrc.serverLoads = make(map[string]*rpcLoadData) + } + if _, ok := lrc.serverLoads[name]; !ok { + lrc.serverLoads[name] = &rpcLoadData{} + } + rld := lrc.serverLoads[name] + rld.add(val) +} + +// CallDropped records a call dropped for the category. +func (lr *testLoadReporter) CallDropped(category string) { + lr.mu.Lock() + defer lr.mu.Unlock() + lr.drops[category]++ +} + +// stats returns and resets all loads reported for a cluster and service, +// except inProgress rpc counts. +// +// It returns nil if the store doesn't contain any (new) data. +func (lr *testLoadReporter) stats() *loadData { + lr.mu.Lock() + defer lr.mu.Unlock() + + sd := newLoadData(lr.cluster, lr.service) + for category, val := range lr.drops { + if val == 0 { + continue + } + if category != "" { + // Skip drops without category. They are counted in total_drops, but + // not in per category. One example is drops by circuit breaking. 
+ sd.drops[category] = val + } + sd.totalDrops += val + lr.drops[category] = 0 // clear drops for next report + } + for locality, countData := range lr.localityRPCCount { + if countData.succeeded == 0 && countData.errored == 0 && countData.inProgress == 0 && countData.issued == 0 { + continue + } + + ld := localityData{ + requestStats: requestData{ + succeeded: countData.succeeded, + errored: countData.errored, + inProgress: countData.inProgress, + issued: countData.issued, + }, + loadStats: make(map[string]serverLoadData), + } + // clear localityRPCCount for next report + countData.succeeded = 0 + countData.errored = 0 + countData.inProgress = 0 + countData.issued = 0 + for key, rld := range countData.serverLoads { + s, c := rld.loadAndClear() // get and clear serverLoads for next report + if c == 0 { + continue + } + ld.loadStats[key] = serverLoadData{sum: s, count: c} + } + sd.localityStats[locality] = ld + } + if sd.totalDrops == 0 && len(sd.drops) == 0 && len(sd.localityStats) == 0 { + return nil + } + return sd +} + +// loadData contains all load data reported to the LoadStore since the most recent +// call to stats(). +type loadData struct { + // cluster is the name of the cluster this data is for. + cluster string + // service is the name of the EDS service this data is for. + service string + // totalDrops is the total number of dropped requests. + totalDrops uint64 + // drops is the number of dropped requests per category. + drops map[string]uint64 + // localityStats contains load reports per locality. + localityStats map[clients.Locality]localityData +} + +// localityData contains load data for a single locality. +type localityData struct { + // requestStats contains counts of requests made to the locality. + requestStats requestData + // loadStats contains server load data for requests made to the locality, + // indexed by the load type. + loadStats map[string]serverLoadData +} + +// requestData contains request counts. 
+type requestData struct { + // succeeded is the number of succeeded requests. + succeeded uint64 + // errored is the number of requests which ran into errors. + errored uint64 + // inProgress is the number of requests in flight. + inProgress uint64 + // issued is the total number requests that were sent. + issued uint64 +} + +// serverLoadData contains server load data. +type serverLoadData struct { + // count is the number of load reports. + count uint64 + // sum is the total value of all load reports. + sum float64 +} + +func newLoadData(cluster, service string) *loadData { + return &loadData{ + cluster: cluster, + service: service, + drops: make(map[string]uint64), + localityStats: make(map[clients.Locality]localityData), + } +} + +type rpcCountData struct { + succeeded uint64 + errored uint64 + inProgress uint64 + issued uint64 + serverLoads map[string]*rpcLoadData +} + +type rpcLoadData struct { + sum float64 + count uint64 +} + +func (rld *rpcLoadData) add(v float64) { + rld.sum += v + rld.count++ +} + +func (rld *rpcLoadData) loadAndClear() (s float64, c uint64) { + s, rld.sum = rld.sum, 0 + c, rld.count = rld.count, 0 + return s, c +} + func init() { NewRandomWRR = testutils.NewTestWRR } +var sortDataSlice = cmp.Transformer("SortDataSlice", func(in []*loadData) []*loadData { + out := append([]*loadData(nil), in...) // Copy input to avoid mutating it + sort.Slice(out, + func(i, j int) bool { + if out[i].cluster < out[j].cluster { + return true + } + if out[i].cluster == out[j].cluster { + return out[i].service < out[j].service + } + return false + }, + ) + return out +}) + +func verifyLoadStoreData(wantStoreData, gotStoreData *loadData) error { + if diff := cmp.Diff(wantStoreData, gotStoreData, cmpOpts); diff != "" { + return fmt.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) + } + return nil +} + // TestDropByCategory verifies that the balancer correctly drops the picks, and // that the drops are reported. 
func (s) TestDropByCategory(t *testing.T) { @@ -144,8 +365,16 @@ func (s) TestDropByCategory(t *testing.T) { sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready}) // Test pick with one backend. + testClusterLoadReporter := &testLoadReporter{cluster: testClusterName, service: testServiceName, drops: make(map[string]uint64), localityRPCCount: make(map[clients.Locality]*rpcCountData)} + const rpcCount = 24 if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error { + // Override the loadStore in the picker with testClusterLoadReporter. + picker := p.(*picker) + originalLoadStore := picker.loadStore + picker.loadStore = testClusterLoadReporter + defer func() { picker.loadStore = originalLoadStore }() + for i := 0; i < rpcCount; i++ { gotSCSt, err := p.Pick(balancer.PickInfo{}) // Even RPCs are dropped. @@ -174,28 +403,24 @@ func (s) TestDropByCategory(t *testing.T) { } // Dump load data from the store and compare with expected counts. - loadStore := xdsC.LoadStore() - if loadStore == nil { - t.Fatal("loadStore is nil in xdsClient") - } const dropCount = rpcCount * dropNumerator / dropDenominator - wantStatsData0 := []*load.Data{{ - Cluster: testClusterName, - Service: testServiceName, - TotalDrops: dropCount, - Drops: map[string]uint64{dropReason: dropCount}, - LocalityStats: map[string]load.LocalityData{ - xdsinternal.LocalityID{}.ToString(): {RequestStats: load.RequestData{ - Succeeded: (rpcCount - dropCount) * 3 / 4, - Errored: (rpcCount - dropCount) / 4, - Issued: rpcCount - dropCount, + wantStatsData0 := &loadData{ + cluster: testClusterName, + service: testServiceName, + totalDrops: dropCount, + drops: map[string]uint64{dropReason: dropCount}, + localityStats: map[clients.Locality]localityData{ + {}: {requestStats: requestData{ + succeeded: (rpcCount - dropCount) * 3 / 4, + errored: (rpcCount - dropCount) / 4, + issued: rpcCount - dropCount, }}, }, - }} + } - gotStatsData0 := loadStore.Stats([]string{testClusterName}) - if diff := 
cmp.Diff(gotStatsData0, wantStatsData0, cmpOpts); diff != "" { - t.Fatalf("got unexpected reports, diff (-got, +want): %v", diff) + gotStatsData0 := testClusterLoadReporter.stats() + if err := verifyLoadStoreData(wantStatsData0, gotStatsData0); err != nil { + t.Fatal(err) } // Send an update with new drop configs. @@ -223,6 +448,11 @@ func (s) TestDropByCategory(t *testing.T) { } if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error { + // Override the loadStore in the picker with testClusterLoadReporter. + picker := p.(*picker) + originalLoadStore := picker.loadStore + picker.loadStore = testClusterLoadReporter + defer func() { picker.loadStore = originalLoadStore }() for i := 0; i < rpcCount; i++ { gotSCSt, err := p.Pick(balancer.PickInfo{}) // Even RPCs are dropped. @@ -245,22 +475,22 @@ func (s) TestDropByCategory(t *testing.T) { } const dropCount2 = rpcCount * dropNumerator2 / dropDenominator2 - wantStatsData1 := []*load.Data{{ - Cluster: testClusterName, - Service: testServiceName, - TotalDrops: dropCount2, - Drops: map[string]uint64{dropReason2: dropCount2}, - LocalityStats: map[string]load.LocalityData{ - xdsinternal.LocalityID{}.ToString(): {RequestStats: load.RequestData{ - Succeeded: rpcCount - dropCount2, - Issued: rpcCount - dropCount2, + wantStatsData1 := &loadData{ + cluster: testClusterName, + service: testServiceName, + totalDrops: dropCount2, + drops: map[string]uint64{dropReason2: dropCount2}, + localityStats: map[clients.Locality]localityData{ + {}: {requestStats: requestData{ + succeeded: rpcCount - dropCount2, + issued: rpcCount - dropCount2, }}, }, - }} + } - gotStatsData1 := loadStore.Stats([]string{testClusterName}) - if diff := cmp.Diff(gotStatsData1, wantStatsData1, cmpOpts); diff != "" { - t.Fatalf("got unexpected reports, diff (-got, +want): %v", diff) + gotStatsData1 := testClusterLoadReporter.stats() + if err := verifyLoadStoreData(wantStatsData1, gotStatsData1); err != nil { + t.Fatal(err) } } @@ -318,9 +548,16 @@ func (s) 
TestDropCircuitBreaking(t *testing.T) { sc1.UpdateState(balancer.SubConnState{ConnectivityState: connectivity.Ready}) // Test pick with one backend. + testClusterLoadReporter := &testLoadReporter{cluster: testClusterName, service: testServiceName, drops: make(map[string]uint64), localityRPCCount: make(map[clients.Locality]*rpcCountData)} const rpcCount = 100 if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error { dones := []func(){} + // Override the loadStore in the picker with testClusterLoadReporter. + picker := p.(*picker) + originalLoadStore := picker.loadStore + picker.loadStore = testClusterLoadReporter + defer func() { picker.loadStore = originalLoadStore }() + for i := 0; i < rpcCount; i++ { gotSCSt, err := p.Pick(balancer.PickInfo{}) if i < 50 && err != nil { @@ -363,27 +600,22 @@ func (s) TestDropCircuitBreaking(t *testing.T) { } // Dump load data from the store and compare with expected counts. - loadStore := xdsC.LoadStore() - if loadStore == nil { - t.Fatal("loadStore is nil in xdsClient") - } - - wantStatsData0 := []*load.Data{{ - Cluster: testClusterName, - Service: testServiceName, - TotalDrops: uint64(maxRequest), - LocalityStats: map[string]load.LocalityData{ - xdsinternal.LocalityID{}.ToString(): {RequestStats: load.RequestData{ - Succeeded: uint64(rpcCount - maxRequest), - Errored: 50, - Issued: uint64(rpcCount - maxRequest + 50), + wantStatsData0 := &loadData{ + cluster: testClusterName, + service: testServiceName, + totalDrops: uint64(maxRequest), + localityStats: map[clients.Locality]localityData{ + {}: {requestStats: requestData{ + succeeded: uint64(rpcCount - maxRequest), + errored: 50, + issued: uint64(rpcCount - maxRequest + 50), }}, }, - }} + } - gotStatsData0 := loadStore.Stats([]string{testClusterName}) - if diff := cmp.Diff(gotStatsData0, wantStatsData0, cmpOpts); diff != "" { - t.Fatalf("got unexpected drop reports, diff (-got, +want): %v", diff) + gotStatsData0 := testClusterLoadReporter.stats() + if err := 
verifyLoadStoreData(wantStatsData0, gotStatsData0); err != nil { + t.Fatal(err) } } @@ -605,7 +837,7 @@ func (s) TestReResolution(t *testing.T) { } func (s) TestLoadReporting(t *testing.T) { - var testLocality = xdsinternal.LocalityID{ + var testLocality = clients.Locality{ Region: "test-region", Zone: "test-zone", SubZone: "test-sub-zone", @@ -670,9 +902,15 @@ func (s) TestLoadReporting(t *testing.T) { sca(&scs, endpoints[0].Addresses[0]) sc1.UpdateState(scs) // Test pick with one backend. + testClusterLoadReporter := &testLoadReporter{cluster: testClusterName, service: testServiceName, drops: make(map[string]uint64), localityRPCCount: make(map[clients.Locality]*rpcCountData)} const successCount = 5 const errorCount = 5 if err := cc.WaitForPicker(ctx, func(p balancer.Picker) error { + // Override the loadStore in the picker with testClusterLoadReporter. + picker := p.(*picker) + originalLoadStore := picker.loadStore + picker.loadStore = testClusterLoadReporter + defer func() { picker.loadStore = originalLoadStore }() for i := 0; i < successCount; i++ { gotSCSt, err := p.Pick(balancer.PickInfo{}) if gotSCSt.SubConn != sc1 { @@ -696,38 +934,32 @@ func (s) TestLoadReporting(t *testing.T) { } // Dump load data from the store and compare with expected counts. 
- loadStore := xdsC.LoadStore() - if loadStore == nil { - t.Fatal("loadStore is nil in xdsClient") - } - sds := loadStore.Stats([]string{testClusterName}) - if len(sds) == 0 { + sd := testClusterLoadReporter.stats() + if sd == nil { t.Fatalf("loads for cluster %v not found in store", testClusterName) } - sd := sds[0] - if sd.Cluster != testClusterName || sd.Service != testServiceName { - t.Fatalf("got unexpected load for %q, %q, want %q, %q", sd.Cluster, sd.Service, testClusterName, testServiceName) + if sd.cluster != testClusterName || sd.service != testServiceName { + t.Fatalf("got unexpected load for %q, %q, want %q, %q", sd.cluster, sd.service, testClusterName, testServiceName) } - testLocalityStr := testLocality.ToString() - localityData, ok := sd.LocalityStats[testLocalityStr] + localityData, ok := sd.localityStats[testLocality] if !ok { t.Fatalf("loads for %v not found in store", testLocality) } - reqStats := localityData.RequestStats - if reqStats.Succeeded != successCount { - t.Errorf("got succeeded %v, want %v", reqStats.Succeeded, successCount) + reqStats := localityData.requestStats + if reqStats.succeeded != successCount { + t.Errorf("got succeeded %v, want %v", reqStats.succeeded, successCount) } - if reqStats.Errored != errorCount { - t.Errorf("got errord %v, want %v", reqStats.Errored, errorCount) + if reqStats.errored != errorCount { + t.Errorf("got errord %v, want %v", reqStats.errored, errorCount) } - if reqStats.InProgress != 0 { - t.Errorf("got inProgress %v, want %v", reqStats.InProgress, 0) + if reqStats.inProgress != 0 { + t.Errorf("got inProgress %v, want %v", reqStats.inProgress, 0) } - wantLoadStats := map[string]load.ServerLoadData{ - testNamedMetricsKey1: {Count: 5, Sum: 15.7}, // aggregation of 5 * 3.14 = 15.7 - testNamedMetricsKey2: {Count: 5, Sum: 13.59}, // aggregation of 5 * 2.718 = 13.59 + wantLoadStats := map[string]serverLoadData{ + testNamedMetricsKey1: {count: 5, sum: 15.7}, // aggregation of 5 * 3.14 = 15.7 + 
testNamedMetricsKey2: {count: 5, sum: 13.59}, // aggregation of 5 * 2.718 = 13.59 } - if diff := cmp.Diff(wantLoadStats, localityData.LoadStats, toleranceCmpOpt); diff != "" { + if diff := cmp.Diff(wantLoadStats, localityData.loadStats, toleranceCmpOpt); diff != "" { t.Errorf("localityData.LoadStats returned unexpected diff (-want +got):\n%s", diff) } b.Close() @@ -741,7 +973,7 @@ func (s) TestLoadReporting(t *testing.T) { // - config modifies LRS server to a different string // - config sets LRS server to nil to stop load reporting func (s) TestUpdateLRSServer(t *testing.T) { - var testLocality = xdsinternal.LocalityID{ + var testLocality = clients.Locality{ Region: "test-region", Zone: "test-zone", SubZone: "test-sub-zone", diff --git a/xds/internal/balancer/clusterimpl/clusterimpl.go b/xds/internal/balancer/clusterimpl/clusterimpl.go index 71a4c9c9da73..096b738b0d3e 100644 --- a/xds/internal/balancer/clusterimpl/clusterimpl.go +++ b/xds/internal/balancer/clusterimpl/clusterimpl.go @@ -24,10 +24,12 @@ package clusterimpl import ( + "context" "encoding/json" "fmt" "sync" "sync/atomic" + "time" "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" @@ -41,14 +43,16 @@ import ( "google.golang.org/grpc/serviceconfig" xdsinternal "google.golang.org/grpc/xds/internal" "google.golang.org/grpc/xds/internal/balancer/loadstore" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/lrsclient" "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/load" ) const ( // Name is the name of the cluster_impl balancer. Name = "xds_cluster_impl_experimental" defaultRequestCountMax = 1024 + loadStoreStopTimeout = 1 * time.Second ) var ( @@ -90,13 +94,15 @@ type clusterImplBalancer struct { // The following fields are set at creation time, and are read-only after // that, and therefore need not be protected by a mutex. 
- logger *grpclog.PrefixLogger + logger *grpclog.PrefixLogger + // TODO: #8366 - Refactor usage of loadWrapper to easily plugin a test + // load reporter from tests. loadWrapper *loadstore.Wrapper // The following fields are only accessed from balancer API methods, which // are guaranteed to be called serially by gRPC. xdsClient xdsclient.XDSClient // Sent down in ResolverState attributes. - cancelLoadReport func() // To stop reporting load through the above xDS client. + cancelLoadReport func(context.Context) // To stop reporting load through the above xDS client. edsServiceName string // EDS service name to report load for. lrsServer *bootstrap.ServerConfig // Load reporting server configuration. dropCategories []DropConfig // The categories for drops. @@ -218,7 +224,9 @@ func (b *clusterImplBalancer) updateLoadStore(newConfig *LBConfig) error { if stopOldLoadReport { if b.cancelLoadReport != nil { - b.cancelLoadReport() + stopCtx, stopCancel := context.WithTimeout(context.Background(), loadStoreStopTimeout) + defer stopCancel() + b.cancelLoadReport(stopCtx) b.cancelLoadReport = nil if !startNewLoadReport { // If a new LRS stream will be started later, no need to update @@ -228,7 +236,7 @@ func (b *clusterImplBalancer) updateLoadStore(newConfig *LBConfig) error { } } if startNewLoadReport { - var loadStore *load.Store + var loadStore *lrsclient.LoadStore if b.xdsClient != nil { loadStore, b.cancelLoadReport = b.xdsClient.ReportLoad(b.lrsServer) } @@ -344,7 +352,9 @@ func (b *clusterImplBalancer) Close() { b.childState = balancer.State{} if b.cancelLoadReport != nil { - b.cancelLoadReport() + stopCtx, stopCancel := context.WithTimeout(context.Background(), loadStoreStopTimeout) + defer stopCancel() + b.cancelLoadReport(stopCtx) b.cancelLoadReport = nil } b.logger.Infof("Shutdown") @@ -406,16 +416,19 @@ type scWrapper struct { balancer.SubConn // locality needs to be atomic because it can be updated while being read by // the picker. 
- locality atomic.Value // type xdsinternal.LocalityID + locality atomic.Pointer[clients.Locality] } -func (scw *scWrapper) updateLocalityID(lID xdsinternal.LocalityID) { - scw.locality.Store(lID) +func (scw *scWrapper) updateLocalityID(lID clients.Locality) { + scw.locality.Store(&lID) } -func (scw *scWrapper) localityID() xdsinternal.LocalityID { - lID, _ := scw.locality.Load().(xdsinternal.LocalityID) - return lID +func (scw *scWrapper) localityID() clients.Locality { + lID := scw.locality.Load() + if lID == nil { + return clients.Locality{} + } + return *lID } func (b *clusterImplBalancer) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { @@ -436,7 +449,7 @@ func (b *clusterImplBalancer) NewSubConn(addrs []resolver.Address, opts balancer // address's locality. https://github.com/grpc/grpc-go/issues/7339 addr := connectedAddress(state) lID := xdsinternal.GetLocalityID(addr) - if lID.Empty() { + if (lID == clients.Locality{}) { if b.logger.V(2) { b.logger.Infof("Locality ID for %s unexpectedly empty", addr) } @@ -459,7 +472,7 @@ func (b *clusterImplBalancer) RemoveSubConn(sc balancer.SubConn) { func (b *clusterImplBalancer) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) { clusterName := b.getClusterName() newAddrs := make([]resolver.Address, len(addrs)) - var lID xdsinternal.LocalityID + var lID clients.Locality for i, addr := range addrs { newAddrs[i] = xds.SetXDSHandshakeClusterName(addr, clusterName) lID = xdsinternal.GetLocalityID(newAddrs[i]) diff --git a/xds/internal/balancer/clusterimpl/picker.go b/xds/internal/balancer/clusterimpl/picker.go index 018122f2c68c..9ed16ffbe467 100644 --- a/xds/internal/balancer/clusterimpl/picker.go +++ b/xds/internal/balancer/clusterimpl/picker.go @@ -28,6 +28,8 @@ import ( "google.golang.org/grpc/internal/stats" "google.golang.org/grpc/internal/wrr" "google.golang.org/grpc/status" + "google.golang.org/grpc/xds/internal" + 
"google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient" ) @@ -71,10 +73,10 @@ func (d *dropper) drop() (ret bool) { // loadReporter wraps the methods from the loadStore that are used here. type loadReporter interface { - CallStarted(locality string) - CallFinished(locality string, err error) - CallServerLoad(locality, name string, val float64) - CallDropped(locality string) + CallStarted(locality clients.Locality) + CallFinished(locality clients.Locality, err error) + CallServerLoad(locality clients.Locality, name string, val float64) + CallDropped(category string) } // Picker implements RPC drop, circuit breaking drop and load reporting. @@ -133,7 +135,7 @@ func (d *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { } } - var lIDStr string + var lID clients.Locality pr, err := d.s.Picker.Pick(info) if scw, ok := pr.SubConn.(*scWrapper); ok { // This OK check also covers the case err!=nil, because SubConn will be @@ -141,7 +143,7 @@ func (d *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { pr.SubConn = scw.SubConn // If locality ID isn't found in the wrapper, an empty locality ID will // be used. 
- lIDStr = scw.localityID().ToString() + lID = scw.localityID() } if err != nil { @@ -153,24 +155,25 @@ func (d *picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) { } if labels := telemetryLabels(info.Ctx); labels != nil { - labels["grpc.lb.locality"] = lIDStr + labels["grpc.lb.locality"] = internal.LocalityString(lID) } if d.loadStore != nil { - d.loadStore.CallStarted(lIDStr) + locality := clients.Locality{Region: lID.Region, Zone: lID.Zone, SubZone: lID.SubZone} + d.loadStore.CallStarted(locality) oldDone := pr.Done pr.Done = func(info balancer.DoneInfo) { if oldDone != nil { oldDone(info) } - d.loadStore.CallFinished(lIDStr, info.Err) + d.loadStore.CallFinished(locality, info.Err) load, ok := info.ServerLoad.(*v3orcapb.OrcaLoadReport) if !ok || load == nil { return } for n, c := range load.NamedMetrics { - d.loadStore.CallServerLoad(lIDStr, n, c) + d.loadStore.CallServerLoad(locality, n, c) } } } diff --git a/xds/internal/balancer/clusterresolver/configbuilder.go b/xds/internal/balancer/clusterresolver/configbuilder.go index 72a023646a0d..9c7fed8629ba 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder.go +++ b/xds/internal/balancer/clusterresolver/configbuilder.go @@ -257,7 +257,7 @@ func priorityLocalitiesToClusterImpl(localities []xdsresource.Locality, priority if locality.Weight != 0 { lw = locality.Weight } - localityStr := locality.ID.ToString() + localityStr := internal.LocalityString(locality.ID) for _, endpoint := range locality.Endpoints { // Filter out all "unhealthy" endpoints (unknown and healthy are // both considered to be healthy: diff --git a/xds/internal/balancer/clusterresolver/configbuilder_childname.go b/xds/internal/balancer/clusterresolver/configbuilder_childname.go index 119f4c474752..bf4e33496ec7 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder_childname.go +++ b/xds/internal/balancer/clusterresolver/configbuilder_childname.go @@ -20,7 +20,7 @@ package clusterresolver import ( "fmt" - 
"google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -31,7 +31,7 @@ import ( // struct keeps state between generate() calls, and a later generate() might // return names returned by the previous call. type nameGenerator struct { - existingNames map[internal.LocalityID]string + existingNames map[clients.Locality]string prefix uint64 nextID uint64 } @@ -55,7 +55,7 @@ func newNameGenerator(prefix uint64) *nameGenerator { func (ng *nameGenerator) generate(priorities [][]xdsresource.Locality) []string { var ret []string usedNames := make(map[string]bool) - newNames := make(map[internal.LocalityID]string) + newNames := make(map[clients.Locality]string) for _, priority := range priorities { var nameFound string for _, locality := range priority { diff --git a/xds/internal/balancer/clusterresolver/configbuilder_childname_test.go b/xds/internal/balancer/clusterresolver/configbuilder_childname_test.go index 36106b4ad3a0..f056b6bdaa6c 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder_childname_test.go +++ b/xds/internal/balancer/clusterresolver/configbuilder_childname_test.go @@ -21,7 +21,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -38,8 +38,8 @@ func Test_nameGenerator_generate(t *testing.T) { prefix: 3, input1: nil, input2: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, }, want: []string{"priority-3-0", "priority-3-1"}, }, @@ -47,11 +47,11 @@ func Test_nameGenerator_generate(t *testing.T) { name: "one new priority", prefix: 1, input1: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, }, input2: 
[][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, }, want: []string{"priority-1-0", "priority-1-1"}, }, @@ -59,40 +59,40 @@ func Test_nameGenerator_generate(t *testing.T) { name: "merge two priorities", prefix: 4, input1: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L1"}}}, - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L2"}}}, }, input2: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}, {ID: internal.LocalityID{Zone: "L1"}}}, - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L0"}}, {ID: clients.Locality{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L2"}}}, }, want: []string{"priority-4-0", "priority-4-2"}, }, { name: "swap two priorities", input1: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L1"}}}, - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L2"}}}, }, input2: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L1"}}}, - {{ID: internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L2"}}}, }, want: []string{"priority-0-1", "priority-0-0", "priority-0-2"}, }, { name: "split priority", input1: [][]xdsresource.Locality{ - {{ID: internal.LocalityID{Zone: "L0"}}, {ID: internal.LocalityID{Zone: "L1"}}}, - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L0"}}, {ID: clients.Locality{Zone: "L1"}}}, + {{ID: clients.Locality{Zone: "L2"}}}, }, input2: [][]xdsresource.Locality{ - {{ID: 
internal.LocalityID{Zone: "L0"}}}, - {{ID: internal.LocalityID{Zone: "L1"}}}, // This gets a newly generated name, since "0-0" was already picked. - {{ID: internal.LocalityID{Zone: "L2"}}}, + {{ID: clients.Locality{Zone: "L0"}}}, + {{ID: clients.Locality{Zone: "L1"}}}, // This gets a newly generated name, since "0-0" was already picked. + {{ID: clients.Locality{Zone: "L2"}}}, }, want: []string{"priority-0-0", "priority-0-2", "priority-0-1"}, }, diff --git a/xds/internal/balancer/clusterresolver/configbuilder_test.go b/xds/internal/balancer/clusterresolver/configbuilder_test.go index a1d5bc8533ff..656596f95bbc 100644 --- a/xds/internal/balancer/clusterresolver/configbuilder_test.go +++ b/xds/internal/balancer/clusterresolver/configbuilder_test.go @@ -42,6 +42,7 @@ import ( "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/priority" "google.golang.org/grpc/xds/internal/balancer/wrrlocality" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -57,7 +58,7 @@ const ( ) var ( - testLocalityIDs []internal.LocalityID + testLocalityIDs []clients.Locality testResolverEndpoints [][]resolver.Endpoint testEndpoints [][]xdsresource.Endpoint @@ -84,7 +85,7 @@ var ( func init() { for i := 0; i < localityCount; i++ { - testLocalityIDs = append(testLocalityIDs, internal.LocalityID{Zone: fmt.Sprintf("test-zone-%d", i)}) + testLocalityIDs = append(testLocalityIDs, clients.Locality{Zone: fmt.Sprintf("test-zone-%d", i)}) var ( endpoints []resolver.Endpoint ends []xdsresource.Endpoint @@ -549,7 +550,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { {Addresses: []string{"addr-1-1"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, {Addresses: []string{"addr-1-2"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, }, - ID: internal.LocalityID{Zone: "test-zone-1"}, + ID: clients.Locality{Zone: "test-zone-1"}, Weight: 20, }, 
{ @@ -557,7 +558,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { {Addresses: []string{"addr-2-1"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, {Addresses: []string{"addr-2-2"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, }, - ID: internal.LocalityID{Zone: "test-zone-2"}, + ID: clients.Locality{Zone: "test-zone-2"}, Weight: 80, }, }, @@ -575,10 +576,10 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { ChildPolicy: &iserviceconfig.BalancerConfig{Name: roundrobin.Name}, }, wantEndpoints: []resolver.Endpoint{ - testEndpointWithAttrs([]string{"addr-1-1"}, 20, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testEndpointWithAttrs([]string{"addr-1-2"}, 20, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testEndpointWithAttrs([]string{"addr-2-1"}, 80, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testEndpointWithAttrs([]string{"addr-2-2"}, 80, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testEndpointWithAttrs([]string{"addr-1-1"}, 20, 90, "test-priority", &clients.Locality{Zone: "test-zone-1"}), + testEndpointWithAttrs([]string{"addr-1-2"}, 20, 10, "test-priority", &clients.Locality{Zone: "test-zone-1"}), + testEndpointWithAttrs([]string{"addr-2-1"}, 80, 90, "test-priority", &clients.Locality{Zone: "test-zone-2"}), + testEndpointWithAttrs([]string{"addr-2-2"}, 80, 10, "test-priority", &clients.Locality{Zone: "test-zone-2"}), }, }, { @@ -589,7 +590,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { {Addresses: []string{"addr-1-1"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, {Addresses: []string{"addr-1-2"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, }, - ID: internal.LocalityID{Zone: "test-zone-1"}, + ID: clients.Locality{Zone: "test-zone-1"}, Weight: 20, }, { @@ -597,7 +598,7 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { {Addresses: 
[]string{"addr-2-1"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 90}, {Addresses: []string{"addr-2-2"}, HealthStatus: xdsresource.EndpointHealthStatusHealthy, Weight: 10}, }, - ID: internal.LocalityID{Zone: "test-zone-2"}, + ID: clients.Locality{Zone: "test-zone-2"}, Weight: 80, }, }, @@ -611,10 +612,10 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { }, }, wantEndpoints: []resolver.Endpoint{ - testEndpointWithAttrs([]string{"addr-1-1"}, 20, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testEndpointWithAttrs([]string{"addr-1-2"}, 20, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-1"}), - testEndpointWithAttrs([]string{"addr-2-1"}, 80, 90, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), - testEndpointWithAttrs([]string{"addr-2-2"}, 80, 10, "test-priority", &internal.LocalityID{Zone: "test-zone-2"}), + testEndpointWithAttrs([]string{"addr-1-1"}, 20, 90, "test-priority", &clients.Locality{Zone: "test-zone-1"}), + testEndpointWithAttrs([]string{"addr-1-2"}, 20, 10, "test-priority", &clients.Locality{Zone: "test-zone-1"}), + testEndpointWithAttrs([]string{"addr-2-1"}, 80, 90, "test-priority", &clients.Locality{Zone: "test-zone-2"}), + testEndpointWithAttrs([]string{"addr-2-2"}, 80, 10, "test-priority", &clients.Locality{Zone: "test-zone-2"}), }, }, } @@ -634,14 +635,14 @@ func TestPriorityLocalitiesToClusterImpl(t *testing.T) { } } -func testEndpointWithAttrs(addrStrs []string, localityWeight, endpointWeight uint32, priority string, lID *internal.LocalityID) resolver.Endpoint { +func testEndpointWithAttrs(addrStrs []string, localityWeight, endpointWeight uint32, priority string, lID *clients.Locality) resolver.Endpoint { endpoint := resolver.Endpoint{} for _, a := range addrStrs { endpoint.Addresses = append(endpoint.Addresses, resolver.Address{Addr: a}) } path := []string{priority} if lID != nil { - path = append(path, lID.ToString()) + path = append(path, internal.LocalityString(*lID)) 
endpoint = internal.SetLocalityIDInEndpoint(endpoint, *lID) } endpoint = hierarchy.SetInEndpoint(endpoint, path) diff --git a/xds/internal/balancer/loadstore/load_store_wrapper.go b/xds/internal/balancer/loadstore/load_store_wrapper.go index f5605df83276..48a2b7d1ed49 100644 --- a/xds/internal/balancer/loadstore/load_store_wrapper.go +++ b/xds/internal/balancer/loadstore/load_store_wrapper.go @@ -22,7 +22,8 @@ package loadstore import ( "sync" - "google.golang.org/grpc/xds/internal/xdsclient/load" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/lrsclient" ) // NewWrapper creates a Wrapper. @@ -53,8 +54,8 @@ type Wrapper struct { // store and perCluster are initialized as nil. They are only set by the // balancer when LRS is enabled. Before that, all functions to record loads // are no-op. - store *load.Store - perCluster load.PerClusterReporter + store *lrsclient.LoadStore + perCluster *lrsclient.PerClusterReporter } // UpdateClusterAndService updates the cluster name and eds service for this @@ -68,23 +69,30 @@ func (lsw *Wrapper) UpdateClusterAndService(cluster, edsService string) { } lsw.cluster = cluster lsw.edsService = edsService - lsw.perCluster = lsw.store.PerCluster(lsw.cluster, lsw.edsService) + if lsw.store == nil { + return + } + lsw.perCluster = lsw.store.ReporterForCluster(lsw.cluster, lsw.edsService) } // UpdateLoadStore updates the load store for this wrapper. If it is changed // from before, the perCluster store in this wrapper will also be updated. 
-func (lsw *Wrapper) UpdateLoadStore(store *load.Store) { +func (lsw *Wrapper) UpdateLoadStore(store *lrsclient.LoadStore) { lsw.mu.Lock() defer lsw.mu.Unlock() if store == lsw.store { return } lsw.store = store - lsw.perCluster = lsw.store.PerCluster(lsw.cluster, lsw.edsService) + if lsw.store == nil { + lsw.perCluster = nil + return + } + lsw.perCluster = lsw.store.ReporterForCluster(lsw.cluster, lsw.edsService) } // CallStarted records a call started in the store. -func (lsw *Wrapper) CallStarted(locality string) { +func (lsw *Wrapper) CallStarted(locality clients.Locality) { lsw.mu.RLock() defer lsw.mu.RUnlock() if lsw.perCluster != nil { @@ -93,7 +101,7 @@ func (lsw *Wrapper) CallStarted(locality string) { } // CallFinished records a call finished in the store. -func (lsw *Wrapper) CallFinished(locality string, err error) { +func (lsw *Wrapper) CallFinished(locality clients.Locality, err error) { lsw.mu.RLock() defer lsw.mu.RUnlock() if lsw.perCluster != nil { @@ -102,7 +110,7 @@ func (lsw *Wrapper) CallFinished(locality string, err error) { } // CallServerLoad records the server load in the store. -func (lsw *Wrapper) CallServerLoad(locality, name string, val float64) { +func (lsw *Wrapper) CallServerLoad(locality clients.Locality, name string, val float64) { lsw.mu.RLock() defer lsw.mu.RUnlock() if lsw.perCluster != nil { diff --git a/xds/internal/balancer/wrrlocality/balancer.go b/xds/internal/balancer/wrrlocality/balancer.go index 2d03a9c75e7b..0c99c108f769 100644 --- a/xds/internal/balancer/wrrlocality/balancer.go +++ b/xds/internal/balancer/wrrlocality/balancer.go @@ -171,7 +171,7 @@ func (b *wrrLocalityBalancer) UpdateClientConnState(s balancer.ClientConnState) // shouldn't happen though (this attribute that is set actually gets // used to build localities in the first place), and thus don't error // out, and just build a weighted target with undefined behavior. 
- locality := internal.GetLocalityID(addr).ToString() + locality := internal.LocalityString(internal.GetLocalityID(addr)) ai, ok := getAddrInfo(addr) if !ok { return fmt.Errorf("xds_wrr_locality: missing locality weight information in address %q", addr) diff --git a/xds/internal/balancer/wrrlocality/balancer_test.go b/xds/internal/balancer/wrrlocality/balancer_test.go index 3316ac103002..9c3a499e6540 100644 --- a/xds/internal/balancer/wrrlocality/balancer_test.go +++ b/xds/internal/balancer/wrrlocality/balancer_test.go @@ -37,6 +37,7 @@ import ( "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" ) const ( @@ -178,7 +179,7 @@ func (s) TestUpdateClientConnState(t *testing.T) { addr1 := resolver.Address{ Addr: "locality-1", } - addr1 = internal.SetLocalityID(addr1, internal.LocalityID{ + addr1 = internal.SetLocalityID(addr1, clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -188,7 +189,7 @@ func (s) TestUpdateClientConnState(t *testing.T) { addr2 := resolver.Address{ Addr: "locality-2", } - addr2 = internal.SetLocalityID(addr2, internal.LocalityID{ + addr2 = internal.SetLocalityID(addr2, clients.Locality{ Region: "region-2", Zone: "zone-2", SubZone: "subzone-2", diff --git a/xds/internal/clients/lrsclient/load_store.go b/xds/internal/clients/lrsclient/load_store.go index 0a3de8b3a5bb..6b35fd979aab 100644 --- a/xds/internal/clients/lrsclient/load_store.go +++ b/xds/internal/clients/lrsclient/load_store.go @@ -24,6 +24,7 @@ import ( "sync/atomic" "time" + "google.golang.org/grpc/xds/internal/clients" lrsclientinternal "google.golang.org/grpc/xds/internal/clients/lrsclient/internal" ) @@ -35,7 +36,8 @@ import ( // // It is safe for concurrent use. type LoadStore struct { - stop func(ctx context.Context) // Function to call to Stop the LoadStore + // stop is the function to call to Stop the LoadStore reporting. 
+ stop func(ctx context.Context) // mu only protects the map (2 layers). The read/write to // *PerClusterReporter doesn't need to hold the mu. @@ -65,10 +67,13 @@ func newLoadStore() *LoadStore { // Stop signals the LoadStore to stop reporting. // // Before closing the underlying LRS stream, this method may block until a -// final load report send attempt completes or the provided context `ctx` expires. +// final load report send attempt completes or the provided context `ctx` +// expires. // // The provided context must have a deadline or timeout set to prevent Stop // from blocking indefinitely if the final send attempt fails to complete. +// +// Calling Stop on an already stopped LoadStore is a no-op. func (ls *LoadStore) Stop(ctx context.Context) { ls.stop(ctx) } @@ -142,14 +147,14 @@ func (ls *LoadStore) stats(clusterNames []string) []*loadData { type PerClusterReporter struct { cluster, service string drops sync.Map // map[string]*uint64 - localityRPCCount sync.Map // map[string]*rpcCountData + localityRPCCount sync.Map // map[clients.Locality]*rpcCountData mu sync.Mutex lastLoadReportAt time.Time } // CallStarted records a call started in the LoadStore. -func (p *PerClusterReporter) CallStarted(locality string) { +func (p *PerClusterReporter) CallStarted(locality clients.Locality) { s, ok := p.localityRPCCount.Load(locality) if !ok { tp := newRPCCountData() @@ -160,7 +165,7 @@ func (p *PerClusterReporter) CallStarted(locality string) { } // CallFinished records a call finished in the LoadStore. -func (p *PerClusterReporter) CallFinished(locality string, err error) { +func (p *PerClusterReporter) CallFinished(locality clients.Locality, err error) { f, ok := p.localityRPCCount.Load(locality) if !ok { // The map is never cleared, only values in the map are reset. So the @@ -176,7 +181,7 @@ func (p *PerClusterReporter) CallFinished(locality string, err error) { } // CallServerLoad records the server load in the LoadStore. 
-func (p *PerClusterReporter) CallServerLoad(locality, name string, val float64) { +func (p *PerClusterReporter) CallServerLoad(locality clients.Locality, name string, val float64) { s, ok := p.localityRPCCount.Load(locality) if !ok { // The map is never cleared, only values in the map are reset. So the @@ -246,7 +251,7 @@ func (p *PerClusterReporter) stats() *loadData { } return true }) - sd.localityStats[key.(string)] = ld + sd.localityStats[key.(clients.Locality)] = ld return true }) @@ -273,7 +278,7 @@ type loadData struct { // drops is the number of dropped requests per category. drops map[string]uint64 // localityStats contains load reports per locality. - localityStats map[string]localityData + localityStats map[clients.Locality]localityData // reportInternal is the duration since last time load was reported (stats() // was called). reportInterval time.Duration @@ -329,7 +334,7 @@ func newLoadData(cluster, service string) *loadData { cluster: cluster, service: service, drops: make(map[string]uint64), - localityStats: make(map[string]localityData), + localityStats: make(map[clients.Locality]localityData), } } diff --git a/xds/internal/clients/lrsclient/load_store_test.go b/xds/internal/clients/lrsclient/load_store_test.go index a4c191921544..ecbc2ccf5dd0 100644 --- a/xds/internal/clients/lrsclient/load_store_test.go +++ b/xds/internal/clients/lrsclient/load_store_test.go @@ -26,12 +26,13 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "google.golang.org/grpc/xds/internal/clients" lrsclientinternal "google.golang.org/grpc/xds/internal/clients/lrsclient/internal" ) var ( dropCategories = []string{"drop_for_real", "drop_for_fun"} - localities = []string{"locality-A", "locality-B"} + localities = []clients.Locality{{Region: "locality-A"}, {Region: "locality-B"}} errTest = fmt.Errorf("test error") ) @@ -91,7 +92,7 @@ func TestDrops(t *testing.T) { // Store and makes sure they are as expected. 
func TestLocalityStats(t *testing.T) { var ( - ld = map[string]rpcData{ + ld = map[clients.Locality]rpcData{ localities[0]: { start: 40, success: 20, @@ -106,7 +107,7 @@ func TestLocalityStats(t *testing.T) { }, } wantStoreData = &loadData{ - localityStats: map[string]localityData{ + localityStats: map[clients.Locality]localityData{ localities[0]: { requestStats: requestData{ succeeded: 20, @@ -144,7 +145,7 @@ func TestLocalityStats(t *testing.T) { for locality, data := range ld { wg.Add(data.start) for i := 0; i < data.start; i++ { - go func(l string) { + go func(l clients.Locality) { ls.CallStarted(l) wg.Done() }(locality) @@ -155,7 +156,7 @@ func TestLocalityStats(t *testing.T) { wg.Add(data.success) for i := 0; i < data.success; i++ { - go func(l string, serverData map[string]float64) { + go func(l clients.Locality, serverData map[string]float64) { ls.CallFinished(l, nil) for n, d := range serverData { ls.CallServerLoad(l, n, d) @@ -165,7 +166,7 @@ func TestLocalityStats(t *testing.T) { } wg.Add(data.failure) for i := 0; i < data.failure; i++ { - go func(l string) { + go func(l clients.Locality) { ls.CallFinished(l, errTest) wg.Done() }(locality) @@ -189,7 +190,7 @@ func TestResetAfterStats(t *testing.T) { dropCategories[0]: 30, dropCategories[1]: 40, } - ld = map[string]rpcData{ + ld = map[clients.Locality]rpcData{ localities[0]: { start: 40, success: 20, @@ -209,7 +210,7 @@ func TestResetAfterStats(t *testing.T) { dropCategories[0]: 30, dropCategories[1]: 40, }, - localityStats: map[string]localityData{ + localityStats: map[clients.Locality]localityData{ localities[0]: { requestStats: requestData{ succeeded: 20, @@ -310,7 +311,7 @@ func TestStoreStats(t *testing.T) { var ( testClusters = []string{"c0", "c1", "c2"} testServices = []string{"s0", "s1"} - testLocality = "test-locality" + testLocality = clients.Locality{Region: "test-locality"} ) store := newLoadStore() @@ -327,8 +328,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c0", service: "s0", 
totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -337,8 +338,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c0", service: "s1", totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -354,8 +355,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c1", service: "s0", totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -364,8 +365,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c1", service: "s1", totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -374,8 +375,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c2", service: "s0", totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -384,8 +385,8 @@ func TestStoreStats(t *testing.T) { { cluster: "c2", service: "s1", 
totalDrops: 1, drops: map[string]uint64{"dropped": 1}, - localityStats: map[string]localityData{ - "test-locality": { + localityStats: map[clients.Locality]localityData{ + testLocality: { requestStats: requestData{succeeded: 1, issued: 1}, loadStats: map[string]serverLoadData{"abc": {count: 1, sum: 123}}, }, @@ -405,7 +406,7 @@ func TestStoreStats(t *testing.T) { func TestStoreStatsEmptyDataNotReported(t *testing.T) { var ( testServices = []string{"s0", "s1"} - testLocality = "test-locality" + testLocality = clients.Locality{Region: "test-locality"} ) store := newLoadStore() @@ -422,26 +423,26 @@ func TestStoreStatsEmptyDataNotReported(t *testing.T) { want0 := []*loadData{ { cluster: "c0", service: "s0", - localityStats: map[string]localityData{ - "test-locality": {requestStats: requestData{succeeded: 1, issued: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{succeeded: 1, issued: 1}}, }, }, { cluster: "c0", service: "s1", - localityStats: map[string]localityData{ - "test-locality": {requestStats: requestData{succeeded: 1, issued: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{succeeded: 1, issued: 1}}, }, }, { cluster: "c1", service: "s0", - localityStats: map[string]localityData{ - "test-locality": {requestStats: requestData{inProgress: 1, issued: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{inProgress: 1, issued: 1}}, }, }, { cluster: "c1", service: "s1", - localityStats: map[string]localityData{ - "test-locality": {requestStats: requestData{inProgress: 1, issued: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{inProgress: 1, issued: 1}}, }, }, } @@ -455,14 +456,14 @@ func TestStoreStatsEmptyDataNotReported(t *testing.T) { want1 := []*loadData{ { cluster: "c1", service: "s0", - localityStats: map[string]localityData{ - "test-locality": {requestStats: 
requestData{inProgress: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{inProgress: 1}}, }, }, { cluster: "c1", service: "s1", - localityStats: map[string]localityData{ - "test-locality": {requestStats: requestData{inProgress: 1}}, + localityStats: map[clients.Locality]localityData{ + testLocality: {requestStats: requestData{inProgress: 1}}, }, }, } diff --git a/xds/internal/clients/lrsclient/loadreport_test.go b/xds/internal/clients/lrsclient/loadreport_test.go index ed79c6162202..cdddb39f98ed 100644 --- a/xds/internal/clients/lrsclient/loadreport_test.go +++ b/xds/internal/clients/lrsclient/loadreport_test.go @@ -55,8 +55,6 @@ func Test(t *testing.T) { } const ( - testLocality1 = `{"region":"test-region1"}` - testLocality2 = `{"region":"test-region2"}` testKey1 = "test-key1" testKey2 = "test-key2" defaultTestWatchExpiryTimeout = 100 * time.Millisecond @@ -65,6 +63,8 @@ const ( ) var ( + testLocality1 = clients.Locality{Region: "test-region1"} + testLocality2 = clients.Locality{Region: "test-region2"} toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5) ignoreOrderCmpOpt = protocmp.FilterField(&v3endpointpb.ClusterStats{}, "upstream_locality_stats", cmpopts.SortSlices(func(a, b protocmp.Message) bool { @@ -147,7 +147,7 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { if err != nil { t.Fatalf("client.ReportLoad() failed: %v", err) } - ssCtx, ssCancel := context.WithTimeout(context.Background(), time.Millisecond) + ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) defer ssCancel() defer loadStore1.Stop(ssCtx) @@ -234,7 +234,7 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { } // Stop this load reporting stream, server should see error canceled. 
- ssCtx, ssCancel = context.WithTimeout(context.Background(), time.Millisecond) + ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) defer ssCancel() loadStore2.Stop(ssCtx) @@ -423,7 +423,7 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { // Cancel the first load reporting call, and ensure that the stream does not // close (because we have another call open). - ssCtx, ssCancel := context.WithTimeout(context.Background(), time.Millisecond) + ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) defer ssCancel() loadStore1.Stop(ssCtx) sCtx, sCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) @@ -433,7 +433,7 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { } // Stop the second load reporting call, and ensure the stream is closed. - ssCtx, ssCancel = context.WithTimeout(context.Background(), time.Millisecond) + ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) defer ssCancel() loadStore2.Stop(ssCtx) if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil { @@ -450,7 +450,7 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil { t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err) } - ssCtx, ssCancel = context.WithTimeout(context.Background(), time.Millisecond) + ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) defer ssCancel() loadStore3.Stop(ssCtx) } @@ -574,7 +574,7 @@ func (s) TestReportLoad_StopWithContext(t *testing.T) { } req, err = lrsServer.LRSRequestChan.Receive(ctx) - if err != nil { + if err != nil || req.(*fakeserver.Request).Err != nil { continue } if req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest) == nil { diff --git a/xds/internal/clients/lrsclient/lrs_stream.go b/xds/internal/clients/lrsclient/lrs_stream.go index df761d408e76..bb275bdb468e 100644 --- 
a/xds/internal/clients/lrsclient/lrs_stream.go +++ b/xds/internal/clients/lrsclient/lrs_stream.go @@ -19,7 +19,6 @@ package lrsclient import ( "context" - "encoding/json" "fmt" "io" "time" @@ -243,11 +242,7 @@ func (lrs *streamImpl) sendLoadStatsRequest(stream clients.Stream, loads []*load }) } localityStats := make([]*v3endpointpb.UpstreamLocalityStats, 0, len(sd.localityStats)) - for l, localityData := range sd.localityStats { - lid, err := localityFromString(l) - if err != nil { - return err - } + for lid, localityData := range sd.localityStats { loadMetricStats := make([]*v3endpointpb.EndpointLoadMetricStats, 0, len(localityData.loadStats)) for name, loadData := range localityData.loadStats { loadMetricStats = append(loadMetricStats, &v3endpointpb.EndpointLoadMetricStats{ @@ -306,13 +301,3 @@ func getStreamError(stream clients.Stream) error { } } } - -// localityFromString converts a json representation of locality, into a -// clients.Locality struct. -func localityFromString(s string) (ret clients.Locality, _ error) { - err := json.Unmarshal([]byte(s), &ret) - if err != nil { - return clients.Locality{}, fmt.Errorf("%s is not a well formatted locality, error: %v", s, err) - } - return ret, nil -} diff --git a/xds/internal/clients/xdsclient/channel.go b/xds/internal/clients/xdsclient/channel.go index 6faf16881599..97438353507c 100644 --- a/xds/internal/clients/xdsclient/channel.go +++ b/xds/internal/clients/xdsclient/channel.go @@ -253,6 +253,15 @@ func decodeResponse(opts *DecodeOptions, rType *ResourceType, resp response) (ma perResourceErrors := make(map[string]error) // Tracks resource validation errors, where we have a resource name. ret := make(map[string]dataAndErrTuple) // Return result, a map from resource name to either resource data or error. 
for _, r := range resp.resources { + r, err := xdsresource.UnwrapResource(r) + if err != nil { + topLevelErrors = append(topLevelErrors, err) + continue + } + if _, ok := opts.Config.ResourceTypes[r.TypeUrl]; !ok || r.TypeUrl != resp.typeURL { + topLevelErrors = append(topLevelErrors, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceTypeUnsupported, "unexpected resource type: %q ", r.GetTypeUrl())) + continue + } result, err := rType.Decoder.Decode(r.GetValue(), *opts) // Name field of the result is left unpopulated only when resource diff --git a/xds/internal/clients/xdsclient/internal/xdsresource/type.go b/xds/internal/clients/xdsclient/internal/xdsresource/type.go index 647c36f06abb..ea4d85447d1a 100644 --- a/xds/internal/clients/xdsclient/internal/xdsresource/type.go +++ b/xds/internal/clients/xdsclient/internal/xdsresource/type.go @@ -20,7 +20,10 @@ package xdsresource import ( "time" + "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" + + v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" ) // UpdateMetadata contains the metadata for each update, including timestamp, @@ -51,6 +54,21 @@ func IsHTTPConnManagerResource(url string) bool { return url == V3HTTPConnManagerURL } +// UnwrapResource unwraps and returns the inner resource if it's in a resource +// wrapper. The original resource is returned if it's not wrapped. +func UnwrapResource(r *anypb.Any) (*anypb.Any, error) { + url := r.GetTypeUrl() + if url != V3ResourceWrapperURL { + // Not wrapped. + return r, nil + } + inner := &v3discoverypb.Resource{} + if err := proto.Unmarshal(r.GetValue(), inner); err != nil { + return nil, err + } + return inner.Resource, nil +} + // ServiceStatus is the status of the update. type ServiceStatus int @@ -81,13 +99,3 @@ type UpdateErrorMetadata struct { // Timestamp is when the NACKed response was received. 
Timestamp time.Time } - -// UpdateWithMD contains the raw message of the update and the metadata, -// including version, raw message, timestamp. -// -// This is to be used for config dump and CSDS, not directly by users (like -// resolvers/balancers). -type UpdateWithMD struct { - MD UpdateMetadata - Raw *anypb.Any -} diff --git a/xds/internal/clients/xdsclient/internal/xdsresource/version.go b/xds/internal/clients/xdsclient/internal/xdsresource/version.go index 68e67d7f6dc9..60f47e69428b 100644 --- a/xds/internal/clients/xdsclient/internal/xdsresource/version.go +++ b/xds/internal/clients/xdsclient/internal/xdsresource/version.go @@ -27,4 +27,5 @@ const ( V3ListenerURL = googleapiPrefix + "envoy.config.listener.v3.Listener" V3HTTPConnManagerURL = googleapiPrefix + "envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager" + V3ResourceWrapperURL = googleapiPrefix + "envoy.service.discovery.v3.Resource" ) diff --git a/xds/internal/internal.go b/xds/internal/internal.go index 23db64a4d487..9e0b7931b5f7 100644 --- a/xds/internal/internal.go +++ b/xds/internal/internal.go @@ -23,44 +23,30 @@ import ( "fmt" "google.golang.org/grpc/resolver" + "google.golang.org/grpc/xds/internal/clients" ) -// LocalityID is xds.Locality without XXX fields, so it can be used as map -// keys. -// -// xds.Locality cannot be map keys because one of the XXX fields is a slice. -type LocalityID struct { - Region string `json:"region,omitempty"` - Zone string `json:"zone,omitempty"` - SubZone string `json:"subZone,omitempty"` -} - -// ToString generates a string representation of LocalityID in the format -// specified in gRFC A76. Not calling it String() so printf won't call it. -func (l LocalityID) ToString() string { +// LocalityString generates a string representation of clients.Locality in the +// format specified in gRFC A76. 
+func LocalityString(l clients.Locality) string { return fmt.Sprintf("{region=%q, zone=%q, sub_zone=%q}", l.Region, l.Zone, l.SubZone) } -// Equal allows the values to be compared by Attributes.Equal. -func (l LocalityID) Equal(o any) bool { - ol, ok := o.(LocalityID) +// IsLocalityEqual allows the values to be compared by Attributes.Equal. +func IsLocalityEqual(l clients.Locality, o any) bool { + ol, ok := o.(clients.Locality) if !ok { return false } return l.Region == ol.Region && l.Zone == ol.Zone && l.SubZone == ol.SubZone } -// Empty returns whether or not the locality ID is empty. -func (l LocalityID) Empty() bool { - return l.Region == "" && l.Zone == "" && l.SubZone == "" -} - -// LocalityIDFromString converts a string representation of locality as +// LocalityFromString converts a string representation of clients.locality as // specified in gRFC A76, into a LocalityID struct. -func LocalityIDFromString(s string) (ret LocalityID, _ error) { +func LocalityFromString(s string) (ret clients.Locality, _ error) { _, err := fmt.Sscanf(s, "{region=%q, zone=%q, sub_zone=%q}", &ret.Region, &ret.Zone, &ret.SubZone) if err != nil { - return LocalityID{}, fmt.Errorf("%s is not a well formatted locality ID, error: %v", s, err) + return clients.Locality{}, fmt.Errorf("%s is not a well formatted locality ID, error: %v", s, err) } return ret, nil } @@ -70,19 +56,19 @@ type localityKeyType string const localityKey = localityKeyType("grpc.xds.internal.address.locality") // GetLocalityID returns the locality ID of addr. -func GetLocalityID(addr resolver.Address) LocalityID { - path, _ := addr.BalancerAttributes.Value(localityKey).(LocalityID) +func GetLocalityID(addr resolver.Address) clients.Locality { + path, _ := addr.BalancerAttributes.Value(localityKey).(clients.Locality) return path } // SetLocalityID sets locality ID in addr to l. 
-func SetLocalityID(addr resolver.Address, l LocalityID) resolver.Address { +func SetLocalityID(addr resolver.Address, l clients.Locality) resolver.Address { addr.BalancerAttributes = addr.BalancerAttributes.WithValue(localityKey, l) return addr } // SetLocalityIDInEndpoint sets locality ID in endpoint to l. -func SetLocalityIDInEndpoint(endpoint resolver.Endpoint, l LocalityID) resolver.Endpoint { +func SetLocalityIDInEndpoint(endpoint resolver.Endpoint, l clients.Locality) resolver.Endpoint { endpoint.Attributes = endpoint.Attributes.WithValue(localityKey, l) return endpoint } diff --git a/xds/internal/internal_test.go b/xds/internal/internal_test.go index 9c9a299a3b6f..2f6893b5c657 100644 --- a/xds/internal/internal_test.go +++ b/xds/internal/internal_test.go @@ -26,6 +26,7 @@ import ( corepb "github.com/envoyproxy/go-control-plane/envoy/api/v2/core" "github.com/google/go-cmp/cmp" "google.golang.org/grpc/internal/grpctest" + "google.golang.org/grpc/xds/internal/clients" ) const ignorePrefix = "XXX_" @@ -49,7 +50,7 @@ func ignore(name string) bool { // fields (expect for XXX_) from the proto message. 
func (s) TestLocalityMatchProtoMessage(t *testing.T) { want1 := make(map[string]string) - for ty, i := reflect.TypeOf(LocalityID{}), 0; i < ty.NumField(); i++ { + for ty, i := reflect.TypeOf(clients.Locality{}), 0; i < ty.NumField(); i++ { f := ty.Field(i) if ignore(f.Name) { continue @@ -74,40 +75,40 @@ func (s) TestLocalityMatchProtoMessage(t *testing.T) { func TestLocalityToAndFromString(t *testing.T) { tests := []struct { name string - localityID LocalityID + localityID clients.Locality str string wantErr bool }{ { name: "3 fields", - localityID: LocalityID{Region: "r:r", Zone: "z#z", SubZone: "s^s"}, + localityID: clients.Locality{Region: "r:r", Zone: "z#z", SubZone: "s^s"}, str: `{region="r:r", zone="z#z", sub_zone="s^s"}`, }, { name: "2 fields", - localityID: LocalityID{Region: "r:r", Zone: "z#z"}, + localityID: clients.Locality{Region: "r:r", Zone: "z#z"}, str: `{region="r:r", zone="z#z", sub_zone=""}`, }, { name: "1 field", - localityID: LocalityID{Region: "r:r"}, + localityID: clients.Locality{Region: "r:r"}, str: `{region="r:r", zone="", sub_zone=""}`, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotStr := tt.localityID.ToString() + gotStr := LocalityString(tt.localityID) if gotStr != tt.str { t.Errorf("%#v.String() = %q, want %q", tt.localityID, gotStr, tt.str) } - gotID, err := LocalityIDFromString(tt.str) + gotID, err := LocalityFromString(tt.str) if (err != nil) != tt.wantErr { - t.Errorf("LocalityIDFromString(%q) error = %v, wantErr %v", tt.str, err, tt.wantErr) + t.Errorf("LocalityFromString(%q) error = %v, wantErr %v", tt.str, err, tt.wantErr) return } if diff := cmp.Diff(gotID, tt.localityID); diff != "" { - t.Errorf("LocalityIDFromString() got = %v, want %v, diff: %s", gotID, tt.localityID, diff) + t.Errorf("LocalityFromString() got = %v, want %v, diff: %s", gotID, tt.localityID, diff) } }) } diff --git a/xds/internal/testutils/fakeclient/client.go b/xds/internal/testutils/fakeclient/client.go index 
806a207fabe5..bfcbf5ae9eb0 100644 --- a/xds/internal/testutils/fakeclient/client.go +++ b/xds/internal/testutils/fakeclient/client.go @@ -24,8 +24,9 @@ import ( "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/xds/bootstrap" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/lrsclient" "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/load" ) // Client is a fake implementation of an xds client. It exposes a bunch of @@ -39,7 +40,7 @@ type Client struct { name string loadReportCh *testutils.Channel lrsCancelCh *testutils.Channel - loadStore *load.Store + loadStore *lrsclient.LoadStore bootstrapCfg *bootstrap.Config } @@ -49,10 +50,44 @@ type ReportLoadArgs struct { Server *bootstrap.ServerConfig } +type transportBuilder struct { +} + +func (*transportBuilder) Build(clients.ServerIdentifier) (clients.Transport, error) { + return &transport{}, nil +} + +type transport struct { +} + +func (*transport) NewStream(context.Context, string) (clients.Stream, error) { + return &stream{}, nil +} + +func (*transport) Close() { +} + +type stream struct { +} + +func (*stream) Send([]byte) error { + return nil +} + +func (*stream) Recv() ([]byte, error) { + return nil, nil + +} + // ReportLoad starts reporting load about clusterName to server. 
-func (xdsC *Client) ReportLoad(server *bootstrap.ServerConfig) (loadStore *load.Store, cancel func()) { +func (xdsC *Client) ReportLoad(server *bootstrap.ServerConfig) (loadStore *lrsclient.LoadStore, cancel func(context.Context)) { + lrsClient, _ := lrsclient.New(lrsclient.Config{Node: clients.Node{ID: "fake-node-id"}, TransportBuilder: &transportBuilder{}}) + xdsC.loadStore, _ = lrsClient.ReportLoad(clients.ServerIdentifier{ServerURI: server.ServerURI()}) + xdsC.loadReportCh.Send(ReportLoadArgs{Server: server}) - return xdsC.loadStore, func() { + + return xdsC.loadStore, func(ctx context.Context) { + xdsC.loadStore.Stop(ctx) xdsC.lrsCancelCh.Send(nil) } } @@ -65,7 +100,7 @@ func (xdsC *Client) WaitForCancelReportLoad(ctx context.Context) error { } // LoadStore returns the underlying load data store. -func (xdsC *Client) LoadStore() *load.Store { +func (xdsC *Client) LoadStore() *lrsclient.LoadStore { return xdsC.loadStore } @@ -107,7 +142,6 @@ func NewClientWithName(name string) *Client { name: name, loadReportCh: testutils.NewChannel(), lrsCancelCh: testutils.NewChannel(), - loadStore: load.NewStore(), bootstrapCfg: &bootstrap.Config{}, } } diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go deleted file mode 100644 index ec3a7352f9b9..000000000000 --- a/xds/internal/xdsclient/authority.go +++ /dev/null @@ -1,884 +0,0 @@ -/* - * - * Copyright 2021 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package xdsclient - -import ( - "context" - "fmt" - "sync" - "sync/atomic" - - "google.golang.org/grpc/experimental/stats" - "google.golang.org/grpc/grpclog" - igrpclog "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/grpcsync" - "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/protobuf/types/known/anypb" - "google.golang.org/protobuf/types/known/timestamppb" - - v3adminpb "github.com/envoyproxy/go-control-plane/envoy/admin/v3" - v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" -) - -type resourceState struct { - watchers map[xdsresource.ResourceWatcher]bool // Set of watchers for this resource. - cache xdsresource.ResourceData // Most recent ACKed update for this resource. - md xdsresource.UpdateMetadata // Metadata for the most recent update. - deletionIgnored bool // True, if resource deletion was ignored for a prior update. - xdsChannelConfigs map[*xdsChannelWithConfig]bool // Set of xdsChannels where this resource is subscribed. -} - -// xdsChannelForADS is used to acquire a reference to an xdsChannel. This -// functionality is provided by the xdsClient. -// -// The arguments to the function are as follows: -// - the server config for the xdsChannel -// - the calling authority on which a set of callbacks are invoked by the -// xdsChannel on ADS stream events -// -// Returns a reference to the xdsChannel and a function to release the same. A -// non-nil error is returned if the channel creation fails and the first two -// return values are meaningless in this case. -type xdsChannelForADS func(*bootstrap.ServerConfig, *authority) (*xdsChannel, func(), error) - -// xdsChannelWithConfig is a struct that holds an xdsChannel and its associated -// ServerConfig, along with a cleanup function to release the xdsChannel. 
-type xdsChannelWithConfig struct { - channel *xdsChannel - serverConfig *bootstrap.ServerConfig - cleanup func() -} - -// authority provides the functionality required to communicate with a -// management server corresponding to an authority name specified in the -// bootstrap configuration. -// -// It holds references to one or more xdsChannels, one for each server -// configuration in the bootstrap, to allow fallback from a primary management -// server to a secondary management server. Authorities that contain similar -// server configuration entries will end up sharing the xdsChannel for that -// server configuration. The xdsChannels are owned and managed by the xdsClient. -// -// It also contains a cache of resource state for resources requested from -// management server(s). This cache contains the list of registered watchers and -// the most recent resource configuration received from the management server. -type authority struct { - // The following fields are initialized at creation time and are read-only - // afterwards, and therefore don't need to be protected with a mutex. - name string // Name of the authority from bootstrap configuration. - watcherCallbackSerializer *grpcsync.CallbackSerializer // Serializer to run watcher callbacks, owned by the xDS client implementation. - getChannelForADS xdsChannelForADS // Function to get an xdsChannel for ADS, provided by the xDS client implementation. - xdsClientSerializer *grpcsync.CallbackSerializer // Serializer to run call ins from the xDS client, owned by this authority. - xdsClientSerializerClose func() // Function to close the above serializer. - logger *igrpclog.PrefixLogger // Logger for this authority. - target string // The gRPC Channel target. - metricsRecorder stats.MetricsRecorder // The metrics recorder used for emitting metrics. - - // The below defined fields must only be accessed in the context of the - // serializer callback, owned by this authority. 
- - // A two level map containing the state of all the resources being watched. - // - // The first level map key is the ResourceType (Listener, Route etc). This - // allows us to have a single map for all resources instead of having per - // resource-type maps. - // - // The second level map key is the resource name, with the value being the - // actual state of the resource. - resources map[xdsresource.Type]map[string]*resourceState - - // An ordered list of xdsChannels corresponding to the list of server - // configurations specified for this authority in the bootstrap. The - // ordering specifies the order in which these channels are preferred for - // fallback. - xdsChannelConfigs []*xdsChannelWithConfig - - // The current active xdsChannel. Here, active does not mean that the - // channel has a working connection to the server. It simply points to the - // channel that we are trying to work with, based on fallback logic. - activeXDSChannel *xdsChannelWithConfig -} - -// authorityBuildOptions wraps arguments required to create a new authority. -type authorityBuildOptions struct { - serverConfigs bootstrap.ServerConfigs // Server configs for the authority - name string // Name of the authority - serializer *grpcsync.CallbackSerializer // Callback serializer for invoking watch callbacks - getChannelForADS xdsChannelForADS // Function to acquire a reference to an xdsChannel - logPrefix string // Prefix for logging - target string // Target for the gRPC Channel that owns xDS Client/Authority - metricsRecorder stats.MetricsRecorder // metricsRecorder to emit metrics -} - -// newAuthority creates a new authority instance with the provided -// configuration. The authority is responsible for managing the state of -// resources requested from the management server, as well as acquiring and -// releasing references to channels used to communicate with the management -// server. -// -// Note that no channels to management servers are created at this time. 
Instead -// a channel to the first server configuration is created when the first watch -// is registered, and more channels are created as needed by the fallback logic. -func newAuthority(args authorityBuildOptions) *authority { - ctx, cancel := context.WithCancel(context.Background()) - l := grpclog.Component("xds") - logPrefix := args.logPrefix + fmt.Sprintf("[authority %q] ", args.name) - ret := &authority{ - name: args.name, - watcherCallbackSerializer: args.serializer, - getChannelForADS: args.getChannelForADS, - xdsClientSerializer: grpcsync.NewCallbackSerializer(ctx), - xdsClientSerializerClose: cancel, - logger: igrpclog.NewPrefixLogger(l, logPrefix), - resources: make(map[xdsresource.Type]map[string]*resourceState), - target: args.target, - metricsRecorder: args.metricsRecorder, - } - - // Create an ordered list of xdsChannels with their server configs. The - // actual channel to the first server configuration is created when the - // first watch is registered, and channels to other server configurations - // are created as needed to support fallback. - for _, sc := range args.serverConfigs { - ret.xdsChannelConfigs = append(ret.xdsChannelConfigs, &xdsChannelWithConfig{serverConfig: sc}) - } - return ret -} - -// adsStreamFailure is called to notify the authority about an ADS stream -// failure on an xdsChannel to the management server identified by the provided -// server config. The error is forwarded to all the resource watchers. -// -// This method is called by the xDS client implementation (on all interested -// authorities) when a stream error is reported by an xdsChannel. -// -// Errors of type xdsresource.ErrTypeStreamFailedAfterRecv are ignored. 
-func (a *authority) adsStreamFailure(serverConfig *bootstrap.ServerConfig, err error) { - a.xdsClientSerializer.TrySchedule(func(context.Context) { - a.handleADSStreamFailure(serverConfig, err) - }) -} - -// Handles ADS stream failure by invoking watch callbacks and triggering -// fallback if the associated conditions are met. -// -// Only executed in the context of a serializer callback. -func (a *authority) handleADSStreamFailure(serverConfig *bootstrap.ServerConfig, err error) { - if a.logger.V(2) { - a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err) - } - - // We do not consider it an error if the ADS stream was closed after having - // received a response on the stream. This is because there are legitimate - // reasons why the server may need to close the stream during normal - // operations, such as needing to rebalance load or the underlying - // connection hitting its max connection age limit. See gRFC A57 for more - // details. - if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv { - a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err) - return - } - - // Two conditions need to be met for fallback to be triggered: - // 1. There is a connectivity failure on the ADS stream, as described in - // gRFC A57. For us, this means that the ADS stream was closed before the - // first server response was received. We already checked that condition - // earlier in this method. - // 2. There is at least one watcher for a resource that is not cached. - // Cached resources include ones that - // - have been successfully received and can be used. - // - are considered non-existent according to xDS Protocol Specification. - if !a.watcherExistsForUncachedResource() { - if a.logger.V(2) { - a.logger.Infof("No watchers for uncached resources. 
Not triggering fallback") - } - // Since we are not triggering fallback, propagate the connectivity - // error to all watchers and return early. - a.propagateConnectivityErrorToAllWatchers(err) - return - } - - // Attempt to fallback to servers with lower priority than the failing one. - currentServerIdx := a.serverIndexForConfig(serverConfig) - for i := currentServerIdx + 1; i < len(a.xdsChannelConfigs); i++ { - if a.fallbackToServer(a.xdsChannelConfigs[i]) { - // Since we have successfully triggered fallback, we don't have to - // notify watchers about the connectivity error. - return - } - } - - // Having exhausted all available servers, we must notify watchers of the - // connectivity error - A71. - a.propagateConnectivityErrorToAllWatchers(err) -} - -// propagateConnectivityErrorToAllWatchers propagates the given connection error -// to all watchers of all resources. -// -// Only executed in the context of a serializer callback. -func (a *authority) propagateConnectivityErrorToAllWatchers(err error) { - for _, rType := range a.resources { - for _, state := range rType { - for watcher := range state.watchers { - if state.cache == nil { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) - }) - } else { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.AmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) - }) - } - } - } - } -} - -// serverIndexForConfig returns the index of the xdsChannelConfig matching the -// provided server config, panicking if no match is found (which indicates a -// programming error). 
-func (a *authority) serverIndexForConfig(sc *bootstrap.ServerConfig) int { - for i, cfg := range a.xdsChannelConfigs { - if cfg.serverConfig.Equal(sc) { - return i - } - } - panic(fmt.Sprintf("no server config matching %v found", sc)) -} - -// Determines the server to fallback to and triggers fallback to the same. If -// required, creates an xdsChannel to that server, and re-subscribes to all -// existing resources. -// -// Only executed in the context of a serializer callback. -func (a *authority) fallbackToServer(xc *xdsChannelWithConfig) bool { - if a.logger.V(2) { - a.logger.Infof("Attempting to initiate fallback to server %q", xc.serverConfig) - } - - if xc.channel != nil { - if a.logger.V(2) { - a.logger.Infof("Channel to the next server in the list %q already exists", xc.serverConfig) - } - return false - } - - channel, cleanup, err := a.getChannelForADS(xc.serverConfig, a) - if err != nil { - a.logger.Errorf("Failed to create xDS channel: %v", err) - return false - } - xc.channel = channel - xc.cleanup = cleanup - a.activeXDSChannel = xc - - // Subscribe to all existing resources from the new management server. - for typ, resources := range a.resources { - for name, state := range resources { - if a.logger.V(2) { - a.logger.Infof("Resubscribing to resource of type %q and name %q", typ.TypeName(), name) - } - xc.channel.subscribe(typ, name) - - // Add the new channel to the list of xdsChannels from which this - // resource has been requested from. Retain the cached resource and - // the set of existing watchers (and other metadata fields) in the - // resource state. - state.xdsChannelConfigs[xc] = true - } - } - return true -} - -// adsResourceUpdate is called to notify the authority about a resource update -// received on the ADS stream. -// -// This method is called by the xDS client implementation (on all interested -// authorities) when a stream error is reported by an xdsChannel. 
-func (a *authority) adsResourceUpdate(serverConfig *bootstrap.ServerConfig, rType xdsresource.Type, updates map[string]ads.DataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { - a.xdsClientSerializer.TrySchedule(func(context.Context) { - a.handleADSResourceUpdate(serverConfig, rType, updates, md, onDone) - }) -} - -// handleADSResourceUpdate processes an update from the xDS client, updating the -// resource cache and notifying any registered watchers of the update. -// -// If the update is received from a higher priority xdsChannel that was -// previously down, we revert to it and close all lower priority xdsChannels. -// -// Once the update has been processed by all watchers, the authority is expected -// to invoke the onDone callback. -// -// Only executed in the context of a serializer callback. -func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig, rType xdsresource.Type, updates map[string]ads.DataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { - a.handleRevertingToPrimaryOnUpdate(serverConfig) - - // We build a list of callback funcs to invoke, and invoke them at the end - // of this method instead of inline (when handling the update for a - // particular resource), because we want to make sure that all calls to - // increment watcherCnt happen before any callbacks are invoked. This will - // ensure that the onDone callback is never invoked before all watcher - // callbacks are invoked, and the watchers have processed the update. - watcherCnt := new(atomic.Int64) - done := func() { - if watcherCnt.Add(-1) == 0 { - onDone() - } - } - funcsToSchedule := []func(context.Context){} - defer func() { - if len(funcsToSchedule) == 0 { - // When there are no watchers for the resources received as part of - // this update, invoke onDone explicitly to unblock the next read on - // the ADS stream. 
- onDone() - return - } - for _, f := range funcsToSchedule { - a.watcherCallbackSerializer.ScheduleOr(f, onDone) - } - }() - - resourceStates := a.resources[rType] - for name, uErr := range updates { - state, ok := resourceStates[name] - if !ok { - continue - } - - // On error, keep previous version of the resource. But update status - // and error. - if uErr.Err != nil { - xdsClientResourceUpdatesInvalidMetric.Record(a.metricsRecorder, 1, a.target, serverConfig.ServerURI(), rType.TypeName()) - state.md.ErrState = md.ErrState - state.md.Status = md.Status - for watcher := range state.watchers { - watcher := watcher - err := uErr.Err - watcherCnt.Add(1) - if state.cache == nil { - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceError(err, done) }) - } else { - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) - } - } - continue - } - - xdsClientResourceUpdatesValidMetric.Record(a.metricsRecorder, 1, a.target, serverConfig.ServerURI(), rType.TypeName()) - - if state.deletionIgnored { - state.deletionIgnored = false - a.logger.Infof("A valid update was received for resource %q of type %q after previously ignoring a deletion", name, rType.TypeName()) - } - // Notify watchers if any of these conditions are met: - // - this is the first update for this resource - // - this update is different from the one currently cached - // - the previous update for this resource was NACKed, but the update - // before that was the same as this update. - if state.cache == nil || !state.cache.RawEqual(uErr.Resource) || state.md.ErrState != nil { - // Update the resource cache. 
- if a.logger.V(2) { - a.logger.Infof("Resource type %q with name %q added to cache", rType.TypeName(), name) - } - state.cache = uErr.Resource - - for watcher := range state.watchers { - watcher := watcher - resource := uErr.Resource - watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceChanged(resource, done) }) - } - } - - // Set status to ACK, and clear error state. The metadata might be a - // NACK metadata because some other resources in the same response - // are invalid. - state.md = md - state.md.ErrState = nil - state.md.Status = xdsresource.ServiceStatusACKed - if md.ErrState != nil { - state.md.Version = md.ErrState.Version - } - } - - // If this resource type requires that all resources be present in every - // SotW response from the server, a response that does not include a - // previously seen resource will be interpreted as a deletion of that - // resource unless ignore_resource_deletion option was set in the server - // config. - if !rType.AllResourcesRequiredInSotW() { - return - } - for name, state := range resourceStates { - if state.cache == nil { - // If the resource state does not contain a cached update, which can - // happen when: - // - resource was newly requested but has not yet been received, or, - // - resource was removed as part of a previous update, - // we don't want to generate an error for the watchers. - // - // For the first of the above two conditions, this ADS response may - // be in reaction to an earlier request that did not yet request the - // new resource, so its absence from the response does not - // necessarily indicate that the resource does not exist. For that - // case, we rely on the request timeout instead. - // - // For the second of the above two conditions, we already generated - // an error when we received the first response which removed this - // resource. So, there is no need to generate another one. 
- continue - } - if _, ok := updates[name]; ok { - // If the resource was present in the response, move on. - continue - } - if state.md.Status == xdsresource.ServiceStatusNotExist { - // The metadata status is set to "ServiceStatusNotExist" if a - // previous update deleted this resource, in which case we do not - // want to repeatedly call the watch callbacks with a - // "resource-not-found" error. - continue - } - if serverConfig.ServerFeaturesIgnoreResourceDeletion() { - // Per A53, resource deletions are ignored if the - // `ignore_resource_deletion` server feature is enabled through the - // bootstrap configuration. If the resource deletion is to be - // ignored, the resource is not removed from the cache and the - // corresponding ResourceError() callback is not invoked on - // the watchers. - if !state.deletionIgnored { - state.deletionIgnored = true - a.logger.Warningf("Ignoring resource deletion for resource %q of type %q", name, rType.TypeName()) - } - continue - } - - // If we get here, it means that the resource exists in cache, but not - // in the new update. Delete the resource from cache, and send a - // resource not found error to indicate that the resource has been - // removed. Metadata for the resource is still maintained, as this is - // required by CSDS. - state.cache = nil - state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} - for watcher := range state.watchers { - watcher := watcher - watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName()), done) - }) - } - } -} - -// adsResourceDoesNotExist is called by the xDS client implementation (on all -// interested authorities) to notify the authority that a subscribed resource -// does not exist. 
-func (a *authority) adsResourceDoesNotExist(rType xdsresource.Type, resourceName string) { - a.xdsClientSerializer.TrySchedule(func(context.Context) { - a.handleADSResourceDoesNotExist(rType, resourceName) - }) -} - -// handleADSResourceDoesNotExist is called when a subscribed resource does not -// exist. It removes the resource from the cache, updates the metadata status -// to ServiceStatusNotExist, and notifies all watchers that the resource does -// not exist. -func (a *authority) handleADSResourceDoesNotExist(rType xdsresource.Type, resourceName string) { - if a.logger.V(2) { - a.logger.Infof("Watch for resource %q of type %s timed out", resourceName, rType.TypeName()) - } - - resourceStates := a.resources[rType] - if resourceStates == nil { - if a.logger.V(2) { - a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName()) - } - return - } - state, ok := resourceStates[resourceName] - if !ok { - if a.logger.V(2) { - a.logger.Infof("Resource %q of type %s currently not being watched", resourceName, rType.TypeName()) - } - return - } - - state.cache = nil - state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} - for watcher := range state.watchers { - watcher := watcher - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName()), func() {}) - }) - } -} - -// handleRevertingToPrimaryOnUpdate is called when a resource update is received -// from the xDS client. -// -// If the update is from the currently active server, nothing is done. Else, all -// lower priority servers are closed and the active server is reverted to the -// highest priority server that sent the update. -// -// This method is only executed in the context of a serializer callback. 
-func (a *authority) handleRevertingToPrimaryOnUpdate(serverConfig *bootstrap.ServerConfig) { - if a.activeXDSChannel != nil && a.activeXDSChannel.serverConfig.Equal(serverConfig) { - // If the resource update is from the current active server, nothing - // needs to be done from fallback point of view. - return - } - - if a.logger.V(2) { - a.logger.Infof("Received update from non-active server %q", serverConfig) - } - - // If the resource update is not from the current active server, it means - // that we have received an update from a higher priority server and we need - // to revert back to it. This method guarantees that when an update is - // received from a server, all lower priority servers are closed. - serverIdx := a.serverIndexForConfig(serverConfig) - a.activeXDSChannel = a.xdsChannelConfigs[serverIdx] - - // Close all lower priority channels. - // - // But before closing any channel, we need to unsubscribe from any resources - // that were subscribed to on this channel. Resources could be subscribed to - // from multiple channels as we fallback to lower priority servers. But when - // a higher priority one comes back up, we need to unsubscribe from all - // lower priority ones before releasing the reference to them. - for i := serverIdx + 1; i < len(a.xdsChannelConfigs); i++ { - cfg := a.xdsChannelConfigs[i] - - for rType, rState := range a.resources { - for resourceName, state := range rState { - for xcc := range state.xdsChannelConfigs { - if xcc != cfg { - continue - } - // If the current resource is subscribed to on this channel, - // unsubscribe, and remove the channel from the list of - // channels that this resource is subscribed to. - xcc.channel.unsubscribe(rType, resourceName) - delete(state.xdsChannelConfigs, xcc) - } - } - } - - // Release the reference to the channel. 
- if cfg.cleanup != nil { - if a.logger.V(2) { - a.logger.Infof("Closing lower priority server %q", cfg.serverConfig) - } - cfg.cleanup() - cfg.cleanup = nil - } - cfg.channel = nil - } -} - -// watchResource registers a new watcher for the specified resource type and -// name. It returns a function that can be called to cancel the watch. -// -// If this is the first watch for any resource on this authority, an xdsChannel -// to the first management server (from the list of server configurations) will -// be created. -// -// If this is the first watch for the given resource name, it will subscribe to -// the resource with the xdsChannel. If a cached copy of the resource exists, it -// will immediately notify the new watcher. When the last watcher for a resource -// is removed, it will unsubscribe the resource from the xdsChannel. -func (a *authority) watchResource(rType xdsresource.Type, resourceName string, watcher xdsresource.ResourceWatcher) func() { - cleanup := func() {} - done := make(chan struct{}) - - a.xdsClientSerializer.ScheduleOr(func(context.Context) { - defer close(done) - - if a.logger.V(2) { - a.logger.Infof("New watch for type %q, resource name %q", rType.TypeName(), resourceName) - } - - xdsChannel, err := a.xdsChannelToUse() - if err != nil { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) - return - } - - // Lookup the entry for the resource type in the top-level map. If there is - // no entry for this resource type, create one. - resources := a.resources[rType] - if resources == nil { - resources = make(map[string]*resourceState) - a.resources[rType] = resources - } - - // Lookup the resource state for the particular resource name that the watch - // is being registered for. If this is the first watch for this resource - // name, request it from the management server. 
- state := resources[resourceName] - if state == nil { - if a.logger.V(2) { - a.logger.Infof("First watch for type %q, resource name %q", rType.TypeName(), resourceName) - } - state = &resourceState{ - watchers: make(map[xdsresource.ResourceWatcher]bool), - md: xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusRequested}, - xdsChannelConfigs: map[*xdsChannelWithConfig]bool{xdsChannel: true}, - } - resources[resourceName] = state - xdsChannel.channel.subscribe(rType, resourceName) - } - // Always add the new watcher to the set of watchers. - state.watchers[watcher] = true - - // If we have a cached copy of the resource, notify the new watcher - // immediately. - if state.cache != nil { - if a.logger.V(2) { - a.logger.Infof("Resource type %q with resource name %q found in cache: %s", rType.TypeName(), resourceName, state.cache.ToJSON()) - } - // state can only be accessed in the context of an - // xdsClientSerializer callback. Hence making a copy of the cached - // resource here for watchCallbackSerializer. - resource := state.cache - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceChanged(resource, func() {}) }) - } - // If last update was NACK'd, notify the new watcher of error - // immediately as well. - if state.md.Status == xdsresource.ServiceStatusNACKed { - if a.logger.V(2) { - a.logger.Infof("Resource type %q with resource name %q was NACKed", rType.TypeName(), resourceName) - } - // state can only be accessed in the context of an - // xdsClientSerializer callback. Hence making a copy of the error - // here for watchCallbackSerializer. 
- err := state.md.ErrState.Err - if state.cache == nil { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) - } else { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) }) - } - } - // If the metadata field is updated to indicate that the management - // server does not have this resource, notify the new watcher. - if state.md.Status == xdsresource.ServiceStatusNotExist { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName()), func() {}) - }) - } - cleanup = a.unwatchResource(rType, resourceName, watcher) - }, func() { - if a.logger.V(2) { - a.logger.Infof("Failed to schedule a watch for type %q, resource name %q, because the xDS client is closed", rType.TypeName(), resourceName) - } - close(done) - }) - <-done - return cleanup -} - -func (a *authority) unwatchResource(rType xdsresource.Type, resourceName string, watcher xdsresource.ResourceWatcher) func() { - return sync.OnceFunc(func() { - done := make(chan struct{}) - a.xdsClientSerializer.ScheduleOr(func(context.Context) { - defer close(done) - - if a.logger.V(2) { - a.logger.Infof("Canceling a watch for type %q, resource name %q", rType.TypeName(), resourceName) - } - - // Lookup the resource type from the resource cache. The entry is - // guaranteed to be present, since *we* were the ones who added it in - // there when the watch was registered. - resources := a.resources[rType] - state := resources[resourceName] - - // Delete this particular watcher from the list of watchers, so that its - // callback will not be invoked in the future. 
- delete(state.watchers, watcher) - if len(state.watchers) > 0 { - if a.logger.V(2) { - a.logger.Infof("Other watchers exist for type %q, resource name %q", rType.TypeName(), resourceName) - } - return - } - - // There are no more watchers for this resource. Unsubscribe this - // resource from all channels where it was subscribed to and delete - // the state associated with it. - if a.logger.V(2) { - a.logger.Infof("Removing last watch for resource name %q", resourceName) - } - for xcc := range state.xdsChannelConfigs { - xcc.channel.unsubscribe(rType, resourceName) - } - delete(resources, resourceName) - - // If there are no more watchers for this resource type, delete the - // resource type from the top-level map. - if len(resources) == 0 { - if a.logger.V(2) { - a.logger.Infof("Removing last watch for resource type %q", rType.TypeName()) - } - delete(a.resources, rType) - } - // If there are no more watchers for any resource type, release the - // reference to the xdsChannels. - if len(a.resources) == 0 { - if a.logger.V(2) { - a.logger.Infof("Removing last watch for for any resource type, releasing reference to the xdsChannel") - } - a.closeXDSChannels() - } - }, func() { close(done) }) - <-done - }) -} - -// xdsChannelToUse returns the xdsChannel to use for communicating with the -// management server. If an active channel is available, it returns that. -// Otherwise, it creates a new channel using the first server configuration in -// the list of configurations, and returns that. -// -// A non-nil error is returned if the channel creation fails. -// -// Only executed in the context of a serializer callback. 
-func (a *authority) xdsChannelToUse() (*xdsChannelWithConfig, error) { - if a.activeXDSChannel != nil { - return a.activeXDSChannel, nil - } - - sc := a.xdsChannelConfigs[0].serverConfig - xc, cleanup, err := a.getChannelForADS(sc, a) - if err != nil { - return nil, err - } - a.xdsChannelConfigs[0].channel = xc - a.xdsChannelConfigs[0].cleanup = cleanup - a.activeXDSChannel = a.xdsChannelConfigs[0] - return a.activeXDSChannel, nil -} - -// closeXDSChannels closes all the xDS channels associated with this authority, -// when there are no more watchers for any resource type. -// -// Only executed in the context of a serializer callback. -func (a *authority) closeXDSChannels() { - for _, xcc := range a.xdsChannelConfigs { - if xcc.cleanup != nil { - xcc.cleanup() - xcc.cleanup = nil - } - xcc.channel = nil - } - a.activeXDSChannel = nil -} - -// watcherExistsForUncachedResource returns true if there is at least one -// watcher for a resource that has not yet been cached. -// -// Only executed in the context of a serializer callback. -func (a *authority) watcherExistsForUncachedResource() bool { - for _, resourceStates := range a.resources { - for _, state := range resourceStates { - if state.md.Status == xdsresource.ServiceStatusRequested { - return true - } - } - } - return false -} - -// dumpResources returns a dump of the resource configuration cached by this -// authority, for CSDS purposes. -func (a *authority) dumpResources() []*v3statuspb.ClientConfig_GenericXdsConfig { - var ret []*v3statuspb.ClientConfig_GenericXdsConfig - done := make(chan struct{}) - - a.xdsClientSerializer.ScheduleOr(func(context.Context) { - defer close(done) - ret = a.resourceConfig() - }, func() { close(done) }) - <-done - return ret -} - -// resourceConfig returns a slice of GenericXdsConfig objects representing the -// current state of all resources managed by this authority. This is used for -// reporting the current state of the xDS client. 
-// -// Only executed in the context of a serializer callback. -func (a *authority) resourceConfig() []*v3statuspb.ClientConfig_GenericXdsConfig { - var ret []*v3statuspb.ClientConfig_GenericXdsConfig - for rType, resourceStates := range a.resources { - typeURL := rType.TypeURL() - for name, state := range resourceStates { - var raw *anypb.Any - if state.cache != nil { - raw = state.cache.Raw() - } - config := &v3statuspb.ClientConfig_GenericXdsConfig{ - TypeUrl: typeURL, - Name: name, - VersionInfo: state.md.Version, - XdsConfig: raw, - LastUpdated: timestamppb.New(state.md.Timestamp), - ClientStatus: serviceStatusToProto(state.md.Status), - } - if errState := state.md.ErrState; errState != nil { - config.ErrorState = &v3adminpb.UpdateFailureState{ - LastUpdateAttempt: timestamppb.New(errState.Timestamp), - Details: errState.Err.Error(), - VersionInfo: errState.Version, - } - } - ret = append(ret, config) - } - } - return ret -} - -func (a *authority) close() { - a.xdsClientSerializerClose() - <-a.xdsClientSerializer.Done() - if a.logger.V(2) { - a.logger.Infof("Closed") - } -} - -func serviceStatusToProto(serviceStatus xdsresource.ServiceStatus) v3adminpb.ClientResourceStatus { - switch serviceStatus { - case xdsresource.ServiceStatusUnknown: - return v3adminpb.ClientResourceStatus_UNKNOWN - case xdsresource.ServiceStatusRequested: - return v3adminpb.ClientResourceStatus_REQUESTED - case xdsresource.ServiceStatusNotExist: - return v3adminpb.ClientResourceStatus_DOES_NOT_EXIST - case xdsresource.ServiceStatusACKed: - return v3adminpb.ClientResourceStatus_ACKED - case xdsresource.ServiceStatusNACKed: - return v3adminpb.ClientResourceStatus_NACKED - default: - return v3adminpb.ClientResourceStatus_UNKNOWN - } -} diff --git a/xds/internal/xdsclient/channel.go b/xds/internal/xdsclient/channel.go deleted file mode 100644 index 60ab9290b553..000000000000 --- a/xds/internal/xdsclient/channel.go +++ /dev/null @@ -1,342 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package xdsclient - -import ( - "errors" - "fmt" - "strings" - "time" - - "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/internal/backoff" - igrpclog "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/grpcsync" - "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient/load" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" - "google.golang.org/grpc/xds/internal/xdsclient/transport/lrs" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" -) - -// xdsChannelEventHandler wraps callbacks used to notify the xDS client about -// events on the xdsChannel. Methods in this interface may be invoked -// concurrently and the xDS client implementation needs to handle them in a -// thread-safe manner. -type xdsChannelEventHandler interface { - // adsStreamFailure is called when the xdsChannel encounters an ADS stream - // failure. - adsStreamFailure(error) - - // adsResourceUpdate is called when the xdsChannel receives an ADS response - // from the xDS management server. 
The callback is provided with the - // following: - // - the resource type of the resources in the response - // - a map of resources in the response, keyed by resource name - // - the metadata associated with the response - // - a callback to be invoked when the updated is processed - adsResourceUpdate(xdsresource.Type, map[string]ads.DataAndErrTuple, xdsresource.UpdateMetadata, func()) - - // adsResourceDoesNotExist is called when the xdsChannel determines that a - // requested ADS resource does not exist. - adsResourceDoesNotExist(xdsresource.Type, string) -} - -// xdsChannelOpts holds the options for creating a new xdsChannel. -type xdsChannelOpts struct { - transport transport.Transport // Takes ownership of this transport. - serverConfig *bootstrap.ServerConfig // Configuration of the server to connect to. - bootstrapConfig *bootstrap.Config // Complete bootstrap configuration, used to decode resources. - resourceTypeGetter func(string) xdsresource.Type // Function to retrieve resource parsing functionality, based on resource type. - eventHandler xdsChannelEventHandler // Callbacks for ADS stream events. - backoff func(int) time.Duration // Backoff function to use for stream retries. Defaults to exponential backoff, if unset. - watchExpiryTimeout time.Duration // Timeout for ADS resource watch expiry. - logPrefix string // Prefix to use for logging. -} - -// newXDSChannel creates a new xdsChannel instance with the provided options. -// It performs basic validation on the provided options and initializes the -// xdsChannel with the necessary components. 
-func newXDSChannel(opts xdsChannelOpts) (*xdsChannel, error) { - switch { - case opts.transport == nil: - return nil, errors.New("xdsChannel: transport is nil") - case opts.serverConfig == nil: - return nil, errors.New("xdsChannel: serverConfig is nil") - case opts.bootstrapConfig == nil: - return nil, errors.New("xdsChannel: bootstrapConfig is nil") - case opts.resourceTypeGetter == nil: - return nil, errors.New("xdsChannel: resourceTypeGetter is nil") - case opts.eventHandler == nil: - return nil, errors.New("xdsChannel: eventHandler is nil") - } - - xc := &xdsChannel{ - transport: opts.transport, - serverConfig: opts.serverConfig, - bootstrapConfig: opts.bootstrapConfig, - resourceTypeGetter: opts.resourceTypeGetter, - eventHandler: opts.eventHandler, - closed: grpcsync.NewEvent(), - } - - l := grpclog.Component("xds") - logPrefix := opts.logPrefix + fmt.Sprintf("[xds-channel %p] ", xc) - xc.logger = igrpclog.NewPrefixLogger(l, logPrefix) - - if opts.backoff == nil { - opts.backoff = backoff.DefaultExponential.Backoff - } - xc.ads = ads.NewStreamImpl(ads.StreamOpts{ - Transport: xc.transport, - EventHandler: xc, - Backoff: opts.backoff, - NodeProto: xc.bootstrapConfig.Node(), - WatchExpiryTimeout: opts.watchExpiryTimeout, - LogPrefix: logPrefix, - }) - xc.lrs = lrs.NewStreamImpl(lrs.StreamOpts{ - Transport: xc.transport, - Backoff: opts.backoff, - NodeProto: xc.bootstrapConfig.Node(), - LogPrefix: logPrefix, - }) - return xc, nil -} - -// xdsChannel represents a client channel to a management server, and is -// responsible for managing the lifecycle of the ADS and LRS streams. It invokes -// callbacks on the registered event handler for various ADS stream events. -type xdsChannel struct { - // The following fields are initialized at creation time and are read-only - // after that, and hence need not be guarded by a mutex. - transport transport.Transport // Takes ownership of this transport (used to make streaming calls). 
- ads *ads.StreamImpl // An ADS stream to the management server. - lrs *lrs.StreamImpl // An LRS stream to the management server. - serverConfig *bootstrap.ServerConfig // Configuration of the server to connect to. - bootstrapConfig *bootstrap.Config // Complete bootstrap configuration, used to decode resources. - resourceTypeGetter func(string) xdsresource.Type // Function to retrieve resource parsing functionality, based on resource type. - eventHandler xdsChannelEventHandler // Callbacks for ADS stream events. - logger *igrpclog.PrefixLogger // Logger to use for logging. - closed *grpcsync.Event // Fired when the channel is closed. -} - -func (xc *xdsChannel) close() { - xc.closed.Fire() - xc.ads.Stop() - xc.lrs.Stop() - xc.transport.Close() - xc.logger.Infof("Shutdown") -} - -// reportLoad returns a load.Store that can be used to report load to the LRS, and a -// function that can be called to stop reporting load. -func (xc *xdsChannel) reportLoad() (*load.Store, func()) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Attempt to start load reporting on closed channel") - } - return nil, func() {} - } - return xc.lrs.ReportLoad() -} - -// subscribe adds a subscription for the given resource name of the given -// resource type on the ADS stream. -func (xc *xdsChannel) subscribe(typ xdsresource.Type, name string) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Attempt to subscribe to an xDS resource of type %s and name %q on a closed channel", typ.TypeName(), name) - } - return - } - xc.ads.Subscribe(typ, name) -} - -// unsubscribe removes the subscription for the given resource name of the given -// resource type from the ADS stream. 
-func (xc *xdsChannel) unsubscribe(typ xdsresource.Type, name string) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Attempt to unsubscribe to an xDS resource of type %s and name %q on a closed channel", typ.TypeName(), name) - } - return - } - xc.ads.Unsubscribe(typ, name) -} - -// The following OnADSXxx() methods implement the ads.StreamEventHandler interface -// and are invoked by the ADS stream implementation. - -// OnADSStreamError is invoked when an error occurs on the ADS stream. It -// propagates the update to the xDS client. -func (xc *xdsChannel) OnADSStreamError(err error) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Received ADS stream error on a closed xdsChannel: %v", err) - } - return - } - xc.eventHandler.adsStreamFailure(err) -} - -// OnADSWatchExpiry is invoked when a watch for a resource expires. It -// propagates the update to the xDS client. -func (xc *xdsChannel) OnADSWatchExpiry(typ xdsresource.Type, name string) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Received ADS resource watch expiry for resource %q on a closed xdsChannel", name) - } - return - } - xc.eventHandler.adsResourceDoesNotExist(typ, name) -} - -// OnADSResponse is invoked when a response is received on the ADS stream. It -// decodes the resources in the response, and propagates the updates to the xDS -// client. -// -// It returns the list of resource names in the response and any errors -// encountered during decoding. -func (xc *xdsChannel) OnADSResponse(resp ads.Response, onDone func()) ([]string, error) { - if xc.closed.HasFired() { - if xc.logger.V(2) { - xc.logger.Infof("Received an update from the ADS stream on closed ADS stream") - } - return nil, errors.New("xdsChannel is closed") - } - - // Lookup the resource parser based on the resource type. 
- rType := xc.resourceTypeGetter(resp.TypeURL) - if rType == nil { - return nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceTypeUnsupported, "Resource type URL %q unknown in response from server", resp.TypeURL) - } - - // Decode the resources and build the list of resource names to return. - opts := &xdsresource.DecodeOptions{ - BootstrapConfig: xc.bootstrapConfig, - ServerConfig: xc.serverConfig, - } - updates, md, err := decodeResponse(opts, rType, resp) - var names []string - for name := range updates { - names = append(names, name) - } - - xc.eventHandler.adsResourceUpdate(rType, updates, md, onDone) - return names, err -} - -// decodeResponse decodes the resources in the given ADS response. -// -// The opts parameter provides configuration options for decoding the resources. -// The rType parameter specifies the resource type parser to use for decoding -// the resources. -// -// The returned map contains a key for each resource in the response, with the -// value being either the decoded resource data or an error if decoding failed. -// The returned metadata includes the version of the response, the timestamp of -// the update, and the status of the update (ACKed or NACKed). -// -// If there are any errors decoding the resources, the metadata will indicate -// that the update was NACKed, and the returned error will contain information -// about all errors encountered by this function. -func decodeResponse(opts *xdsresource.DecodeOptions, rType xdsresource.Type, resp ads.Response) (map[string]ads.DataAndErrTuple, xdsresource.UpdateMetadata, error) { - timestamp := time.Now() - md := xdsresource.UpdateMetadata{ - Version: resp.Version, - Timestamp: timestamp, - } - - topLevelErrors := make([]error, 0) // Tracks deserialization errors, where we don't have a resource name. - perResourceErrors := make(map[string]error) // Tracks resource validation errors, where we have a resource name. 
- ret := make(map[string]ads.DataAndErrTuple) // Return result, a map from resource name to either resource data or error. - for _, r := range resp.Resources { - result, err := rType.Decode(opts, r) - - // Name field of the result is left unpopulated only when resource - // deserialization fails. - name := "" - if result != nil { - name = xdsresource.ParseName(result.Name).String() - } - if err == nil { - ret[name] = ads.DataAndErrTuple{Resource: result.Resource} - continue - } - if name == "" { - topLevelErrors = append(topLevelErrors, err) - continue - } - perResourceErrors[name] = err - // Add place holder in the map so we know this resource name was in - // the response. - ret[name] = ads.DataAndErrTuple{Err: xdsresource.NewError(xdsresource.ErrorTypeNACKed, err.Error())} - } - - if len(topLevelErrors) == 0 && len(perResourceErrors) == 0 { - md.Status = xdsresource.ServiceStatusACKed - return ret, md, nil - } - - md.Status = xdsresource.ServiceStatusNACKed - errRet := combineErrors(rType.TypeName(), topLevelErrors, perResourceErrors) - md.ErrState = &xdsresource.UpdateErrorMetadata{ - Version: resp.Version, - Err: xdsresource.NewError(xdsresource.ErrorTypeNACKed, errRet.Error()), - Timestamp: timestamp, - } - return ret, md, errRet -} - -func combineErrors(rType string, topLevelErrors []error, perResourceErrors map[string]error) error { - var errStrB strings.Builder - errStrB.WriteString(fmt.Sprintf("error parsing %q response: ", rType)) - if len(topLevelErrors) > 0 { - errStrB.WriteString("top level errors: ") - for i, err := range topLevelErrors { - if i != 0 { - errStrB.WriteString(";\n") - } - errStrB.WriteString(err.Error()) - } - } - if len(perResourceErrors) > 0 { - var i int - for name, err := range perResourceErrors { - if i != 0 { - errStrB.WriteString(";\n") - } - i++ - errStrB.WriteString(fmt.Sprintf("resource %q: %v", name, err.Error())) - } - } - return errors.New(errStrB.String()) -} - -func (xc *xdsChannel) 
triggerResourceNotFoundForTesting(rType xdsresource.Type, resourceName string) error { - if xc.closed.HasFired() { - return fmt.Errorf("triggerResourceNotFoundForTesting() called on a closed channel") - } - if xc.logger.V(2) { - xc.logger.Infof("Triggering resource not found for type: %s, resource name: %s", rType.TypeName(), resourceName) - } - xc.ads.TriggerResourceNotFoundForTesting(rType, resourceName) - return nil -} diff --git a/xds/internal/xdsclient/channel_test.go b/xds/internal/xdsclient/channel_test.go deleted file mode 100644 index 976240ebc22a..000000000000 --- a/xds/internal/xdsclient/channel_test.go +++ /dev/null @@ -1,972 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package xdsclient - -import ( - "context" - "fmt" - "strings" - "testing" - "time" - - "github.com/envoyproxy/go-control-plane/pkg/wellknown" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/google/uuid" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/internal/testutils/xds/fakeserver" - "google.golang.org/grpc/internal/xds/bootstrap" - xdsinternal "google.golang.org/grpc/xds/internal" - "google.golang.org/grpc/xds/internal/httpfilter" - "google.golang.org/grpc/xds/internal/httpfilter/router" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" - "google.golang.org/grpc/xds/internal/xdsclient/transport/grpctransport" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - "google.golang.org/protobuf/testing/protocmp" - "google.golang.org/protobuf/types/known/anypb" - "google.golang.org/protobuf/types/known/durationpb" - - v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" - v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" - v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" - v3routerpb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/http/router/v3" - v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" -) - -// Lookup the listener resource type from the resource type map. This is used to -// parse listener resources used in this test. -var listenerType = xdsinternal.ResourceTypeMapForTesting[version.V3ListenerURL].(xdsresource.Type) - -// xdsChannelForTest creates an xdsChannel to the specified serverURI for -// testing purposes. 
-func xdsChannelForTest(t *testing.T, serverURI, nodeID string, watchExpiryTimeout time.Duration) *xdsChannel { - t.Helper() - - // Create server configuration for the above management server. - serverCfg, err := bootstrap.ServerConfigForTesting(bootstrap.ServerConfigTestingOptions{URI: serverURI}) - if err != nil { - t.Fatalf("Failed to create server config for testing: %v", err) - } - - // Create a grpc transport to the above management server. - tr, err := (&grpctransport.Builder{}).Build(transport.BuildOptions{ServerConfig: serverCfg}) - if err != nil { - t.Fatalf("Failed to create a transport for server config %s: %v", serverCfg, err) - } - - // Create bootstrap configuration with the top-level xds servers - // field containing the server configuration for the above - // management server. - contents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ - Servers: []byte(fmt.Sprintf(`[{ - "server_uri": %q, - "channel_creds": [{"type": "insecure"}] - }]`, serverURI)), - Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), - }) - if err != nil { - t.Fatalf("Failed to create bootstrap contents: %v", err) - } - bootstrapCfg, err := bootstrap.NewConfigFromContents(contents) - if err != nil { - t.Fatalf("Failed to create bootstrap configuration: %v", err) - } - - // Create an xdsChannel that uses everything set up above. - xc, err := newXDSChannel(xdsChannelOpts{ - transport: tr, - serverConfig: serverCfg, - bootstrapConfig: bootstrapCfg, - resourceTypeGetter: func(typeURL string) xdsresource.Type { - if typeURL != "type.googleapis.com/envoy.config.listener.v3.Listener" { - return nil - } - return listenerType - }, - eventHandler: newTestEventHandler(), - watchExpiryTimeout: watchExpiryTimeout, - }) - if err != nil { - t.Fatalf("Failed to create xdsChannel: %v", err) - } - t.Cleanup(func() { xc.close() }) - return xc -} - -// verifyUpdateAndMetadata verifies that the event handler received the expected -// updates and metadata. 
It checks that the received resource type matches the -// expected type, and that the received updates and metadata match the expected -// values. The function ignores the timestamp fields in the metadata, as those -// are expected to be different. -func verifyUpdateAndMetadata(ctx context.Context, t *testing.T, eh *testEventHandler, wantUpdates map[string]ads.DataAndErrTuple, wantMD xdsresource.UpdateMetadata) { - t.Helper() - - gotTyp, gotUpdates, gotMD, err := eh.waitForUpdate(ctx) - if err != nil { - t.Fatalf("Timeout when waiting for update callback to be invoked on the event handler") - } - - if gotTyp != listenerType { - t.Fatalf("Got resource type %v, want %v", gotTyp, listenerType) - } - opts := cmp.Options{ - protocmp.Transform(), - cmpopts.EquateEmpty(), - cmpopts.EquateErrors(), - cmpopts.IgnoreFields(xdsresource.UpdateMetadata{}, "Timestamp"), - cmpopts.IgnoreFields(xdsresource.UpdateErrorMetadata{}, "Timestamp"), - } - if diff := cmp.Diff(wantUpdates, gotUpdates, opts); diff != "" { - t.Fatalf("Got unexpected diff in update (-want +got):\n%s\n want: %+v\n got: %+v", diff, wantUpdates, gotUpdates) - } - if diff := cmp.Diff(wantMD, gotMD, opts); diff != "" { - t.Fatalf("Got unexpected diff in update (-want +got):\n%s\n want: %v\n got: %v", diff, wantMD, gotMD) - } -} - -// Tests different failure cases when creating a new xdsChannel. It checks that -// the xdsChannel creation fails when any of the required options (transport, -// serverConfig, bootstrapConfig, or resourceTypeGetter) are missing or nil. 
-func (s) TestChannel_New_FailureCases(t *testing.T) { - type fakeTransport struct { - transport.Transport - } - - tests := []struct { - name string - opts xdsChannelOpts - wantErrStr string - }{ - { - name: "emptyTransport", - opts: xdsChannelOpts{}, - wantErrStr: "transport is nil", - }, - { - name: "emptyServerConfig", - opts: xdsChannelOpts{transport: &fakeTransport{}}, - wantErrStr: "serverConfig is nil", - }, - { - name: "emptyBootstrapConfig", - opts: xdsChannelOpts{ - transport: &fakeTransport{}, - serverConfig: &bootstrap.ServerConfig{}, - }, - wantErrStr: "bootstrapConfig is nil", - }, - { - name: "emptyResourceTypeGetter", - opts: xdsChannelOpts{ - transport: &fakeTransport{}, - serverConfig: &bootstrap.ServerConfig{}, - bootstrapConfig: &bootstrap.Config{}, - }, - wantErrStr: "resourceTypeGetter is nil", - }, - { - name: "emptyEventHandler", - opts: xdsChannelOpts{ - transport: &fakeTransport{}, - serverConfig: &bootstrap.ServerConfig{}, - bootstrapConfig: &bootstrap.Config{}, - resourceTypeGetter: func(string) xdsresource.Type { return nil }, - }, - wantErrStr: "eventHandler is nil", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if _, err := newXDSChannel(test.opts); err == nil || !strings.Contains(err.Error(), test.wantErrStr) { - t.Fatalf("newXDSChannel() = %v, want %q", err, test.wantErrStr) - } - }) - } -} - -// Tests different scenarios of the xdsChannel receiving a response from the -// management server. In all scenarios, the xdsChannel is expected to pass the -// received responses as-is to the resource parsing functionality specified by -// the resourceTypeGetter. 
-func (s) TestChannel_ADS_HandleResponseFromManagementServer(t *testing.T) { - const ( - listenerName1 = "listener-name-1" - listenerName2 = "listener-name-2" - routeName = "route-name" - clusterName = "cluster-name" - ) - var ( - badlyMarshaledResource = &anypb.Any{ - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Value: []byte{1, 2, 3, 4}, - } - apiListener = &v3listenerpb.ApiListener{ - ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ - RouteSpecifier: &v3httppb.HttpConnectionManager_RouteConfig{ - RouteConfig: &v3routepb.RouteConfiguration{ - Name: routeName, - VirtualHosts: []*v3routepb.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*v3routepb.Route{{ - Match: &v3routepb.RouteMatch{ - PathSpecifier: &v3routepb.RouteMatch_Prefix{Prefix: "/"}, - }, - Action: &v3routepb.Route_Route{ - Route: &v3routepb.RouteAction{ - ClusterSpecifier: &v3routepb.RouteAction_Cluster{Cluster: clusterName}, - }}}}}}}, - }, - HttpFilters: []*v3httppb.HttpFilter{e2e.RouterHTTPFilter}, - CommonHttpProtocolOptions: &v3corepb.HttpProtocolOptions{ - MaxStreamDuration: durationpb.New(time.Second), - }, - }), - } - listener1 = testutils.MarshalAny(t, &v3listenerpb.Listener{ - Name: listenerName1, - ApiListener: apiListener, - }) - listener2 = testutils.MarshalAny(t, &v3listenerpb.Listener{ - Name: listenerName2, - ApiListener: apiListener, - }) - ) - - tests := []struct { - desc string - resourceNamesToRequest []string - managementServerResponse *v3discoverypb.DiscoveryResponse - wantUpdates map[string]ads.DataAndErrTuple - wantMD xdsresource.UpdateMetadata - wantErr error - }{ - { - desc: "one bad resource - deserialization failure", - resourceNamesToRequest: []string{listenerName1}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{badlyMarshaledResource}, - }, - wantUpdates: nil, // No updates expected as the response 
runs into unmarshaling errors. - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusNACKed, - Version: "0", - ErrState: &xdsresource.UpdateErrorMetadata{ - Version: "0", - Err: cmpopts.AnyError, - }, - }, - wantErr: cmpopts.AnyError, - }, - { - desc: "one bad resource - validation failure", - resourceNamesToRequest: []string{listenerName1}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{ - Name: listenerName1, - ApiListener: &v3listenerpb.ApiListener{ - ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ - RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{}, - }), - }, - })}, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName1: { - Err: cmpopts.AnyError, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusNACKed, - Version: "0", - ErrState: &xdsresource.UpdateErrorMetadata{ - Version: "0", - Err: cmpopts.AnyError, - }, - }, - }, - { - desc: "two bad resources", - resourceNamesToRequest: []string{listenerName1, listenerName2}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{ - badlyMarshaledResource, - testutils.MarshalAny(t, &v3listenerpb.Listener{ - Name: listenerName2, - ApiListener: &v3listenerpb.ApiListener{ - ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ - RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{}, - }), - }, - }), - }, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName2: { - Err: cmpopts.AnyError, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusNACKed, - Version: "0", - ErrState: &xdsresource.UpdateErrorMetadata{ - Version: "0", - Err: cmpopts.AnyError, - }, - }, - }, - { 
- desc: "one good resource", - resourceNamesToRequest: []string{listenerName1}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{listener1}, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName1: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener1, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusACKed, - Version: "0", - }, - }, - { - desc: "one good and one bad - deserialization failure", - resourceNamesToRequest: []string{listenerName1, listenerName2}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{ - badlyMarshaledResource, - listener2, - }, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName2: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener2, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusNACKed, - Version: "0", 
- ErrState: &xdsresource.UpdateErrorMetadata{ - Version: "0", - Err: cmpopts.AnyError, - }, - }, - }, - { - desc: "one good and one bad - validation failure", - resourceNamesToRequest: []string{listenerName1, listenerName2}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{ - testutils.MarshalAny(t, &v3listenerpb.Listener{ - Name: listenerName1, - ApiListener: &v3listenerpb.ApiListener{ - ApiListener: testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ - RouteSpecifier: &v3httppb.HttpConnectionManager_ScopedRoutes{}, - }), - }, - }), - listener2, - }, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName1: {Err: cmpopts.AnyError}, - listenerName2: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener2, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusNACKed, - Version: "0", - ErrState: &xdsresource.UpdateErrorMetadata{ - Version: "0", - Err: cmpopts.AnyError, - }, - }, - }, - { - desc: "two good resources", - resourceNamesToRequest: []string{listenerName1, listenerName2}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{listener1, listener2}, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName1: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: 
&xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener1, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - listenerName2: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener2, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusACKed, - Version: "0", - }, - }, - { - desc: "two resources when we requested one", - resourceNamesToRequest: []string{listenerName1}, - managementServerResponse: &v3discoverypb.DiscoveryResponse{ - VersionInfo: "0", - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - Resources: []*anypb.Any{listener1, listener2}, - }, - wantUpdates: map[string]ads.DataAndErrTuple{ - listenerName1: { - Resource: &xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener1, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - listenerName2: { - Resource: 
&xdsresource.ListenerResourceData{Resource: xdsresource.ListenerUpdate{ - InlineRouteConfig: &xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{{ - Domains: []string{"*"}, - Routes: []*xdsresource.Route{{ - Prefix: newStringP("/"), - WeightedClusters: map[string]xdsresource.WeightedCluster{clusterName: {Weight: 1}}, - ActionType: xdsresource.RouteActionRoute}, - }, - }}}, - MaxStreamDuration: time.Second, - Raw: listener2, - HTTPFilters: makeRouterFilterList(t), - }}, - }, - }, - wantMD: xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusACKed, - Version: "0", - }, - }, - } - - for _, test := range tests { - t.Run(test.desc, func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Start a fake xDS management server and configure the response it - // would send to its client. - mgmtServer, cleanup, err := fakeserver.StartServer(nil) - if err != nil { - t.Fatalf("Failed to start fake xDS server: %v", err) - } - defer cleanup() - t.Logf("Started xDS management server on %s", mgmtServer.Address) - mgmtServer.XDSResponseChan <- &fakeserver.Response{Resp: test.managementServerResponse} - - // Create an xdsChannel for the test with a long watch expiry timer - // to ensure that watches don't expire for the duration of the test. - nodeID := uuid.New().String() - xc := xdsChannelForTest(t, mgmtServer.Address, nodeID, 2*defaultTestTimeout) - defer xc.close() - - // Subscribe to the resources specified in the test table. - for _, name := range test.resourceNamesToRequest { - xc.subscribe(listenerType, name) - } - - // Wait for an update callback on the event handler and verify the - // contents of the update and the metadata. 
- verifyUpdateAndMetadata(ctx, t, xc.eventHandler.(*testEventHandler), test.wantUpdates, test.wantMD) - }) - } -} - -// Tests that the xdsChannel correctly handles the expiry of a watch for a -// resource by ensuring that the watch expiry callback is invoked on the event -// handler with the expected resource type and name. -func (s) TestChannel_ADS_HandleResponseWatchExpiry(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Start an xDS management server, but do not configure any resources on it. - // This will result in the watch for a resource to timeout. - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create an xdsChannel for the test with a short watch expiry timer to - // ensure that the test does not run very long, as it needs to wait for the - // watch to expire. - nodeID := uuid.New().String() - xc := xdsChannelForTest(t, mgmtServer.Address, nodeID, 2*defaultTestShortTimeout) - defer xc.close() - - // Subscribe to a listener resource. - const listenerName = "listener-name" - xc.subscribe(listenerType, listenerName) - - // Wait for the watch expiry callback on the authority to be invoked and - // verify that the watch expired for the expected resource name and type. - eventHandler := xc.eventHandler.(*testEventHandler) - gotTyp, gotName, err := eventHandler.waitForResourceDoesNotExist(ctx) - if err != nil { - t.Fatal("Timeout when waiting for the watch expiry callback to be invoked on the xDS client") - } - - if gotTyp != listenerType { - t.Fatalf("Got type %v, want %v", gotTyp, listenerType) - } - if gotName != listenerName { - t.Fatalf("Got name %v, want %v", gotName, listenerName) - } -} - -// Tests that the xdsChannel correctly handles stream failures by ensuring that -// the stream failure callback is invoked on the event handler. 
-func (s) TestChannel_ADS_StreamFailure(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Start an xDS management server with a restartable listener to simulate - // connection failures. - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("net.Listen() failed: %v", err) - } - lis := testutils.NewRestartableListener(l) - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis}) - - // Configure a listener resource on the management server. - const listenerResourceName = "test-listener-resource" - const routeConfigurationName = "test-route-configuration-resource" - nodeID := uuid.New().String() - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Create an xdsChannel for the test with a long watch expiry timer - // to ensure that watches don't expire for the duration of the test. - xc := xdsChannelForTest(t, mgmtServer.Address, nodeID, 2*defaultTestTimeout) - defer xc.close() - - // Subscribe to the resource created above. - xc.subscribe(listenerType, listenerResourceName) - - // Wait for an update callback on the event handler and verify the - // contents of the update and the metadata. 
- hcm := testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ - RouteSpecifier: &v3httppb.HttpConnectionManager_Rds{Rds: &v3httppb.Rds{ - ConfigSource: &v3corepb.ConfigSource{ - ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{Ads: &v3corepb.AggregatedConfigSource{}}, - }, - RouteConfigName: routeConfigurationName, - }}, - HttpFilters: []*v3httppb.HttpFilter{e2e.HTTPFilter("router", &v3routerpb.Router{})}, - }) - listenerResource, err := anypb.New(&v3listenerpb.Listener{ - Name: listenerResourceName, - ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm}, - FilterChains: []*v3listenerpb.FilterChain{{ - Name: "filter-chain-name", - Filters: []*v3listenerpb.Filter{{ - Name: wellknown.HTTPConnectionManager, - ConfigType: &v3listenerpb.Filter_TypedConfig{TypedConfig: hcm}, - }}, - }}, - }) - if err != nil { - t.Fatalf("Failed to create listener resource: %v", err) - } - - wantUpdates := map[string]ads.DataAndErrTuple{ - listenerResourceName: { - Resource: &xdsresource.ListenerResourceData{ - Resource: xdsresource.ListenerUpdate{ - RouteConfigName: routeConfigurationName, - HTTPFilters: makeRouterFilterList(t), - Raw: listenerResource, - }, - }, - }, - } - wantMD := xdsresource.UpdateMetadata{ - Status: xdsresource.ServiceStatusACKed, - Version: "1", - } - - eventHandler := xc.eventHandler.(*testEventHandler) - verifyUpdateAndMetadata(ctx, t, eventHandler, wantUpdates, wantMD) - - lis.Stop() - if err := eventHandler.waitForStreamFailure(ctx); err != nil { - t.Fatalf("Timeout when waiting for the stream failure callback to be invoked on the xDS client: %v", err) - } -} - -// Tests the behavior of the xdsChannel when a resource is unsubscribed. -// Verifies that when a previously subscribed resource is unsubscribed, a -// request is sent without the previously subscribed resource name. 
-func (s) TestChannel_ADS_ResourceUnsubscribe(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Start an xDS management server that uses a channel to inform the test - // about the specific LDS resource names being requested. - ldsResourcesCh := make(chan []string, 1) - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ - OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { - t.Logf("Received request for resources: %v of type %s", req.GetResourceNames(), req.GetTypeUrl()) - - if req.TypeUrl != version.V3ListenerURL { - return fmt.Errorf("unexpected resource type URL: %q", req.TypeUrl) - } - - // Make the most recently requested names available to the test. - ldsResourcesCh <- req.GetResourceNames() - return nil - }, - }) - - // Configure two listener resources on the management server. - const listenerResourceName1 = "test-listener-resource-1" - const routeConfigurationName1 = "test-route-configuration-resource-1" - const listenerResourceName2 = "test-listener-resource-2" - const routeConfigurationName2 = "test-route-configuration-resource-2" - nodeID := uuid.New().String() - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{ - e2e.DefaultClientListener(listenerResourceName1, routeConfigurationName1), - e2e.DefaultClientListener(listenerResourceName2, routeConfigurationName2), - }, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Create an xdsChannel for the test with a long watch expiry timer - // to ensure that watches don't expire for the duration of the test. - xc := xdsChannelForTest(t, mgmtServer.Address, nodeID, 2*defaultTestTimeout) - defer xc.close() - - // Subscribe to the resources created above and verify that a request is - // sent for the same. 
- xc.subscribe(listenerType, listenerResourceName1) - xc.subscribe(listenerType, listenerResourceName2) - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerResourceName1, listenerResourceName2}); err != nil { - t.Fatal(err) - } - - // Wait for the above resources to be ACKed. - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerResourceName1, listenerResourceName2}); err != nil { - t.Fatal(err) - } - - // Unsubscribe to one of the resources created above, and ensure that the - // other resource is still being requested. - xc.unsubscribe(listenerType, listenerResourceName1) - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerResourceName2}); err != nil { - t.Fatal(err) - } - - // Since the version on the management server for the above resource is not - // changed, we will not receive an update from it for the one resource that - // we are still requesting. - - // Unsubscribe to the remaining resource, and ensure that no more resources - // are being requested. - xc.unsubscribe(listenerType, listenerResourceName2) - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{}); err != nil { - t.Fatal(err) - } -} - -// Tests the load reporting functionality of the xdsChannel. It creates an -// xdsChannel, starts load reporting, and verifies that an LRS streaming RPC is -// created. It then makes another call to the load reporting API and ensures -// that a new LRS stream is not created. Finally, it cancels the load reporting -// calls and ensures that the stream is closed when the last call is canceled. -// -// Note that this test does not actually report any load. That is already tested -// by an e2e style test in the xdsclient package. -func (s) TestChannel_LRS_ReportLoad(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Create a management server that serves LRS. 
- mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true}) - - // Create an xdsChannel for the test. Node id and watch expiry timer don't - // matter for LRS. - xc := xdsChannelForTest(t, mgmtServer.Address, "", defaultTestTimeout) - defer xc.close() - - // Start load reporting and verify that an LRS streaming RPC is created. - _, stopLRS1 := xc.reportLoad() - lrsServer := mgmtServer.LRSServer - if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil { - t.Fatalf("Timeout when waiting for an LRS streaming RPC to be created: %v", err) - } - - // Make another call to the load reporting API, and ensure that a new LRS - // stream is not created. - _, stopLRS2 := xc.reportLoad() - sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer sCancel() - if _, err := lrsServer.LRSStreamOpenChan.Receive(sCtx); err != context.DeadlineExceeded { - t.Fatal("New LRS streaming RPC created when expected to use an existing one") - } - - // Cancel the first load reporting call, and ensure that the stream does not - // close (because we have another call open). - stopLRS1() - sCtx, sCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) - defer sCancel() - if _, err := lrsServer.LRSStreamCloseChan.Receive(sCtx); err != context.DeadlineExceeded { - t.Fatal("LRS stream closed when expected to stay open") - } - - // Cancel the second load reporting call, and ensure the stream is closed. - stopLRS2() - if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil { - t.Fatal("Timeout waiting for LRS stream to close") - } -} - -// waitForResourceNames waits for the wantNames to be received on namesCh. -// Returns a non-nil error if the context expires before that. 
-func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error { - t.Helper() - - var lastRequestedNames []string - for ; ; <-time.After(defaultTestShortTimeout) { - select { - case <-ctx.Done(): - return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. Last requested resources: %v", wantNames, lastRequestedNames) - case gotNames := <-namesCh: - if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) { - return nil - } - lastRequestedNames = gotNames - } - } -} - -// newTestEventHandler creates a new testEventHandler instance with the -// necessary channels for testing the xdsChannel. -func newTestEventHandler() *testEventHandler { - return &testEventHandler{ - typeCh: make(chan xdsresource.Type, 1), - updateCh: make(chan map[string]ads.DataAndErrTuple, 1), - mdCh: make(chan xdsresource.UpdateMetadata, 1), - nameCh: make(chan string, 1), - connErrCh: make(chan error, 1), - } -} - -// testEventHandler is a struct that implements the xdsChannelEventhandler -// interface. It is used to receive events from an xdsChannel, and has multiple -// channels on which it makes these events available to the test. -type testEventHandler struct { - typeCh chan xdsresource.Type // Resource type of an update or resource-does-not-exist error. - updateCh chan map[string]ads.DataAndErrTuple // Resource updates. - mdCh chan xdsresource.UpdateMetadata // Metadata from an update. - nameCh chan string // Name of the non-existent resource. - connErrCh chan error // Connectivity error. 
- -} - -func (ta *testEventHandler) adsStreamFailure(err error) { - ta.connErrCh <- err -} - -func (ta *testEventHandler) waitForStreamFailure(ctx context.Context) error { - select { - case <-ctx.Done(): - return ctx.Err() - case <-ta.connErrCh: - } - return nil -} - -func (ta *testEventHandler) adsResourceUpdate(typ xdsresource.Type, updates map[string]ads.DataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { - ta.typeCh <- typ - ta.updateCh <- updates - ta.mdCh <- md - onDone() -} - -// waitForUpdate waits for the next resource update event from the xdsChannel. -// It returns the resource type, the resource updates, and the update metadata. -// If the context is canceled, it returns an error. -func (ta *testEventHandler) waitForUpdate(ctx context.Context) (xdsresource.Type, map[string]ads.DataAndErrTuple, xdsresource.UpdateMetadata, error) { - var typ xdsresource.Type - var updates map[string]ads.DataAndErrTuple - var md xdsresource.UpdateMetadata - - select { - case typ = <-ta.typeCh: - case <-ctx.Done(): - return nil, nil, xdsresource.UpdateMetadata{}, ctx.Err() - } - - select { - case updates = <-ta.updateCh: - case <-ctx.Done(): - return nil, nil, xdsresource.UpdateMetadata{}, ctx.Err() - } - - select { - case md = <-ta.mdCh: - case <-ctx.Done(): - return nil, nil, xdsresource.UpdateMetadata{}, ctx.Err() - } - return typ, updates, md, nil -} - -func (ta *testEventHandler) adsResourceDoesNotExist(typ xdsresource.Type, name string) { - ta.typeCh <- typ - ta.nameCh <- name -} - -// waitForResourceDoesNotExist waits for the next resource-does-not-exist event -// from the xdsChannel. It returns the resource type and the resource name. If -// the context is canceled, it returns an error. 
-func (ta *testEventHandler) waitForResourceDoesNotExist(ctx context.Context) (xdsresource.Type, string, error) { - var typ xdsresource.Type - var name string - - select { - case typ = <-ta.typeCh: - case <-ctx.Done(): - return nil, "", ctx.Err() - } - - select { - case name = <-ta.nameCh: - case <-ctx.Done(): - return nil, "", ctx.Err() - } - return typ, name, nil -} - -func newStringP(s string) *string { - return &s -} - -func makeRouterFilter(t *testing.T) xdsresource.HTTPFilter { - routerBuilder := httpfilter.Get(router.TypeURL) - routerConfig, _ := routerBuilder.ParseFilterConfig(testutils.MarshalAny(t, &v3routerpb.Router{})) - return xdsresource.HTTPFilter{Name: "router", Filter: routerBuilder, Config: routerConfig} -} - -func makeRouterFilterList(t *testing.T) []xdsresource.HTTPFilter { - return []xdsresource.HTTPFilter{makeRouterFilter(t)} -} diff --git a/xds/internal/xdsclient/client.go b/xds/internal/xdsclient/client.go index 8fa6b1a52261..42f1a28f0c08 100644 --- a/xds/internal/xdsclient/client.go +++ b/xds/internal/xdsclient/client.go @@ -21,9 +21,11 @@ package xdsclient import ( + "context" + v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient/load" + "google.golang.org/grpc/xds/internal/clients/lrsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) @@ -47,7 +49,7 @@ type XDSClient interface { // the watcher is canceled. Callers need to handle this case. 
WatchResource(rType xdsresource.Type, resourceName string, watcher xdsresource.ResourceWatcher) (cancel func()) - ReportLoad(*bootstrap.ServerConfig) (*load.Store, func()) + ReportLoad(*bootstrap.ServerConfig) (*lrsclient.LoadStore, func(context.Context)) BootstrapConfig() *bootstrap.Config } diff --git a/xds/internal/xdsclient/clientimpl.go b/xds/internal/xdsclient/clientimpl.go index d8e727e3180e..ce7cc5dddc59 100644 --- a/xds/internal/xdsclient/clientimpl.go +++ b/xds/internal/xdsclient/clientimpl.go @@ -19,25 +19,24 @@ package xdsclient import ( - "context" - "errors" "fmt" - "sync" "sync/atomic" "time" - v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" + "google.golang.org/grpc" estats "google.golang.org/grpc/experimental/stats" - "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/backoff" "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/xds/bootstrap" - xdsclientinternal "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" - "google.golang.org/grpc/xds/internal/xdsclient/transport/grpctransport" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" + "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" + + xdsbootstrap "google.golang.org/grpc/xds/bootstrap" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/lrsclient" + "google.golang.org/grpc/xds/internal/clients/xdsclient" + "google.golang.org/grpc/xds/internal/clients/xdsclient/metrics" ) const ( @@ -50,11 +49,6 @@ const ( ) var ( - _ XDSClient = &clientImpl{} - - // ErrClientClosed is returned when the xDS client is closed. 
- ErrClientClosed = errors.New("xds: the xDS client is closed") - // The following functions are no-ops in the actual code, but can be // overridden in tests to give them visibility into certain events. xdsClientImplCreateHook = func(string) {} @@ -85,88 +79,59 @@ var ( }) ) -// clientImpl is the real implementation of the xDS client. The exported Client -// is a wrapper of this struct with a ref count. +// clientImpl embed xdsclient.XDSClient and implement internal XDSClient +// interface with ref counting so that it can be shared by the xds resolver and +// balancer implementations, across multiple ClientConns and Servers. type clientImpl struct { + *xdsclient.XDSClient // TODO: #8313 - get rid of embedding, if possible. + // The following fields are initialized at creation time and are read-only - // after that, and therefore can be accessed without a mutex. - done *grpcsync.Event // Fired when the client is closed. - topLevelAuthority *authority // The top-level authority, used only for old-style names without an authority. - authorities map[string]*authority // Map from authority names in bootstrap to authority struct. - config *bootstrap.Config // Complete bootstrap configuration. - watchExpiryTimeout time.Duration // Expiry timeout for ADS watch. - backoff func(int) time.Duration // Backoff for ADS and LRS stream failures. - transportBuilder transport.Builder // Builder to create transports to xDS server. - resourceTypes *resourceTypeRegistry // Registry of resource types, for parsing incoming ADS responses. - serializer *grpcsync.CallbackSerializer // Serializer for invoking resource watcher callbacks. - serializerClose func() // Function to close the serializer. - logger *grpclog.PrefixLogger // Logger for this client. - metricsRecorder estats.MetricsRecorder // Metrics recorder for metrics. - target string // The gRPC target for this client. + // after that. 
+ xdsClientConfig xdsclient.Config + bootstrapConfig *bootstrap.Config + logger *grpclog.PrefixLogger + target string + lrsClient *lrsclient.LRSClient + + // Accessed atomically + refCount int32 +} - // The clientImpl owns a bunch of channels to individual xDS servers - // specified in the bootstrap configuration. Authorities acquire references - // to these channels based on server configs within the authority config. - // The clientImpl maintains a list of interested authorities for each of - // these channels, and forwards updates from the channels to each of these - // authorities. - // - // Once all references to a channel are dropped, the channel is closed. - channelsMu sync.Mutex - xdsActiveChannels map[string]*channelState // Map from server config to in-use xdsChannels. +// metricsReporter implements the clients.MetricsReporter interface and uses an +// underlying stats.MetricsRecorderList to record metrics. +type metricsReporter struct { + recorder estats.MetricsRecorder + target string } -func init() { - internal.TriggerXDSResourceNotFoundForTesting = triggerXDSResourceNotFoundForTesting - xdsclientinternal.ResourceWatchStateForTesting = resourceWatchStateForTesting +// ReportMetric implements the clients.MetricsReporter interface. +// It receives metric data, determines the appropriate metric based on the type +// of the data, and records it using the embedded MetricsRecorderList. 
+func (mr *metricsReporter) ReportMetric(metric any) { + if mr.recorder == nil { + return + } - DefaultPool = &Pool{clients: make(map[string]*clientRefCounted)} + switch m := metric.(type) { + case *metrics.ResourceUpdateValid: + xdsClientResourceUpdatesValidMetric.Record(mr.recorder, 1, mr.target, m.ServerURI, m.ResourceType) + case *metrics.ResourceUpdateInvalid: + xdsClientResourceUpdatesInvalidMetric.Record(mr.recorder, 1, mr.target, m.ServerURI, m.ResourceType) + case *metrics.ServerFailure: + xdsClientServerFailureMetric.Record(mr.recorder, 1, mr.target, m.ServerURI) + } } -// newClientImpl returns a new xdsClient with the given config. -func newClientImpl(config *bootstrap.Config, watchExpiryTimeout time.Duration, streamBackoff func(int) time.Duration, mr estats.MetricsRecorder, target string) (*clientImpl, error) { - ctx, cancel := context.WithCancel(context.Background()) - c := &clientImpl{ - metricsRecorder: mr, - target: target, - done: grpcsync.NewEvent(), - authorities: make(map[string]*authority), - config: config, - watchExpiryTimeout: watchExpiryTimeout, - backoff: streamBackoff, - serializer: grpcsync.NewCallbackSerializer(ctx), - serializerClose: cancel, - transportBuilder: &grpctransport.Builder{}, - resourceTypes: newResourceTypeRegistry(), - xdsActiveChannels: make(map[string]*channelState), +func newClientImpl(config *bootstrap.Config, metricsRecorder estats.MetricsRecorder, target string) (*clientImpl, error) { + gConfig, err := buildXDSClientConfig(config, metricsRecorder, target) + if err != nil { + return nil, err } - - for name, cfg := range config.Authorities() { - // If server configs are specified in the authorities map, use that. - // Else, use the top-level server configs. 
- serverCfg := config.XDSServers() - if len(cfg.XDSServers) >= 1 { - serverCfg = cfg.XDSServers - } - c.authorities[name] = newAuthority(authorityBuildOptions{ - serverConfigs: serverCfg, - name: name, - serializer: c.serializer, - getChannelForADS: c.getChannelForADS, - logPrefix: clientPrefix(c), - target: target, - metricsRecorder: c.metricsRecorder, - }) + client, err := xdsclient.New(gConfig) + if err != nil { + return nil, err } - c.topLevelAuthority = newAuthority(authorityBuildOptions{ - serverConfigs: config.XDSServers(), - name: "", - serializer: c.serializer, - getChannelForADS: c.getChannelForADS, - logPrefix: clientPrefix(c), - target: target, - metricsRecorder: c.metricsRecorder, - }) + c := &clientImpl{XDSClient: client, xdsClientConfig: gConfig, bootstrapConfig: config, target: target, refCount: 1} c.logger = prefixLogger(c) return c, nil } @@ -174,332 +139,133 @@ func newClientImpl(config *bootstrap.Config, watchExpiryTimeout time.Duration, s // BootstrapConfig returns the configuration read from the bootstrap file. // Callers must treat the return value as read-only. func (c *clientImpl) BootstrapConfig() *bootstrap.Config { - return c.config + return c.bootstrapConfig } -// close closes the xDS client and releases all resources. -func (c *clientImpl) close() { - if c.done.HasFired() { - return - } - c.done.Fire() - - c.topLevelAuthority.close() - for _, a := range c.authorities { - a.close() - } +func (c *clientImpl) incrRef() int32 { + return atomic.AddInt32(&c.refCount, 1) +} - // Channel close cannot be invoked with the lock held, because it can race - // with stream failure happening at the same time. The latter will callback - // into the clientImpl and will attempt to grab the lock. This will result - // in a deadlock. So instead, we release the lock and wait for all active - // channels to be closed. 
- var channelsToClose []*xdsChannel - c.channelsMu.Lock() - for _, cs := range c.xdsActiveChannels { - channelsToClose = append(channelsToClose, cs.channel) - } - c.xdsActiveChannels = nil - c.channelsMu.Unlock() - for _, c := range channelsToClose { - c.close() - } +func (c *clientImpl) decrRef() int32 { + return atomic.AddInt32(&c.refCount, -1) +} - c.serializerClose() - <-c.serializer.Done() +// buildXDSClientConfig builds the xdsclient.Config from the bootstrap.Config. +func buildXDSClientConfig(config *bootstrap.Config, metricsRecorder estats.MetricsRecorder, target string) (xdsclient.Config, error) { + grpcTransportConfigs := make(map[string]grpctransport.Config) + gServerCfgMap := make(map[xdsclient.ServerConfig]*bootstrap.ServerConfig) - for _, s := range c.config.XDSServers() { - for _, f := range s.Cleanups() { - f() + gAuthorities := make(map[string]xdsclient.Authority) + for name, cfg := range config.Authorities() { + // If server configs are specified in the authorities map, use that. + // Else, use the top-level server configs. + serverCfg := config.XDSServers() + if len(cfg.XDSServers) >= 1 { + serverCfg = cfg.XDSServers } - } - for _, a := range c.config.Authorities() { - for _, s := range a.XDSServers { - for _, f := range s.Cleanups() { - f() + var gServerCfg []xdsclient.ServerConfig + for _, sc := range serverCfg { + if err := populateGRPCTransportConfigsFromServerConfig(sc, grpcTransportConfigs); err != nil { + return xdsclient.Config{}, err } + gsc := xdsclient.ServerConfig{ + ServerIdentifier: clients.ServerIdentifier{ServerURI: sc.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: sc.SelectedCreds().Type}}, + IgnoreResourceDeletion: sc.ServerFeaturesIgnoreResourceDeletion()} + gServerCfg = append(gServerCfg, gsc) + gServerCfgMap[gsc] = sc } - } - c.logger.Infof("Shutdown") -} - -// getChannelForADS returns an xdsChannel for the given server configuration. 
-// -// If an xdsChannel exists for the given server configuration, it is returned. -// Else a new one is created. It also ensures that the calling authority is -// added to the set of interested authorities for the returned channel. -// -// It returns the xdsChannel and a function to release the calling authority's -// reference on the channel. The caller must call the cancel function when it is -// no longer interested in this channel. -// -// A non-nil error is returned if an xdsChannel was not created. -func (c *clientImpl) getChannelForADS(serverConfig *bootstrap.ServerConfig, callingAuthority *authority) (*xdsChannel, func(), error) { - if c.done.HasFired() { - return nil, nil, ErrClientClosed + gAuthorities[name] = xdsclient.Authority{XDSServers: gServerCfg} } - initLocked := func(s *channelState) { - if c.logger.V(2) { - c.logger.Infof("Adding authority %q to the set of interested authorities for channel [%p]", callingAuthority.name, s.channel) + gServerCfgs := make([]xdsclient.ServerConfig, 0, len(config.XDSServers())) + for _, sc := range config.XDSServers() { + if err := populateGRPCTransportConfigsFromServerConfig(sc, grpcTransportConfigs); err != nil { + return xdsclient.Config{}, err } - s.interestedAuthorities[callingAuthority] = true - } - deInitLocked := func(s *channelState) { - if c.logger.V(2) { - c.logger.Infof("Removing authority %q from the set of interested authorities for channel [%p]", callingAuthority.name, s.channel) - } - delete(s.interestedAuthorities, callingAuthority) - } - - return c.getOrCreateChannel(serverConfig, initLocked, deInitLocked) -} - -// getChannelForLRS returns an xdsChannel for the given server configuration. -// -// If an xdsChannel exists for the given server configuration, it is returned. -// Else a new one is created. A reference count that tracks the number of LRS -// calls on the returned channel is incremented before returning the channel. 
-// -// It returns the xdsChannel and a function to decrement the reference count -// that tracks the number of LRS calls on the returned channel. The caller must -// call the cancel function when it is no longer interested in this channel. -// -// A non-nil error is returned if an xdsChannel was not created. -func (c *clientImpl) getChannelForLRS(serverConfig *bootstrap.ServerConfig) (*xdsChannel, func(), error) { - if c.done.HasFired() { - return nil, nil, ErrClientClosed + gsc := xdsclient.ServerConfig{ + ServerIdentifier: clients.ServerIdentifier{ServerURI: sc.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: sc.SelectedCreds().Type}}, + IgnoreResourceDeletion: sc.ServerFeaturesIgnoreResourceDeletion()} + gServerCfgs = append(gServerCfgs, gsc) + gServerCfgMap[gsc] = sc } - initLocked := func(s *channelState) { s.lrsRefs++ } - deInitLocked := func(s *channelState) { s.lrsRefs-- } - - return c.getOrCreateChannel(serverConfig, initLocked, deInitLocked) -} - -// getOrCreateChannel returns an xdsChannel for the given server configuration. -// -// If an active xdsChannel exists for the given server configuration, it is -// returned. If an idle xdsChannel exists for the given server configuration, it -// is revived from the idle cache and returned. Else a new one is created. -// -// The initLocked function runs some initialization logic before the channel is -// returned. This includes adding the calling authority to the set of interested -// authorities for the channel or incrementing the count of the number of LRS -// calls on the channel. -// -// The deInitLocked function runs some cleanup logic when the returned cleanup -// function is called. This involves removing the calling authority from the set -// of interested authorities for the channel or decrementing the count of the -// number of LRS calls on the channel. -// -// Both initLocked and deInitLocked are called with the c.channelsMu held. 
-// -// Returns the xdsChannel and a cleanup function to be invoked when the channel -// is no longer required. A non-nil error is returned if an xdsChannel was not -// created. -func (c *clientImpl) getOrCreateChannel(serverConfig *bootstrap.ServerConfig, initLocked, deInitLocked func(*channelState)) (*xdsChannel, func(), error) { - c.channelsMu.Lock() - defer c.channelsMu.Unlock() - - if c.logger.V(2) { - c.logger.Infof("Received request for a reference to an xdsChannel for server config %q", serverConfig) + node := config.Node() + gNode := clients.Node{ + ID: node.GetId(), + Cluster: node.GetCluster(), + Metadata: node.Metadata, + UserAgentName: node.UserAgentName, + UserAgentVersion: node.GetUserAgentVersion(), } - - // Use an existing channel, if one exists for this server config. - if state, ok := c.xdsActiveChannels[serverConfig.String()]; ok { - if c.logger.V(2) { - c.logger.Infof("Reusing an existing xdsChannel for server config %q", serverConfig) + if node.Locality != nil { + gNode.Locality = clients.Locality{ + Region: node.Locality.Region, + Zone: node.Locality.Zone, + SubZone: node.Locality.SubZone, } - initLocked(state) - return state.channel, c.releaseChannel(serverConfig, state, deInitLocked), nil - } - - if c.logger.V(2) { - c.logger.Infof("Creating a new xdsChannel for server config %q", serverConfig) } - // Create a new transport and create a new xdsChannel, and add it to the - // map of xdsChannels. 
- tr, err := c.transportBuilder.Build(transport.BuildOptions{ServerConfig: serverConfig}) - if err != nil { - return nil, func() {}, fmt.Errorf("xds: failed to create transport for server config %s: %v", serverConfig, err) - } - state := &channelState{ - parent: c, - serverConfig: serverConfig, - interestedAuthorities: make(map[*authority]bool), - } - channel, err := newXDSChannel(xdsChannelOpts{ - transport: tr, - serverConfig: serverConfig, - bootstrapConfig: c.config, - resourceTypeGetter: c.resourceTypes.get, - eventHandler: state, - backoff: c.backoff, - watchExpiryTimeout: c.watchExpiryTimeout, - logPrefix: clientPrefix(c), - }) - if err != nil { - return nil, func() {}, fmt.Errorf("xds: failed to create xdsChannel for server config %s: %v", serverConfig, err) + gTransportBuilder := grpctransport.NewBuilder(grpcTransportConfigs) + + resourceTypes := map[string]xdsclient.ResourceType{ + version.V3ListenerURL: { + TypeURL: version.V3ListenerURL, + TypeName: xdsresource.ListenerResourceTypeName, + AllResourcesRequiredInSotW: true, + Decoder: xdsresource.NewGenericListenerResourceTypeDecoder(config), + }, + version.V3RouteConfigURL: { + TypeURL: version.V3RouteConfigURL, + TypeName: xdsresource.RouteConfigTypeName, + AllResourcesRequiredInSotW: false, + Decoder: xdsresource.NewGenericRouteConfigResourceTypeDecoder(), + }, + version.V3ClusterURL: { + TypeURL: version.V3ClusterURL, + TypeName: xdsresource.ClusterResourceTypeName, + AllResourcesRequiredInSotW: true, + Decoder: xdsresource.NewGenericClusterResourceTypeDecoder(config, gServerCfgMap), + }, + version.V3EndpointsURL: { + TypeURL: version.V3EndpointsURL, + TypeName: xdsresource.EndpointsResourceTypeName, + AllResourcesRequiredInSotW: false, + Decoder: xdsresource.NewGenericEndpointsResourceTypeDecoder(), + }, } - state.channel = channel - c.xdsActiveChannels[serverConfig.String()] = state - initLocked(state) - return state.channel, c.releaseChannel(serverConfig, state, deInitLocked), nil -} -// 
releaseChannel is a function that is called when a reference to an xdsChannel -// needs to be released. It handles closing channels with no active references. -// -// The function takes the following parameters: -// - serverConfig: the server configuration for the xdsChannel -// - state: the state of the xdsChannel -// - deInitLocked: a function that performs any necessary cleanup for the xdsChannel -// -// The function returns another function that can be called to release the -// reference to the xdsChannel. This returned function is idempotent, meaning -// it can be called multiple times without any additional effect. -func (c *clientImpl) releaseChannel(serverConfig *bootstrap.ServerConfig, state *channelState, deInitLocked func(*channelState)) func() { - return sync.OnceFunc(func() { - c.channelsMu.Lock() + mr := &metricsReporter{recorder: metricsRecorder, target: target} - if c.logger.V(2) { - c.logger.Infof("Received request to release a reference to an xdsChannel for server config %q", serverConfig) - } - deInitLocked(state) + return xdsclient.Config{ + Authorities: gAuthorities, + Servers: gServerCfgs, + Node: gNode, + TransportBuilder: gTransportBuilder, + ResourceTypes: resourceTypes, + MetricsReporter: mr, + }, nil +} - // The channel has active users. Do nothing and return. - if state.lrsRefs != 0 || len(state.interestedAuthorities) != 0 { - if c.logger.V(2) { - c.logger.Infof("xdsChannel %p has other active references", state.channel) - } - c.channelsMu.Unlock() - return +// populateGRPCTransportConfigsFromServerConfig iterates through the channel +// credentials of the provided server configuration, builds credential bundles, +// and populates the grpctransport.Config map. 
+func populateGRPCTransportConfigsFromServerConfig(sc *bootstrap.ServerConfig, grpcTransportConfigs map[string]grpctransport.Config) error { + for _, cc := range sc.ChannelCreds() { + c := xdsbootstrap.GetCredentials(cc.Type) + if c == nil { + continue } - - delete(c.xdsActiveChannels, serverConfig.String()) - if c.logger.V(2) { - c.logger.Infof("Closing xdsChannel [%p] for server config %s", state.channel, serverConfig) + bundle, _, err := c.Build(cc.Config) + if err != nil { + return fmt.Errorf("xds: failed to build credentials bundle from bootstrap for %q: %v", cc.Type, err) } - channelToClose := state.channel - c.channelsMu.Unlock() - - channelToClose.close() - }) -} - -// dumpResources returns the status and contents of all xDS resources. -func (c *clientImpl) dumpResources() *v3statuspb.ClientConfig { - retCfg := c.topLevelAuthority.dumpResources() - for _, a := range c.authorities { - retCfg = append(retCfg, a.dumpResources()...) - } - - return &v3statuspb.ClientConfig{ - Node: c.config.Node(), - GenericXdsConfigs: retCfg, - } -} - -// channelState represents the state of an xDS channel. It tracks the number of -// LRS references, the authorities interested in the channel, and the server -// configuration used for the channel. -// -// It receives callbacks for events on the underlying ADS stream and invokes -// corresponding callbacks on interested authorities. -type channelState struct { - parent *clientImpl - serverConfig *bootstrap.ServerConfig - - // Access to the following fields should be protected by the parent's - // channelsMu. 
- channel *xdsChannel - lrsRefs int - interestedAuthorities map[*authority]bool -} - -func (cs *channelState) adsStreamFailure(err error) { - if cs.parent.done.HasFired() { - return - } - - if xdsresource.ErrType(err) != xdsresource.ErrTypeStreamFailedAfterRecv { - xdsClientServerFailureMetric.Record(cs.parent.metricsRecorder, 1, cs.parent.target, cs.serverConfig.ServerURI()) - } - - cs.parent.channelsMu.Lock() - defer cs.parent.channelsMu.Unlock() - for authority := range cs.interestedAuthorities { - authority.adsStreamFailure(cs.serverConfig, err) - } -} - -func (cs *channelState) adsResourceUpdate(typ xdsresource.Type, updates map[string]ads.DataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { - if cs.parent.done.HasFired() { - return - } - - cs.parent.channelsMu.Lock() - defer cs.parent.channelsMu.Unlock() - - if len(cs.interestedAuthorities) == 0 { - onDone() - return - } - - authorityCnt := new(atomic.Int64) - authorityCnt.Add(int64(len(cs.interestedAuthorities))) - done := func() { - if authorityCnt.Add(-1) == 0 { - onDone() + grpcTransportConfigs[cc.Type] = grpctransport.Config{ + Credentials: bundle, + GRPCNewClient: func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + opts = append(opts, sc.DialOptions()...) + return grpc.NewClient(target, opts...) + }, } } - for authority := range cs.interestedAuthorities { - authority.adsResourceUpdate(cs.serverConfig, typ, updates, md, done) - } -} - -func (cs *channelState) adsResourceDoesNotExist(typ xdsresource.Type, resourceName string) { - if cs.parent.done.HasFired() { - return - } - - cs.parent.channelsMu.Lock() - defer cs.parent.channelsMu.Unlock() - for authority := range cs.interestedAuthorities { - authority.adsResourceDoesNotExist(typ, resourceName) - } -} - -// clientRefCounted is ref-counted, and to be shared by the xds resolver and -// balancer implementations, across multiple ClientConns and Servers. 
-type clientRefCounted struct { - *clientImpl - - refCount int32 // accessed atomically -} - -func (c *clientRefCounted) incrRef() int32 { - return atomic.AddInt32(&c.refCount, 1) -} - -func (c *clientRefCounted) decrRef() int32 { - return atomic.AddInt32(&c.refCount, -1) -} - -func triggerXDSResourceNotFoundForTesting(client XDSClient, typ xdsresource.Type, name string) error { - crc, ok := client.(*clientRefCounted) - if !ok { - return fmt.Errorf("xds: xDS client is of type %T, want %T", client, &clientRefCounted{}) - } - return crc.clientImpl.triggerResourceNotFoundForTesting(typ, name) -} - -func resourceWatchStateForTesting(client XDSClient, typ xdsresource.Type, name string) (ads.ResourceWatchState, error) { - crc, ok := client.(*clientRefCounted) - if !ok { - return ads.ResourceWatchState{}, fmt.Errorf("xds: xDS client is of type %T, want %T", client, &clientRefCounted{}) - } - return crc.clientImpl.resourceWatchStateForTesting(typ, name) + return nil } diff --git a/xds/internal/xdsclient/clientimpl_loadreport.go b/xds/internal/xdsclient/clientimpl_loadreport.go index efb41b87db53..39004ae7ef77 100644 --- a/xds/internal/xdsclient/clientimpl_loadreport.go +++ b/xds/internal/xdsclient/clientimpl_loadreport.go @@ -18,24 +18,44 @@ package xdsclient import ( + "context" + "sync" + "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient/load" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/lrsclient" ) // ReportLoad starts a load reporting stream to the given server. All load // reports to the same server share the LRS stream. // -// It returns a Store for the user to report loads, a function to cancel the -// load reporting stream. 
-func (c *clientImpl) ReportLoad(server *bootstrap.ServerConfig) (*load.Store, func()) { - xc, releaseChannelRef, err := c.getChannelForLRS(server) +// It returns a lrsclient.LoadStore for the user to report loads. +func (c *clientImpl) ReportLoad(server *bootstrap.ServerConfig) (*lrsclient.LoadStore, func(context.Context)) { + if c.lrsClient == nil { + lrsC, err := lrsclient.New(lrsclient.Config{ + Node: c.xdsClientConfig.Node, + TransportBuilder: c.xdsClientConfig.TransportBuilder, + }) + if err != nil { + c.logger.Warningf("Failed to create an lrs client to the management server to report load: %v", server, err) + return nil, func(context.Context) {} + } + c.lrsClient = lrsC + } + + load, err := c.lrsClient.ReportLoad(clients.ServerIdentifier{ + ServerURI: server.ServerURI(), + Extensions: grpctransport.ServerIdentifierExtension{ + ConfigName: server.SelectedCreds().Type, + }, + }) if err != nil { - c.logger.Warningf("Failed to create a channel to the management server to report load: %v", server, err) - return nil, func() {} + c.logger.Warningf("Failed to create a load store to the management server to report load: %v", server, err) + return nil, func(context.Context) {} } - load, stopLoadReporting := xc.reportLoad() - return load, func() { - stopLoadReporting() - releaseChannelRef() + var loadStop sync.Once + return load, func(ctx context.Context) { + loadStop.Do(func() { load.Stop(ctx) }) } } diff --git a/xds/internal/xdsclient/clientimpl_test.go b/xds/internal/xdsclient/clientimpl_test.go new file mode 100644 index 000000000000..fbfc24a074ec --- /dev/null +++ b/xds/internal/xdsclient/clientimpl_test.go @@ -0,0 +1,261 @@ +/* + * + * Copyright 2025 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package xdsclient + +import ( + "encoding/json" + "fmt" + "reflect" + "sync" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/internal/testutils/stats" + "google.golang.org/grpc/internal/xds/bootstrap" + "google.golang.org/grpc/xds/internal/clients" + "google.golang.org/grpc/xds/internal/clients/grpctransport" + "google.golang.org/grpc/xds/internal/clients/xdsclient" + "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" + "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" + "google.golang.org/protobuf/testing/protocmp" +) + +const ( + testXDSServerURL = "xds.example.com:8080" + testXDSServerURL2 = "xds.example.com:8081" + testNodeID = "test-node-id" + testClusterName = "test-cluster" + testUserAgentName = "test-ua-name" + testUserAgentVer = "test-ua-ver" + testLocalityRegion = "test-region" + testLocalityZone = "test-zone" + testLocalitySubZone = "test-sub-zone" + testTargetName = "test-target" +) + +var ( + testMetadataJSON, _ = json.Marshal(map[string]any{"foo": "bar", "baz": float64(1)}) +) + +func (s) TestBuildXDSClientConfig_Success(t *testing.T) { + tests := []struct { + name string + bootstrapContents []byte + wantXDSClientConfig func(bootstrapCfg *bootstrap.Config) xdsclient.Config + }{ + { + name: "without authorities", + bootstrapContents: []byte(fmt.Sprintf(`{ + "xds_servers": [{"server_uri": "%s", "channel_creds": [{"type": "insecure"}]}], + "node": { + "id": 
"%s", "cluster": "%s", "metadata": %s, + "locality": {"region": "%s", "zone": "%s", "sub_zone": "%s"}, + "user_agent_name": "%s", "user_agent_version": "%s" + } + }`, testXDSServerURL, testNodeID, testClusterName, testMetadataJSON, testLocalityRegion, testLocalityZone, testLocalitySubZone, testUserAgentName, testUserAgentVer)), + wantXDSClientConfig: func(c *bootstrap.Config) xdsclient.Config { + node, serverCfg := c.Node(), c.XDSServers()[0] + expectedServer := xdsclient.ServerConfig{ServerIdentifier: clients.ServerIdentifier{ServerURI: serverCfg.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}} + gServerCfgMap := map[xdsclient.ServerConfig]*bootstrap.ServerConfig{expectedServer: serverCfg} + return xdsclient.Config{ + Servers: []xdsclient.ServerConfig{expectedServer}, + Node: clients.Node{ID: node.GetId(), Cluster: node.GetCluster(), Metadata: node.Metadata, Locality: clients.Locality{Region: node.Locality.Region, Zone: node.Locality.Zone, SubZone: node.Locality.SubZone}, UserAgentName: node.UserAgentName, UserAgentVersion: node.GetUserAgentVersion()}, + Authorities: map[string]xdsclient.Authority{}, + ResourceTypes: map[string]xdsclient.ResourceType{ + version.V3ListenerURL: {TypeURL: version.V3ListenerURL, TypeName: xdsresource.ListenerResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericListenerResourceTypeDecoder(c)}, + version.V3RouteConfigURL: {TypeURL: version.V3RouteConfigURL, TypeName: xdsresource.RouteConfigTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericRouteConfigResourceTypeDecoder()}, + version.V3ClusterURL: {TypeURL: version.V3ClusterURL, TypeName: xdsresource.ClusterResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericClusterResourceTypeDecoder(c, gServerCfgMap)}, + version.V3EndpointsURL: {TypeURL: version.V3EndpointsURL, TypeName: xdsresource.EndpointsResourceTypeName, AllResourcesRequiredInSotW: false, Decoder: 
xdsresource.NewGenericEndpointsResourceTypeDecoder()}, + }, + MetricsReporter: &metricsReporter{recorder: stats.NewTestMetricsRecorder(), target: testTargetName}, + TransportBuilder: grpctransport.NewBuilder(map[string]grpctransport.Config{ + "insecure": { + Credentials: insecure.NewBundle(), + GRPCNewClient: func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + opts = append(opts, serverCfg.DialOptions()...) + return grpc.NewClient(target, opts...) + }}, + }), + } + }, + }, + { + name: "with authorities", + bootstrapContents: []byte(fmt.Sprintf(`{ + "xds_servers": [{"server_uri": "%s", "channel_creds": [{"type": "insecure"}]}], + "node": {"id": "%s"}, + "authorities": { + "auth1": {}, + "auth2": {"xds_servers": [{"server_uri": "%s", "channel_creds": [{"type": "insecure"}]}]} + } + }`, testXDSServerURL, testNodeID, testXDSServerURL2)), + wantXDSClientConfig: func(c *bootstrap.Config) xdsclient.Config { + node := c.Node() + topLevelSCfg, auth2SCfg := c.XDSServers()[0], c.Authorities()["auth2"].XDSServers[0] + expTopLevelS := xdsclient.ServerConfig{ServerIdentifier: clients.ServerIdentifier{ServerURI: topLevelSCfg.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}} + expAuth2S := xdsclient.ServerConfig{ServerIdentifier: clients.ServerIdentifier{ServerURI: auth2SCfg.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}} + gSCfgMap := map[xdsclient.ServerConfig]*bootstrap.ServerConfig{expTopLevelS: topLevelSCfg, expAuth2S: auth2SCfg} + return xdsclient.Config{ + Servers: []xdsclient.ServerConfig{expTopLevelS}, + Node: clients.Node{ID: node.GetId(), Cluster: node.GetCluster(), Metadata: node.Metadata, UserAgentName: node.UserAgentName, UserAgentVersion: node.GetUserAgentVersion()}, + Authorities: map[string]xdsclient.Authority{"auth1": {XDSServers: []xdsclient.ServerConfig{expTopLevelS}}, "auth2": {XDSServers: []xdsclient.ServerConfig{expAuth2S}}}, + ResourceTypes: 
map[string]xdsclient.ResourceType{ + version.V3ListenerURL: {TypeURL: version.V3ListenerURL, TypeName: xdsresource.ListenerResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericListenerResourceTypeDecoder(c)}, + version.V3RouteConfigURL: {TypeURL: version.V3RouteConfigURL, TypeName: xdsresource.RouteConfigTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericRouteConfigResourceTypeDecoder()}, + version.V3ClusterURL: {TypeURL: version.V3ClusterURL, TypeName: xdsresource.ClusterResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericClusterResourceTypeDecoder(c, gSCfgMap)}, + version.V3EndpointsURL: {TypeURL: version.V3EndpointsURL, TypeName: xdsresource.EndpointsResourceTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericEndpointsResourceTypeDecoder()}, + }, + MetricsReporter: &metricsReporter{recorder: stats.NewTestMetricsRecorder(), target: testTargetName}, + TransportBuilder: grpctransport.NewBuilder(map[string]grpctransport.Config{ + "insecure": { + Credentials: insecure.NewBundle(), + GRPCNewClient: func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + opts = append(opts, topLevelSCfg.DialOptions()...) + return grpc.NewClient(target, opts...) 
+ }}, + }), + } + }, + }, + { + name: "server features with ignore_resource_deletion", + bootstrapContents: []byte(fmt.Sprintf(`{ + "xds_servers": [{"server_uri": "%s", "channel_creds": [{"type": "insecure"}], "server_features": ["ignore_resource_deletion"]}], + "node": {"id": "%s"} + }`, testXDSServerURL, testNodeID)), + wantXDSClientConfig: func(c *bootstrap.Config) xdsclient.Config { + node, serverCfg := c.Node(), c.XDSServers()[0] + expectedServer := xdsclient.ServerConfig{ServerIdentifier: clients.ServerIdentifier{ServerURI: serverCfg.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}, IgnoreResourceDeletion: true} + gServerCfgMap := map[xdsclient.ServerConfig]*bootstrap.ServerConfig{expectedServer: serverCfg} + return xdsclient.Config{ + Servers: []xdsclient.ServerConfig{expectedServer}, + Node: clients.Node{ID: node.GetId(), Cluster: node.GetCluster(), Metadata: node.Metadata, UserAgentName: node.UserAgentName, UserAgentVersion: node.GetUserAgentVersion()}, + Authorities: map[string]xdsclient.Authority{}, + ResourceTypes: map[string]xdsclient.ResourceType{ + version.V3ListenerURL: {TypeURL: version.V3ListenerURL, TypeName: xdsresource.ListenerResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericListenerResourceTypeDecoder(c)}, + version.V3RouteConfigURL: {TypeURL: version.V3RouteConfigURL, TypeName: xdsresource.RouteConfigTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericRouteConfigResourceTypeDecoder()}, + version.V3ClusterURL: {TypeURL: version.V3ClusterURL, TypeName: xdsresource.ClusterResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericClusterResourceTypeDecoder(c, gServerCfgMap)}, + version.V3EndpointsURL: {TypeURL: version.V3EndpointsURL, TypeName: xdsresource.EndpointsResourceTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericEndpointsResourceTypeDecoder()}, + }, + MetricsReporter: 
&metricsReporter{recorder: stats.NewTestMetricsRecorder(), target: testTargetName}, + TransportBuilder: grpctransport.NewBuilder(map[string]grpctransport.Config{ + "insecure": { + Credentials: insecure.NewBundle(), + GRPCNewClient: func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + opts = append(opts, serverCfg.DialOptions()...) + return grpc.NewClient(target, opts...) + }}, + }), + } + }, + }, + { + name: "channel creds - unknown type skipped", + bootstrapContents: []byte(fmt.Sprintf(`{ + "xds_servers": [{"server_uri": "%s", "channel_creds": [{"type": "unknown-type"}, {"type": "insecure"}]}], + "node": {"id": "%s"} + }`, testXDSServerURL, testNodeID)), // "insecure" is selected + wantXDSClientConfig: func(c *bootstrap.Config) xdsclient.Config { + node, serverCfg := c.Node(), c.XDSServers()[0] // SelectedCreds will be "insecure" + expectedServer := xdsclient.ServerConfig{ServerIdentifier: clients.ServerIdentifier{ServerURI: serverCfg.ServerURI(), Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}} + gServerCfgMap := map[xdsclient.ServerConfig]*bootstrap.ServerConfig{expectedServer: serverCfg} + return xdsclient.Config{ + Servers: []xdsclient.ServerConfig{expectedServer}, + Node: clients.Node{ID: node.GetId(), Cluster: node.GetCluster(), Metadata: node.Metadata, UserAgentName: node.UserAgentName, UserAgentVersion: node.GetUserAgentVersion()}, + Authorities: map[string]xdsclient.Authority{}, + ResourceTypes: map[string]xdsclient.ResourceType{ + version.V3ListenerURL: {TypeURL: version.V3ListenerURL, TypeName: xdsresource.ListenerResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericListenerResourceTypeDecoder(c)}, + version.V3RouteConfigURL: {TypeURL: version.V3RouteConfigURL, TypeName: xdsresource.RouteConfigTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericRouteConfigResourceTypeDecoder()}, + version.V3ClusterURL: {TypeURL: version.V3ClusterURL, TypeName: 
xdsresource.ClusterResourceTypeName, AllResourcesRequiredInSotW: true, Decoder: xdsresource.NewGenericClusterResourceTypeDecoder(c, gServerCfgMap)}, + version.V3EndpointsURL: {TypeURL: version.V3EndpointsURL, TypeName: xdsresource.EndpointsResourceTypeName, AllResourcesRequiredInSotW: false, Decoder: xdsresource.NewGenericEndpointsResourceTypeDecoder()}, + }, + MetricsReporter: &metricsReporter{recorder: stats.NewTestMetricsRecorder(), target: testTargetName}, + TransportBuilder: grpctransport.NewBuilder(map[string]grpctransport.Config{ + "insecure": { + Credentials: insecure.NewBundle(), + GRPCNewClient: func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + opts = append(opts, serverCfg.DialOptions()...) + return grpc.NewClient(target, opts...) + }}, + }), + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + bootstrapConfig, err := bootstrap.NewConfigFromContents(tt.bootstrapContents) + if err != nil { + t.Fatalf("Failed to create bootstrap config: %v", err) + } + gotCfg, err := buildXDSClientConfig(bootstrapConfig, stats.NewTestMetricsRecorder(), testTargetName) + if err != nil { + t.Fatalf("Failed to build XDSClientConfig: %v", err) + } + + wantCfg := tt.wantXDSClientConfig(bootstrapConfig) + + unexportedTypeOpts := cmpopts.IgnoreUnexported(clients.Node{}, grpctransport.Builder{}) + ignoreTypeOpts := cmpopts.IgnoreTypes(sync.Mutex{}) + resourceTypeCmpOpts := cmp.Comparer(func(a, b xdsclient.ResourceType) bool { + return a.TypeURL == b.TypeURL && a.TypeName == b.TypeName && a.AllResourcesRequiredInSotW == b.AllResourcesRequiredInSotW && reflect.TypeOf(a.Decoder) == reflect.TypeOf(b.Decoder) + }) + metricsReporterCmpOpts := cmp.Comparer(func(a, b clients.MetricsReporter) bool { + if (a == nil) != (b == nil) { + return false + } + if a == nil { // Both are nil + return true + } + // Both are non-nil, compare type and target. 
+ aConcrete, aOK := a.(*metricsReporter) + bConcrete, bOK := b.(*metricsReporter) + if !(aOK && bOK && aConcrete.target == bConcrete.target) { + return false + } + // Compare recorder by type. + if (aConcrete.recorder == nil) != (bConcrete.recorder == nil) { + return false + } + // If both are nil, recorder check passes. If both non-nil, check types. + return aConcrete.recorder == nil || reflect.TypeOf(aConcrete.recorder) == reflect.TypeOf(bConcrete.recorder) + }) + transportBuilderCmpOpts := cmp.Comparer(func(a, b grpctransport.Config) bool { + // Compare Credentials by type + credsEqual := true + if (a.Credentials == nil) != (b.Credentials == nil) { + credsEqual = false + } else if a.Credentials != nil && reflect.TypeOf(a.Credentials) != reflect.TypeOf(b.Credentials) { + credsEqual = false + } + // Compare GRPCNewClient by nil-ness + newClientEqual := (a.GRPCNewClient == nil) == (b.GRPCNewClient == nil) + return credsEqual && newClientEqual + }) + + if diff := cmp.Diff(wantCfg, gotCfg, protocmp.Transform(), unexportedTypeOpts, ignoreTypeOpts, resourceTypeCmpOpts, metricsReporterCmpOpts, transportBuilderCmpOpts); diff != "" { + t.Errorf("buildXDSClientConfig() mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/xds/internal/xdsclient/clientimpl_watchers.go b/xds/internal/xdsclient/clientimpl_watchers.go index 2cce17b05a24..29435993f135 100644 --- a/xds/internal/xdsclient/clientimpl_watchers.go +++ b/xds/internal/xdsclient/clientimpl_watchers.go @@ -18,147 +18,14 @@ package xdsclient import ( - "context" - "fmt" - "sync" - - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" ) -// wrappingWatcher is a wrapper around an xdsresource.ResourceWatcher that adds -// the node ID to the error messages reported to the watcher. 
-type wrappingWatcher struct { - xdsresource.ResourceWatcher - nodeID string -} - -func (w *wrappingWatcher) ResourceError(err error, done func()) { - w.ResourceWatcher.ResourceError(fmt.Errorf("[xDS node id: %v]: %w", w.nodeID, err), done) -} - -func (w *wrappingWatcher) AmbientError(err error, done func()) { - w.ResourceWatcher.AmbientError(fmt.Errorf("[xDS node id: %v]: %w", w.nodeID, err), done) -} - // WatchResource uses xDS to discover the resource associated with the provided // resource name. The resource type implementation determines how xDS responses // are are deserialized and validated, as received from the xDS management // server. Upon receipt of a response from the management server, an // appropriate callback on the watcher is invoked. func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, watcher xdsresource.ResourceWatcher) (cancel func()) { - // Return early if the client is already closed. - // - // The client returned from the top-level API is a ref-counted client which - // contains a pointer to `clientImpl`. When all references are released, the - // ref-counted client sets its pointer to `nil`. And if any watch APIs are - // made on such a closed client, we will get here with a `nil` receiver. 
- if c == nil || c.done.HasFired() { - logger.Warningf("Watch registered for name %q of type %q, but client is closed", rType.TypeName(), resourceName) - return func() {} - } - - watcher = &wrappingWatcher{ - ResourceWatcher: watcher, - nodeID: c.config.Node().GetId(), - } - - if err := c.resourceTypes.maybeRegister(rType); err != nil { - logger.Warningf("Watch registered for type %q, which is already registered", rType.TypeName()) - c.serializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) - return func() {} - } - - n := xdsresource.ParseName(resourceName) - a := c.getAuthorityForResource(n) - if a == nil { - logger.Warningf("Watch registered for name %q of type %q, authority %q is not found", rType.TypeName(), resourceName, n.Authority) - watcher.ResourceError(fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {}) - return func() {} - } - // The watchResource method on the authority is invoked with n.String() - // instead of resourceName because n.String() canonicalizes the given name. - // So, two resource names which don't differ in the query string, but only - // differ in the order of context params will result in the same resource - // being watched by the authority. - return a.watchResource(rType, n.String(), watcher) -} - -// Gets the authority for the given resource name. -// -// See examples in this section of the gRFC: -// https://github.com/grpc/proposal/blob/master/A47-xds-federation.md#bootstrap-config-changes -func (c *clientImpl) getAuthorityForResource(name *xdsresource.Name) *authority { - // For new-style resource names, always lookup the authorities map. If the - // name does not specify an authority, we will end up looking for an entry - // in the map with the empty string as the key. 
- if name.Scheme == xdsresource.FederationScheme { - return c.authorities[name.Authority] - } - - // For old-style resource names, we use the top-level authority if the name - // does not specify an authority. - if name.Authority == "" { - return c.topLevelAuthority - } - return c.authorities[name.Authority] -} - -// A registry of xdsresource.Type implementations indexed by their corresponding -// type URLs. Registration of an xdsresource.Type happens the first time a watch -// for a resource of that type is invoked. -type resourceTypeRegistry struct { - mu sync.Mutex - types map[string]xdsresource.Type -} - -func newResourceTypeRegistry() *resourceTypeRegistry { - return &resourceTypeRegistry{types: make(map[string]xdsresource.Type)} -} - -func (r *resourceTypeRegistry) get(url string) xdsresource.Type { - r.mu.Lock() - defer r.mu.Unlock() - return r.types[url] -} - -func (r *resourceTypeRegistry) maybeRegister(rType xdsresource.Type) error { - r.mu.Lock() - defer r.mu.Unlock() - - url := rType.TypeURL() - typ, ok := r.types[url] - if ok && typ != rType { - return fmt.Errorf("attempt to re-register a resource type implementation for %v", rType.TypeName()) - } - r.types[url] = rType - return nil -} - -func (c *clientImpl) triggerResourceNotFoundForTesting(rType xdsresource.Type, resourceName string) error { - c.channelsMu.Lock() - defer c.channelsMu.Unlock() - - if c.logger.V(2) { - c.logger.Infof("Triggering resource not found for type: %s, resource name: %s", rType.TypeName(), resourceName) - } - - for _, state := range c.xdsActiveChannels { - if err := state.channel.triggerResourceNotFoundForTesting(rType, resourceName); err != nil { - return err - } - } - return nil -} - -func (c *clientImpl) resourceWatchStateForTesting(rType xdsresource.Type, resourceName string) (ads.ResourceWatchState, error) { - c.channelsMu.Lock() - defer c.channelsMu.Unlock() - - for _, state := range c.xdsActiveChannels { - if st, err := 
state.channel.ads.ResourceWatchStateForTesting(rType, resourceName); err == nil { - return st, nil - } - } - return ads.ResourceWatchState{}, fmt.Errorf("unable to find watch state for resource type %q and name %q", rType.TypeName(), resourceName) + return c.XDSClient.WatchResource(rType.TypeURL(), resourceName, xdsresource.GenericResourceWatcher(watcher)) } diff --git a/xds/internal/xdsclient/load/reporter.go b/xds/internal/xdsclient/load/reporter.go deleted file mode 100644 index 67e29e5bae13..000000000000 --- a/xds/internal/xdsclient/load/reporter.go +++ /dev/null @@ -1,27 +0,0 @@ -/* - * - * Copyright 2020 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package load - -// PerClusterReporter wraps the methods from the loadStore that are used here. -type PerClusterReporter interface { - CallStarted(locality string) - CallFinished(locality string, err error) - CallServerLoad(locality, name string, val float64) - CallDropped(category string) -} diff --git a/xds/internal/xdsclient/load/store.go b/xds/internal/xdsclient/load/store.go deleted file mode 100644 index 6c370ac3d935..000000000000 --- a/xds/internal/xdsclient/load/store.go +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright 2020 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package load provides functionality to record and maintain load data. -package load - -import ( - "sync" - "sync/atomic" - "time" -) - -const negativeOneUInt64 = ^uint64(0) - -// timeNow is used to get the current time. It can be overridden in tests. -var timeNow = time.Now - -// Store keeps the loads for multiple clusters and services to be reported via -// LRS. It contains loads to reported to one LRS server. Create multiple stores -// for multiple servers. -// -// It is safe for concurrent use. -type Store struct { - // mu only protects the map (2 layers). The read/write to *perClusterStore - // doesn't need to hold the mu. - mu sync.Mutex - // clusters is a map with cluster name as the key. The second layer is a map - // with service name as the key. Each value (perClusterStore) contains data - // for a (cluster, service) pair. - // - // Note that new entries are added to this map, but never removed. This is - // potentially a memory leak. But the memory is allocated for each new - // (cluster,service) pair, and the memory allocated is just pointers and - // maps. So this shouldn't get too bad. - clusters map[string]map[string]*perClusterStore -} - -// NewStore creates a Store. -func NewStore() *Store { - return &Store{ - clusters: make(map[string]map[string]*perClusterStore), - } -} - -// Stats returns the load data for the given cluster names. Data is returned in -// a slice with no specific order. -// -// If no clusterName is given (an empty slice), all data for all known clusters -// is returned. 
-// -// If a cluster's Data is empty (no load to report), it's not appended to the -// returned slice. -func (s *Store) Stats(clusterNames []string) []*Data { - var ret []*Data - s.mu.Lock() - defer s.mu.Unlock() - - if len(clusterNames) == 0 { - for _, c := range s.clusters { - ret = appendClusterStats(ret, c) - } - return ret - } - - for _, n := range clusterNames { - if c, ok := s.clusters[n]; ok { - ret = appendClusterStats(ret, c) - } - } - return ret -} - -// appendClusterStats gets Data for the given cluster, append to ret, and return -// the new slice. -// -// Data is only appended to ret if it's not empty. -func appendClusterStats(ret []*Data, cluster map[string]*perClusterStore) []*Data { - for _, d := range cluster { - data := d.stats() - if data == nil { - // Skip this data if it doesn't contain any information. - continue - } - ret = append(ret, data) - } - return ret -} - -// PerCluster returns the perClusterStore for the given clusterName + -// serviceName. -func (s *Store) PerCluster(clusterName, serviceName string) PerClusterReporter { - if s == nil { - return nil - } - - s.mu.Lock() - defer s.mu.Unlock() - c, ok := s.clusters[clusterName] - if !ok { - c = make(map[string]*perClusterStore) - s.clusters[clusterName] = c - } - - if p, ok := c[serviceName]; ok { - return p - } - p := &perClusterStore{ - cluster: clusterName, - service: serviceName, - lastLoadReportAt: timeNow(), - } - c[serviceName] = p - return p -} - -// perClusterStore is a repository for LB policy implementations to report store -// load data. It contains load for a (cluster, edsService) pair. -// -// It is safe for concurrent use. -// -// TODO(easwars): Use regular maps with mutexes instead of sync.Map here. The -// latter is optimized for two common use cases: (1) when the entry for a given -// key is only ever written once but read many times, as in caches that only -// grow, or (2) when multiple goroutines read, write, and overwrite entries for -// disjoint sets of keys. 
In these two cases, use of a Map may significantly -// reduce lock contention compared to a Go map paired with a separate Mutex or -// RWMutex. -// Neither of these conditions are met here, and we should transition to a -// regular map with a mutex for better type safety. -type perClusterStore struct { - cluster, service string - drops sync.Map // map[string]*uint64 - localityRPCCount sync.Map // map[string]*rpcCountData - - mu sync.Mutex - lastLoadReportAt time.Time -} - -// Update functions are called by picker for each RPC. To avoid contention, all -// updates are done atomically. - -// CallDropped adds one drop record with the given category to store. -func (ls *perClusterStore) CallDropped(category string) { - if ls == nil { - return - } - - p, ok := ls.drops.Load(category) - if !ok { - tp := new(uint64) - p, _ = ls.drops.LoadOrStore(category, tp) - } - atomic.AddUint64(p.(*uint64), 1) -} - -// CallStarted adds one call started record for the given locality. -func (ls *perClusterStore) CallStarted(locality string) { - if ls == nil { - return - } - - p, ok := ls.localityRPCCount.Load(locality) - if !ok { - tp := newRPCCountData() - p, _ = ls.localityRPCCount.LoadOrStore(locality, tp) - } - p.(*rpcCountData).incrInProgress() - p.(*rpcCountData).incrIssued() -} - -// CallFinished adds one call finished record for the given locality. -// For successful calls, err needs to be nil. -func (ls *perClusterStore) CallFinished(locality string, err error) { - if ls == nil { - return - } - - p, ok := ls.localityRPCCount.Load(locality) - if !ok { - // The map is never cleared, only values in the map are reset. So the - // case where entry for call-finish is not found should never happen. - return - } - p.(*rpcCountData).decrInProgress() - if err == nil { - p.(*rpcCountData).incrSucceeded() - } else { - p.(*rpcCountData).incrErrored() - } -} - -// CallServerLoad adds one server load record for the given locality. The -// load type is specified by desc, and its value by val. 
-func (ls *perClusterStore) CallServerLoad(locality, name string, d float64) { - if ls == nil { - return - } - - p, ok := ls.localityRPCCount.Load(locality) - if !ok { - // The map is never cleared, only values in the map are reset. So the - // case where entry for callServerLoad is not found should never happen. - return - } - p.(*rpcCountData).addServerLoad(name, d) -} - -// Data contains all load data reported to the Store since the most recent call -// to stats(). -type Data struct { - // Cluster is the name of the cluster this data is for. - Cluster string - // Service is the name of the EDS service this data is for. - Service string - // TotalDrops is the total number of dropped requests. - TotalDrops uint64 - // Drops is the number of dropped requests per category. - Drops map[string]uint64 - // LocalityStats contains load reports per locality. - LocalityStats map[string]LocalityData - // ReportInternal is the duration since last time load was reported (stats() - // was called). - ReportInterval time.Duration -} - -// LocalityData contains load data for a single locality. -type LocalityData struct { - // RequestStats contains counts of requests made to the locality. - RequestStats RequestData - // LoadStats contains server load data for requests made to the locality, - // indexed by the load type. - LoadStats map[string]ServerLoadData -} - -// RequestData contains request counts. -type RequestData struct { - // Succeeded is the number of succeeded requests. - Succeeded uint64 - // Errored is the number of requests which ran into errors. - Errored uint64 - // InProgress is the number of requests in flight. - InProgress uint64 - // Issued is the total number requests that were sent. - Issued uint64 -} - -// ServerLoadData contains server load data. -type ServerLoadData struct { - // Count is the number of load reports. - Count uint64 - // Sum is the total value of all load reports. 
- Sum float64 -} - -func newData(cluster, service string) *Data { - return &Data{ - Cluster: cluster, - Service: service, - Drops: make(map[string]uint64), - LocalityStats: make(map[string]LocalityData), - } -} - -// stats returns and resets all loads reported to the store, except inProgress -// rpc counts. -// -// It returns nil if the store doesn't contain any (new) data. -func (ls *perClusterStore) stats() *Data { - if ls == nil { - return nil - } - - sd := newData(ls.cluster, ls.service) - ls.drops.Range(func(key, val any) bool { - d := atomic.SwapUint64(val.(*uint64), 0) - if d == 0 { - return true - } - sd.TotalDrops += d - keyStr := key.(string) - if keyStr != "" { - // Skip drops without category. They are counted in total_drops, but - // not in per category. One example is drops by circuit breaking. - sd.Drops[keyStr] = d - } - return true - }) - ls.localityRPCCount.Range(func(key, val any) bool { - countData := val.(*rpcCountData) - succeeded := countData.loadAndClearSucceeded() - inProgress := countData.loadInProgress() - errored := countData.loadAndClearErrored() - issued := countData.loadAndClearIssued() - if succeeded == 0 && inProgress == 0 && errored == 0 && issued == 0 { - return true - } - - ld := LocalityData{ - RequestStats: RequestData{ - Succeeded: succeeded, - Errored: errored, - InProgress: inProgress, - Issued: issued, - }, - LoadStats: make(map[string]ServerLoadData), - } - countData.serverLoads.Range(func(key, val any) bool { - sum, count := val.(*rpcLoadData).loadAndClear() - if count == 0 { - return true - } - ld.LoadStats[key.(string)] = ServerLoadData{ - Count: count, - Sum: sum, - } - return true - }) - sd.LocalityStats[key.(string)] = ld - return true - }) - - ls.mu.Lock() - sd.ReportInterval = timeNow().Sub(ls.lastLoadReportAt) - ls.lastLoadReportAt = timeNow() - ls.mu.Unlock() - - if sd.TotalDrops == 0 && len(sd.Drops) == 0 && len(sd.LocalityStats) == 0 { - return nil - } - return sd -} - -type rpcCountData struct { - // Only 
atomic accesses are allowed for the fields. - succeeded *uint64 - errored *uint64 - inProgress *uint64 - issued *uint64 - - // Map from load desc to load data (sum+count). Loading data from map is - // atomic, but updating data takes a lock, which could cause contention when - // multiple RPCs try to report loads for the same desc. - // - // To fix the contention, shard this map. - serverLoads sync.Map // map[string]*rpcLoadData -} - -func newRPCCountData() *rpcCountData { - return &rpcCountData{ - succeeded: new(uint64), - errored: new(uint64), - inProgress: new(uint64), - issued: new(uint64), - } -} - -func (rcd *rpcCountData) incrSucceeded() { - atomic.AddUint64(rcd.succeeded, 1) -} - -func (rcd *rpcCountData) loadAndClearSucceeded() uint64 { - return atomic.SwapUint64(rcd.succeeded, 0) -} - -func (rcd *rpcCountData) incrErrored() { - atomic.AddUint64(rcd.errored, 1) -} - -func (rcd *rpcCountData) loadAndClearErrored() uint64 { - return atomic.SwapUint64(rcd.errored, 0) -} - -func (rcd *rpcCountData) incrInProgress() { - atomic.AddUint64(rcd.inProgress, 1) -} - -func (rcd *rpcCountData) decrInProgress() { - atomic.AddUint64(rcd.inProgress, negativeOneUInt64) // atomic.Add(x, -1) -} - -func (rcd *rpcCountData) loadInProgress() uint64 { - return atomic.LoadUint64(rcd.inProgress) // InProgress count is not clear when reading. -} - -func (rcd *rpcCountData) incrIssued() { - atomic.AddUint64(rcd.issued, 1) -} - -func (rcd *rpcCountData) loadAndClearIssued() uint64 { - return atomic.SwapUint64(rcd.issued, 0) -} - -func (rcd *rpcCountData) addServerLoad(name string, d float64) { - loads, ok := rcd.serverLoads.Load(name) - if !ok { - tl := newRPCLoadData() - loads, _ = rcd.serverLoads.LoadOrStore(name, tl) - } - loads.(*rpcLoadData).add(d) -} - -// Data for server loads (from trailers or oob). Fields in this struct must be -// updated consistently. -// -// The current solution is to hold a lock, which could cause contention. 
To fix, -// shard serverLoads map in rpcCountData. -type rpcLoadData struct { - mu sync.Mutex - sum float64 - count uint64 -} - -func newRPCLoadData() *rpcLoadData { - return &rpcLoadData{} -} - -func (rld *rpcLoadData) add(v float64) { - rld.mu.Lock() - rld.sum += v - rld.count++ - rld.mu.Unlock() -} - -func (rld *rpcLoadData) loadAndClear() (s float64, c uint64) { - rld.mu.Lock() - s = rld.sum - rld.sum = 0 - c = rld.count - rld.count = 0 - rld.mu.Unlock() - return -} diff --git a/xds/internal/xdsclient/load/store_test.go b/xds/internal/xdsclient/load/store_test.go deleted file mode 100644 index a8a4ac9eeacf..000000000000 --- a/xds/internal/xdsclient/load/store_test.go +++ /dev/null @@ -1,518 +0,0 @@ -/* - * - * Copyright 2020 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package load - -import ( - "fmt" - "sort" - "sync" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" -) - -var ( - dropCategories = []string{"drop_for_real", "drop_for_fun"} - localities = []string{"locality-A", "locality-B"} - errTest = fmt.Errorf("test error") -) - -// rpcData wraps the rpc counts and load data to be pushed to the store. -type rpcData struct { - start, success, failure int - serverData map[string]float64 // Will be reported with successful RPCs. -} - -// TestDrops spawns a bunch of goroutines which report drop data. 
After the -// goroutines have exited, the test dumps the stats from the Store and makes -// sure they are as expected. -func TestDrops(t *testing.T) { - var ( - drops = map[string]int{ - dropCategories[0]: 30, - dropCategories[1]: 40, - "": 10, - } - wantStoreData = &Data{ - TotalDrops: 80, - Drops: map[string]uint64{ - dropCategories[0]: 30, - dropCategories[1]: 40, - }, - } - ) - - ls := perClusterStore{} - var wg sync.WaitGroup - for category, count := range drops { - for i := 0; i < count; i++ { - wg.Add(1) - go func(c string) { - ls.CallDropped(c) - wg.Done() - }(category) - } - } - wg.Wait() - - gotStoreData := ls.stats() - if diff := cmp.Diff(wantStoreData, gotStoreData, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval")); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } -} - -// TestLocalityStats spawns a bunch of goroutines which report rpc and load -// data. After the goroutines have exited, the test dumps the stats from the -// Store and makes sure they are as expected. 
-func TestLocalityStats(t *testing.T) { - var ( - localityData = map[string]rpcData{ - localities[0]: { - start: 40, - success: 20, - failure: 10, - serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4}, - }, - localities[1]: { - start: 80, - success: 40, - failure: 20, - serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4}, - }, - } - wantStoreData = &Data{ - LocalityStats: map[string]LocalityData{ - localities[0]: { - RequestStats: RequestData{ - Succeeded: 20, - Errored: 10, - InProgress: 10, - Issued: 40, - }, - LoadStats: map[string]ServerLoadData{ - "net": {Count: 20, Sum: 20}, - "disk": {Count: 20, Sum: 40}, - "cpu": {Count: 20, Sum: 60}, - "mem": {Count: 20, Sum: 80}, - }, - }, - localities[1]: { - RequestStats: RequestData{ - Succeeded: 40, - Errored: 20, - InProgress: 20, - Issued: 80, - }, - LoadStats: map[string]ServerLoadData{ - "net": {Count: 40, Sum: 40}, - "disk": {Count: 40, Sum: 80}, - "cpu": {Count: 40, Sum: 120}, - "mem": {Count: 40, Sum: 160}, - }, - }, - }, - } - ) - - ls := perClusterStore{} - var wg sync.WaitGroup - for locality, data := range localityData { - wg.Add(data.start) - for i := 0; i < data.start; i++ { - go func(l string) { - ls.CallStarted(l) - wg.Done() - }(locality) - } - // The calls to callStarted() need to happen before the other calls are - // made. Hence the wait here. 
- wg.Wait() - - wg.Add(data.success) - for i := 0; i < data.success; i++ { - go func(l string, serverData map[string]float64) { - ls.CallFinished(l, nil) - for n, d := range serverData { - ls.CallServerLoad(l, n, d) - } - wg.Done() - }(locality, data.serverData) - } - wg.Add(data.failure) - for i := 0; i < data.failure; i++ { - go func(l string) { - ls.CallFinished(l, errTest) - wg.Done() - }(locality) - } - wg.Wait() - } - - gotStoreData := ls.stats() - if diff := cmp.Diff(wantStoreData, gotStoreData, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval")); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } -} - -func TestResetAfterStats(t *testing.T) { - // Push a bunch of drops, call stats and load stats, and leave inProgress to be non-zero. - // Dump the stats. Verify expected - // Push the same set of loads as before - // Now dump and verify the newly expected ones. - var ( - drops = map[string]int{ - dropCategories[0]: 30, - dropCategories[1]: 40, - } - localityData = map[string]rpcData{ - localities[0]: { - start: 40, - success: 20, - failure: 10, - serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4}, - }, - localities[1]: { - start: 80, - success: 40, - failure: 20, - serverData: map[string]float64{"net": 1, "disk": 2, "cpu": 3, "mem": 4}, - }, - } - wantStoreData = &Data{ - TotalDrops: 70, - Drops: map[string]uint64{ - dropCategories[0]: 30, - dropCategories[1]: 40, - }, - LocalityStats: map[string]LocalityData{ - localities[0]: { - RequestStats: RequestData{ - Succeeded: 20, - Errored: 10, - InProgress: 10, - Issued: 40, - }, - - LoadStats: map[string]ServerLoadData{ - "net": {Count: 20, Sum: 20}, - "disk": {Count: 20, Sum: 40}, - "cpu": {Count: 20, Sum: 60}, - "mem": {Count: 20, Sum: 80}, - }, - }, - localities[1]: { - RequestStats: RequestData{ - Succeeded: 40, - Errored: 20, - InProgress: 20, - Issued: 80, - }, - - LoadStats: map[string]ServerLoadData{ - "net": {Count: 40, 
Sum: 40}, - "disk": {Count: 40, Sum: 80}, - "cpu": {Count: 40, Sum: 120}, - "mem": {Count: 40, Sum: 160}, - }, - }, - }, - } - ) - - reportLoad := func(ls *perClusterStore) { - for category, count := range drops { - for i := 0; i < count; i++ { - ls.CallDropped(category) - } - } - for locality, data := range localityData { - for i := 0; i < data.start; i++ { - ls.CallStarted(locality) - } - for i := 0; i < data.success; i++ { - ls.CallFinished(locality, nil) - for n, d := range data.serverData { - ls.CallServerLoad(locality, n, d) - } - } - for i := 0; i < data.failure; i++ { - ls.CallFinished(locality, errTest) - } - } - } - - ls := perClusterStore{} - reportLoad(&ls) - gotStoreData := ls.stats() - if diff := cmp.Diff(wantStoreData, gotStoreData, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval")); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } - - // The above call to stats() should have reset all load reports except the - // inProgress rpc count. We are now going to push the same load data into - // the store. So, we should expect to see twice the count for inProgress. - for _, l := range localities { - ls := wantStoreData.LocalityStats[l] - ls.RequestStats.InProgress *= 2 - wantStoreData.LocalityStats[l] = ls - } - reportLoad(&ls) - gotStoreData = ls.stats() - if diff := cmp.Diff(wantStoreData, gotStoreData, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval")); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } -} - -var sortDataSlice = cmp.Transformer("SortDataSlice", func(in []*Data) []*Data { - out := append([]*Data(nil), in...) 
// Copy input to avoid mutating it - sort.Slice(out, - func(i, j int) bool { - if out[i].Cluster < out[j].Cluster { - return true - } - if out[i].Cluster == out[j].Cluster { - return out[i].Service < out[j].Service - } - return false - }, - ) - return out -}) - -// Test all load are returned for the given clusters, and all clusters are -// reported if no cluster is specified. -func TestStoreStats(t *testing.T) { - var ( - testClusters = []string{"c0", "c1", "c2"} - testServices = []string{"s0", "s1"} - testLocality = "test-locality" - ) - - store := NewStore() - for _, c := range testClusters { - for _, s := range testServices { - store.PerCluster(c, s).CallStarted(testLocality) - store.PerCluster(c, s).CallServerLoad(testLocality, "abc", 123) - store.PerCluster(c, s).CallDropped("dropped") - store.PerCluster(c, s).CallFinished(testLocality, nil) - } - } - - wantC0 := []*Data{ - { - Cluster: "c0", Service: "s0", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - { - Cluster: "c0", Service: "s1", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - } - // Call Stats with just "c0", this should return data for "c0", and not - // touch data for other clusters. 
- gotC0 := store.Stats([]string{"c0"}) - if diff := cmp.Diff(wantC0, gotC0, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval"), sortDataSlice); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } - - wantOther := []*Data{ - { - Cluster: "c1", Service: "s0", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - { - Cluster: "c1", Service: "s1", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - { - Cluster: "c2", Service: "s0", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - { - Cluster: "c2", Service: "s1", - TotalDrops: 1, Drops: map[string]uint64{"dropped": 1}, - LocalityStats: map[string]LocalityData{ - "test-locality": { - RequestStats: RequestData{Succeeded: 1, Issued: 1}, - LoadStats: map[string]ServerLoadData{"abc": {Count: 1, Sum: 123}}, - }, - }, - }, - } - // Call Stats with empty slice, this should return data for all the - // remaining clusters, and not include c0 (because c0 data was cleared). - gotOther := store.Stats(nil) - if diff := cmp.Diff(wantOther, gotOther, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval"), sortDataSlice); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } -} - -// Test the cases that if a cluster doesn't have load to report, its data is not -// appended to the slice returned by Stats(). 
-func TestStoreStatsEmptyDataNotReported(t *testing.T) { - var ( - testServices = []string{"s0", "s1"} - testLocality = "test-locality" - ) - - store := NewStore() - // "c0"'s RPCs all finish with success. - for _, s := range testServices { - store.PerCluster("c0", s).CallStarted(testLocality) - store.PerCluster("c0", s).CallFinished(testLocality, nil) - } - // "c1"'s RPCs never finish (always inprocess). - for _, s := range testServices { - store.PerCluster("c1", s).CallStarted(testLocality) - } - - want0 := []*Data{ - { - Cluster: "c0", Service: "s0", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{Succeeded: 1, Issued: 1}}, - }, - }, - { - Cluster: "c0", Service: "s1", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{Succeeded: 1, Issued: 1}}, - }, - }, - { - Cluster: "c1", Service: "s0", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{InProgress: 1, Issued: 1}}, - }, - }, - { - Cluster: "c1", Service: "s1", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{InProgress: 1, Issued: 1}}, - }, - }, - } - // Call Stats with empty slice, this should return data for all the - // clusters. - got0 := store.Stats(nil) - if diff := cmp.Diff(want0, got0, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval"), sortDataSlice); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } - - want1 := []*Data{ - { - Cluster: "c1", Service: "s0", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{InProgress: 1}}, - }, - }, - { - Cluster: "c1", Service: "s1", - LocalityStats: map[string]LocalityData{ - "test-locality": {RequestStats: RequestData{InProgress: 1}}, - }, - }, - } - // Call Stats with empty slice again, this should return data only for "c1", - // because "c0" data was cleared, but "c1" has in-progress RPCs. 
- got1 := store.Stats(nil) - if diff := cmp.Diff(want1, got1, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(Data{}, "ReportInterval"), sortDataSlice); diff != "" { - t.Errorf("store.stats() returned unexpected diff (-want +got):\n%s", diff) - } -} - -// TestStoreReportInterval verify that the load report interval gets -// calculated at every stats() call and is the duration between start of last -// load reporting to next stats() call. -func TestStoreReportInterval(t *testing.T) { - originaltimeNow := timeNow - t.Cleanup(func() { timeNow = originaltimeNow }) - - // Initial time for reporter creation - currentTime := time.Now() - timeNow = func() time.Time { - return currentTime - } - - store := NewStore() - reporter := store.PerCluster("test-cluster", "test-service") - // Report dummy drop to ensure stats1 is not nil. - reporter.CallDropped("dummy-category") - - // Update currentTime to simulate the passage of time between the reporter - // creation and first stats() call. - currentTime = currentTime.Add(5 * time.Second) - stats1 := store.Stats(nil) - - if len(stats1) == 0 { - t.Fatalf("stats1 is empty after reporting a drop, want non-nil") - } - // Verify Stats() call calculate the report interval from the time of - // reporter creation. - if got, want := stats1[0].ReportInterval, 5*time.Second; got != want { - t.Errorf("stats1[0].ReportInterval = %v, want %v", stats1[0].ReportInterval, want) - } - - // Update currentTime to simulate the passage of time between the first - // and second stats() call. - currentTime = currentTime.Add(10 * time.Second) - // Report another dummy drop to ensure stats2 is not nil. - reporter.CallDropped("dummy-category-2") - stats2 := store.Stats(nil) - - if len(stats2) == 0 { - t.Fatalf("stats2 is empty after reporting a drop, want non-nil") - } - // Verify Stats() call calculate the report interval from the time of first - // Stats() call. 
- if got, want := stats2[0].ReportInterval, 10*time.Second; got != want { - t.Errorf("stats2[0].ReportInterval = %v, want %v", stats2[0].ReportInterval, want) - } -} diff --git a/xds/internal/xdsclient/metrics_test.go b/xds/internal/xdsclient/metrics_test.go index 369f7216411e..7a4ab0bd1934 100644 --- a/xds/internal/xdsclient/metrics_test.go +++ b/xds/internal/xdsclient/metrics_test.go @@ -32,6 +32,8 @@ import ( "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + + _ "google.golang.org/grpc/xds/internal/httpfilter/router" // Register the router filter. ) type noopListenerWatcher struct{} diff --git a/xds/internal/xdsclient/pool.go b/xds/internal/xdsclient/pool.go index 4a9c0e0922f3..113e6e8edd3c 100644 --- a/xds/internal/xdsclient/pool.go +++ b/xds/internal/xdsclient/pool.go @@ -25,15 +25,16 @@ import ( v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" estats "google.golang.org/grpc/experimental/stats" - "google.golang.org/grpc/internal/backoff" istats "google.golang.org/grpc/internal/stats" "google.golang.org/grpc/internal/xds/bootstrap" + "google.golang.org/protobuf/proto" ) var ( // DefaultPool is the default pool for xDS clients. It is created at init - // time by reading bootstrap configuration from env vars. - DefaultPool *Pool + // time and reads bootstrap configuration from env vars to create the xDS + // client. + DefaultPool = &Pool{clients: make(map[string]*clientImpl)} ) // Pool represents a pool of xDS clients that share the same bootstrap @@ -43,7 +44,7 @@ type Pool struct { // it to guard config as well since SetFallbackBootstrapConfig writes to // config. mu sync.Mutex - clients map[string]*clientRefCounted + clients map[string]*clientImpl config *bootstrap.Config } @@ -76,7 +77,7 @@ type OptionsForTesting struct { // bootstrap configuration), xDS client creation will fail. 
func NewPool(config *bootstrap.Config) *Pool { return &Pool{ - clients: make(map[string]*clientRefCounted), + clients: make(map[string]*clientImpl), config: config, } } @@ -89,7 +90,7 @@ func NewPool(config *bootstrap.Config) *Pool { // expected to invoke once they are done using the client. It is safe for the // caller to invoke this close function multiple times. func (p *Pool) NewClient(name string, metricsRecorder estats.MetricsRecorder) (XDSClient, func(), error) { - return p.newRefCounted(name, defaultWatchExpiryTimeout, backoff.DefaultExponential.Backoff, metricsRecorder) + return p.newRefCounted(name, metricsRecorder) } // NewClientForTesting returns an xDS client configured with the provided @@ -116,7 +117,12 @@ func (p *Pool) NewClientForTesting(opts OptionsForTesting) (XDSClient, func(), e if opts.MetricsRecorder == nil { opts.MetricsRecorder = istats.NewMetricsRecorderList(nil) } - return p.newRefCounted(opts.Name, opts.WatchExpiryTimeout, opts.StreamBackoffAfterFailure, opts.MetricsRecorder) + c, cancel, err := p.newRefCounted(opts.Name, opts.MetricsRecorder) + if err != nil { + return nil, nil, err + } + c.SetWatchExpiryTimeoutForTesting(opts.WatchExpiryTimeout) + return c, cancel, nil } // GetClientForTesting returns an xDS client created earlier using the given @@ -163,7 +169,15 @@ func (p *Pool) DumpResources() *v3statuspb.ClientStatusResponse { resp := &v3statuspb.ClientStatusResponse{} for key, client := range p.clients { - cfg := client.dumpResources() + b, err := client.DumpResources() + if err != nil { + return nil + } + r := &v3statuspb.ClientStatusResponse{} + if err := proto.Unmarshal(b, r); err != nil { + return nil + } + cfg := r.Config[0] cfg.ClientScope = key resp.Config = append(resp.Config, cfg) } @@ -203,19 +217,29 @@ func (p *Pool) clientRefCountedClose(name string) { return } delete(p.clients, name) + + client.Close() + for _, s := range client.bootstrapConfig.XDSServers() { + for _, f := range s.Cleanups() { + f() + } + } + for _, 
a := range client.bootstrapConfig.Authorities() { + for _, s := range a.XDSServers { + for _, f := range s.Cleanups() { + f() + } + } + } p.mu.Unlock() - // This attempts to close the transport to the management server and could - // theoretically call back into the xdsclient package again and deadlock. - // Hence, this needs to be called without holding the lock. - client.clientImpl.close() xdsClientImplCloseHook(name) } // newRefCounted creates a new reference counted xDS client implementation for // name, if one does not exist already. If an xDS client for the given name // exists, it gets a reference to it and returns it. -func (p *Pool) newRefCounted(name string, watchExpiryTimeout time.Duration, streamBackoff func(int) time.Duration, metricsRecorder estats.MetricsRecorder) (XDSClient, func(), error) { +func (p *Pool) newRefCounted(name string, metricsRecorder estats.MetricsRecorder) (*clientImpl, func(), error) { p.mu.Lock() defer p.mu.Unlock() @@ -246,17 +270,16 @@ func (p *Pool) newRefCounted(name string, watchExpiryTimeout time.Duration, stre return c, sync.OnceFunc(func() { p.clientRefCountedClose(name) }), nil } - c, err := newClientImpl(p.config, watchExpiryTimeout, streamBackoff, metricsRecorder, name) + c, err := newClientImpl(p.config, metricsRecorder, name) if err != nil { return nil, nil, err } if logger.V(2) { c.logger.Infof("Created client with name %q and bootstrap configuration:\n %s", name, p.config) } - client := &clientRefCounted{clientImpl: c, refCount: 1} - p.clients[name] = client + p.clients[name] = c xdsClientImplCreateHook(name) logger.Infof("xDS node ID: %s", p.config.Node().GetId()) - return client, sync.OnceFunc(func() { p.clientRefCountedClose(name) }), nil + return c, sync.OnceFunc(func() { p.clientRefCountedClose(name) }), nil } diff --git a/xds/internal/xdsclient/tests/ads_stream_ack_nack_test.go b/xds/internal/xdsclient/tests/ads_stream_ack_nack_test.go index 090faaa00de0..09e9b39edc0d 100644 --- 
a/xds/internal/xdsclient/tests/ads_stream_ack_nack_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_ack_nack_test.go @@ -41,6 +41,25 @@ import ( v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" ) +// Creates an xDS client with the given bootstrap contents. +func createXDSClient(t *testing.T, bootstrapContents []byte) xdsclient.XDSClient { + t.Helper() + + config, err := bootstrap.NewConfigFromContents(bootstrapContents) + if err != nil { + t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) + } + pool := xdsclient.NewPool(config) + client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ + Name: t.Name(), + }) + if err != nil { + t.Fatalf("Failed to create xDS client: %v", err) + } + t.Cleanup(close) + return client +} + // Tests simple ACK and NACK scenarios on the ADS stream: // 1. When a good response is received, i.e. once that is expected to be ACKed, // the test verifies that an ACK is sent matching the version and nonce from diff --git a/xds/internal/xdsclient/tests/ads_stream_backoff_test.go b/xds/internal/xdsclient/tests/ads_stream_backoff_test.go deleted file mode 100644 index 8b7c87072914..000000000000 --- a/xds/internal/xdsclient/tests/ads_stream_backoff_test.go +++ /dev/null @@ -1,453 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package xdsclient_test - -import ( - "context" - "errors" - "fmt" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/google/uuid" - "google.golang.org/grpc" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - "google.golang.org/protobuf/testing/protocmp" - - v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" - v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" -) - -// Creates an xDS client with the given bootstrap contents and backoff function. -func createXDSClientWithBackoff(t *testing.T, bootstrapContents []byte, streamBackoff func(int) time.Duration) xdsclient.XDSClient { - t.Helper() - - config, err := bootstrap.NewConfigFromContents(bootstrapContents) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - StreamBackoffAfterFailure: streamBackoff, - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - t.Cleanup(close) - return client -} - -// Tests the case where the management server returns an error in the ADS -// streaming RPC. Verifies that the ADS stream is restarted after a backoff -// period, and that the previously requested resources are re-requested on the -// new stream. -func (s) TestADS_BackoffAfterStreamFailure(t *testing.T) { - // Channels used for verifying different events in the test. - streamCloseCh := make(chan struct{}, 1) // ADS stream is closed. 
- ldsResourcesCh := make(chan []string, 1) // Listener resource names in the discovery request. - backoffCh := make(chan struct{}, 1) // Backoff after stream failure. - - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Create an xDS management server that returns RPC errors. - streamErr := errors.New("ADS stream error") - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ - OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { - // Push the requested resource names on to a channel. - if req.GetTypeUrl() == version.V3ListenerURL { - t.Logf("Received LDS request for resources: %v", req.GetResourceNames()) - select { - case ldsResourcesCh <- req.GetResourceNames(): - case <-ctx.Done(): - } - } - // Return an error everytime a request is sent on the stream. This - // should cause the transport to backoff before attempting to - // recreate the stream. - return streamErr - }, - // Push on a channel whenever the stream is closed. - OnStreamClosed: func(int64, *v3corepb.Node) { - select { - case streamCloseCh <- struct{}{}: - case <-ctx.Done(): - } - }, - }) - - // Override the backoff implementation to push on a channel that is read by - // the test goroutine. - backoffCtx, backoffCancel := context.WithCancel(ctx) - streamBackoff := func(int) time.Duration { - select { - case backoffCh <- struct{}{}: - case <-backoffCtx.Done(): - } - return 0 - } - defer backoffCancel() - - // Create an xDS client with bootstrap pointing to the above server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - client := createXDSClientWithBackoff(t, bc, streamBackoff) - - // Register a watch for a listener resource. 
- const listenerName = "listener" - lw := newListenerWatcher() - ldsCancel := xdsresource.WatchListener(client, listenerName, lw) - defer ldsCancel() - - // Verify that an ADS stream is created and an LDS request with the above - // resource name is sent. - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { - t.Fatal(err) - } - - // Verify that the received stream error is reported to the watcher. - if err := verifyListenerError(ctx, lw.updateCh, streamErr.Error(), nodeID); err != nil { - t.Fatal(err) - } - - // Verify that the stream is closed. - select { - case <-streamCloseCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for stream to be closed after an error") - } - - // Verify that the ADS stream backs off before recreating the stream. - select { - case <-backoffCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for ADS stream to backoff after stream failure") - } - - // Verify that the same resource name is re-requested on the new stream. - if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil { - t.Fatal(err) - } - - // To prevent indefinite blocking during xDS client close, which is caused - // by a blocking backoff channel write, cancel the backoff context early - // given that the test is complete. - backoffCancel() - -} - -// Tests the case where a stream breaks because the server goes down. Verifies -// that when the server comes back up, the same resources are re-requested, this -// time with the previously acked version and an empty nonce. -func (s) TestADS_RetriesAfterBrokenStream(t *testing.T) { - // Channels used for verifying different events in the test. - streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. - streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received. 
- - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Create an xDS management server listening on a local port. - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("Failed to create a local listener for the xDS management server: %v", err) - } - lis := testutils.NewRestartableListener(l) - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ - Listener: lis, - // Push the received request on to a channel for the test goroutine to - // verify that it matches expectations. - OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { - select { - case streamRequestCh <- req: - case <-ctx.Done(): - } - return nil - }, - // Push the response that the management server is about to send on to a - // channel. The test goroutine to uses this to extract the version and - // nonce, expected on subsequent requests. - OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) { - select { - case streamResponseCh <- resp: - case <-ctx.Done(): - } - }, - }) - - // Create a listener resource on the management server. - const listenerName = "listener" - const routeConfigName = "route-config" - nodeID := uuid.New().String() - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, routeConfigName)}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatal(err) - } - - // Override the backoff implementation to always return 0, to reduce test - // run time. Instead control when the backoff returns by blocking on a - // channel, that the test closes. - backoffCh := make(chan struct{}) - streamBackoff := func(int) time.Duration { - select { - case backoffCh <- struct{}{}: - case <-ctx.Done(): - } - return 0 - } - - // Create an xDS client with bootstrap pointing to the above server. 
- bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - client := createXDSClientWithBackoff(t, bc, streamBackoff) - - // Register a watch for a listener resource. - lw := newListenerWatcher() - ldsCancel := xdsresource.WatchListener(client, listenerName, lw) - defer ldsCancel() - - // Verify that the initial discovery request matches expectation. - var gotReq *v3discoverypb.DiscoveryRequest - select { - case gotReq = <-streamRequestCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for discovery request on the stream") - } - wantReq := &v3discoverypb.DiscoveryRequest{ - VersionInfo: "", - Node: &v3corepb.Node{ - Id: nodeID, - UserAgentName: "gRPC Go", - UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version}, - ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, - }, - ResourceNames: []string{listenerName}, - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - ResponseNonce: "", - } - if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { - t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) - } - - // Capture the version and nonce from the response. - var gotResp *v3discoverypb.DiscoveryResponse - select { - case gotResp = <-streamResponseCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for discovery response on the stream") - } - version := gotResp.GetVersionInfo() - nonce := gotResp.GetNonce() - - // Verify that the ACK contains the appropriate version and nonce. - wantReq.VersionInfo = version - wantReq.ResponseNonce = nonce - select { - case gotReq = <-streamRequestCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for the discovery request ACK on the stream") - } - if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { - t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) - } - - // Verify the update received by the watcher. 
- wantUpdate := listenerUpdateErrTuple{ - update: xdsresource.ListenerUpdate{ - RouteConfigName: routeConfigName, - HTTPFilters: []xdsresource.HTTPFilter{{Name: "router"}}, - }, - } - if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil { - t.Fatal(err) - } - - // Bring down the management server to simulate a broken stream. - lis.Stop() - - // Verify that the error callback on the watcher is not invoked. - verifyNoListenerUpdate(ctx, lw.updateCh) - - // Wait for backoff to kick in, and unblock the first backoff attempt. - select { - case <-backoffCh: - case <-ctx.Done(): - t.Fatal("Timeout waiting for stream backoff") - } - - // Bring up the management server. The test does not have prcecise control - // over when new streams to the management server will start succeeding. The - // ADS stream implementation will backoff as many times as required before - // it can successfully create a new stream. Therefore, we need to receive on - // the backoffCh as many times as required, and unblock the backoff - // implementation. - lis.Restart() - go func() { - for { - select { - case <-backoffCh: - case <-ctx.Done(): - return - } - } - }() - - // Verify that the transport creates a new stream and sends out a new - // request which contains the previously acked version, but an empty nonce. - wantReq.ResponseNonce = "" - select { - case gotReq = <-streamRequestCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for the discovery request ACK on the stream") - } - if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { - t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) - } -} - -// Tests the case where a resource is requested before the a valid ADS stream -// exists. Verifies that the a discovery request is sent out for the previously -// requested resource once a valid stream is created. 
-func (s) TestADS_ResourceRequestedBeforeStreamCreation(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Channels used for verifying different events in the test. - streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received. - - // Create an xDS management server listening on a local port. - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("Failed to create a local listener: %v", err) - } - lis := testutils.NewRestartableListener(l) - streamErr := errors.New("ADS stream error") - - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ - Listener: lis, - - // Return an error everytime a request is sent on the stream. This - // should cause the transport to backoff before attempting to recreate - // the stream. - OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { - select { - case streamRequestCh <- req: - default: - } - return streamErr - }, - }) - - // Bring down the management server before creating the transport. This - // allows us to test the case where SendRequest() is called when there is no - // stream to the management server. - lis.Stop() - - // Override the backoff implementation to always return 0, to reduce test - // run time. Instead control when the backoff returns by blocking on a - // channel, that the test closes. - backoffCh := make(chan struct{}, 1) - unblockBackoffCh := make(chan struct{}) - streamBackoff := func(int) time.Duration { - select { - case backoffCh <- struct{}{}: - default: - } - <-unblockBackoffCh - return 0 - } - - // Create an xDS client with bootstrap pointing to the above server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - client := createXDSClientWithBackoff(t, bc, streamBackoff) - - // Register a watch for a listener resource. 
- const listenerName = "listener" - lw := newListenerWatcher() - ldsCancel := xdsresource.WatchListener(client, listenerName, lw) - defer ldsCancel() - - // The above watch results in an attempt to create a new stream, which will - // fail, and will result in backoff. Wait for backoff to kick in. - select { - case <-backoffCh: - case <-ctx.Done(): - t.Fatal("Timeout waiting for stream backoff") - } - - // Bring up the connection to the management server, and unblock the backoff - // implementation. - lis.Restart() - close(unblockBackoffCh) - - // Verify that the initial discovery request matches expectation. - var gotReq *v3discoverypb.DiscoveryRequest - select { - case gotReq = <-streamRequestCh: - case <-ctx.Done(): - t.Fatalf("Timeout waiting for discovery request on the stream") - } - wantReq := &v3discoverypb.DiscoveryRequest{ - VersionInfo: "", - Node: &v3corepb.Node{ - Id: nodeID, - UserAgentName: "gRPC Go", - UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version}, - ClientFeatures: []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"}, - }, - ResourceNames: []string{listenerName}, - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - ResponseNonce: "", - } - if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" { - t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff) - } -} - -// waitForResourceNames waits for the wantNames to be received on namesCh. -// Returns a non-nil error if the context expires before that. -func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error { - t.Helper() - - var lastRequestedNames []string - for ; ; <-time.After(defaultTestShortTimeout) { - select { - case <-ctx.Done(): - return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. 
Last requested resources: %v", wantNames, lastRequestedNames) - case gotNames := <-namesCh: - if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) { - return nil - } - lastRequestedNames = gotNames - } - } -} diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go deleted file mode 100644 index 09a39a5ddc8c..000000000000 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ /dev/null @@ -1,624 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package xdsclient_test - -import ( - "context" - "errors" - "slices" - "sort" - "testing" - "time" - - "github.com/google/uuid" - "google.golang.org/grpc" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient" - xdsclientinternal "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - - v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" - v3adsgrpc "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" -) - -// blockingListenerWatcher implements xdsresource.ListenerWatcher. It writes to -// a channel when it receives a callback from the watch. It also makes the -// DoneNotifier passed to the callback available to the test, thereby enabling -// the test to block this watcher for as long as required. -type blockingListenerWatcher struct { - doneNotifierCh chan func() // DoneNotifier passed to the callback. - updateCh chan struct{} // Written to when an update is received. - ambientErrCh chan struct{} // Written to when an ambient error is received. - resourceErrCh chan struct{} // Written to when a resource error is received. -} - -func newBLockingListenerWatcher() *blockingListenerWatcher { - return &blockingListenerWatcher{ - doneNotifierCh: make(chan func(), 1), - updateCh: make(chan struct{}, 1), - ambientErrCh: make(chan struct{}, 1), - resourceErrCh: make(chan struct{}, 1), - } -} - -func (lw *blockingListenerWatcher) ResourceChanged(_ *xdsresource.ListenerResourceData, done func()) { - // Notify receipt of the update. 
- select { - case lw.updateCh <- struct{}{}: - default: - } - - select { - case lw.doneNotifierCh <- done: - default: - } -} - -func (lw *blockingListenerWatcher) ResourceError(_ error, done func()) { - // Notify receipt of an error. - select { - case lw.resourceErrCh <- struct{}{}: - default: - } - - select { - case lw.doneNotifierCh <- done: - default: - } -} - -func (lw *blockingListenerWatcher) AmbientError(_ error, done func()) { - // Notify receipt of an error. - select { - case lw.ambientErrCh <- struct{}{}: - default: - } - - select { - case lw.doneNotifierCh <- done: - default: - } -} - -type wrappedADSStream struct { - v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient - recvCh chan struct{} - doneCh <-chan struct{} -} - -func newWrappedADSStream(stream v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient, doneCh <-chan struct{}) *wrappedADSStream { - return &wrappedADSStream{ - AggregatedDiscoveryService_StreamAggregatedResourcesClient: stream, - recvCh: make(chan struct{}, 1), - doneCh: doneCh, - } -} - -func (w *wrappedADSStream) Recv() (*v3discoverypb.DiscoveryResponse, error) { - select { - case w.recvCh <- struct{}{}: - case <-w.doneCh: - return nil, errors.New("Recv() called after the test has finished") - } - return w.AggregatedDiscoveryService_StreamAggregatedResourcesClient.Recv() -} - -// Overrides the function to create a new ADS stream (used by the xdsclient -// transport), and returns a wrapped ADS stream, where the test can monitor -// Recv() calls. 
-func overrideADSStreamCreation(t *testing.T) chan *wrappedADSStream { - t.Helper() - - adsStreamCh := make(chan *wrappedADSStream, 1) - origNewADSStream := xdsclientinternal.NewADSStream - xdsclientinternal.NewADSStream = func(ctx context.Context, cc *grpc.ClientConn) (v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient, error) { - s, err := v3adsgrpc.NewAggregatedDiscoveryServiceClient(cc).StreamAggregatedResources(ctx) - if err != nil { - return nil, err - } - ws := newWrappedADSStream(s, ctx.Done()) - select { - case adsStreamCh <- ws: - default: - } - return ws, nil - } - t.Cleanup(func() { xdsclientinternal.NewADSStream = origNewADSStream }) - return adsStreamCh -} - -// Creates an xDS client with the given bootstrap contents. -func createXDSClient(t *testing.T, bootstrapContents []byte) xdsclient.XDSClient { - t.Helper() - - config, err := bootstrap.NewConfigFromContents(bootstrapContents) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - t.Cleanup(close) - return client -} - -// Tests ADS stream level flow control with a single resource. The test does the -// following: -// - Starts a management server and configures a listener resource on it. -// - Creates an xDS client to the above management server, starts a couple of -// listener watchers for the above resource, and verifies that the update -// reaches these watchers. -// - These watchers don't invoke the onDone callback until explicitly -// triggered by the test. This allows the test to verify that the next -// Recv() call on the ADS stream does not happen until both watchers have -// completely processed the update, i.e invoke the onDone callback. 
-// - Resource is updated on the management server, and the test verifies that -// the update reaches the watchers. -func (s) TestADSFlowControl_ResourceUpdates_SingleResource(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Override the ADS stream creation. - adsStreamCh := overrideADSStreamCreation(t) - - // Start an xDS management server. - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - client := createXDSClient(t, bc) - - // Configure two watchers for the same listener resource. - const listenerResourceName = "test-listener-resource" - const routeConfigurationName = "test-route-configuration-resource" - watcher1 := newBLockingListenerWatcher() - cancel1 := xdsresource.WatchListener(client, listenerResourceName, watcher1) - defer cancel1() - watcher2 := newBLockingListenerWatcher() - cancel2 := xdsresource.WatchListener(client, listenerResourceName, watcher2) - defer cancel2() - - // Wait for the wrapped ADS stream to be created. - var adsStream *wrappedADSStream - select { - case adsStream = <-adsStreamCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be created") - } - - // Configure the listener resource on the management server. - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Ensure that there is a read on the stream. 
- select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be read from") - } - - // Wait for the update to reach the watchers. - select { - case <-watcher1.updateCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for update to reach watcher 1") - } - select { - case <-watcher2.updateCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for update to reach watcher 2") - } - - // Update the listener resource on the management server to point to a new - // route configuration resource. - resources = e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, "new-route")}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Unblock one watcher. - onDone := <-watcher1.doneNotifierCh - onDone() - - // Wait for a short duration and ensure that there is no read on the stream. - select { - case <-adsStream.recvCh: - t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") - case <-time.After(defaultTestShortTimeout): - } - - // Unblock the second watcher. - onDone = <-watcher2.doneNotifierCh - onDone() - - // Ensure that there is a read on the stream, now that the previous update - // has been consumed by all watchers. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") - } - - // Wait for the new update to reach the watchers. 
- select { - case <-watcher1.updateCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for update to reach watcher 1") - } - select { - case <-watcher2.updateCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for update to reach watcher 2") - } - - // At this point, the xDS client is shut down (and the associated transport - // is closed) without the watchers invoking their respective onDone - // callbacks. This verifies that the closing a transport that has pending - // watchers does not block. -} - -// Tests ADS stream level flow control with a multiple resources. The test does -// the following: -// - Starts a management server and configures two listener resources on it. -// - Creates an xDS client to the above management server, starts a couple of -// listener watchers for the two resources, and verifies that the update -// reaches these watchers. -// - These watchers don't invoke the onDone callback until explicitly -// triggered by the test. This allows the test to verify that the next -// Recv() call on the ADS stream does not happen until both watchers have -// completely processed the update, i.e invoke the onDone callback. -func (s) TestADSFlowControl_ResourceUpdates_MultipleResources(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Override the ADS stream creation. - adsStreamCh := overrideADSStreamCreation(t) - - // Start an xDS management server. 
- const listenerResourceName1 = "test-listener-resource-1" - const listenerResourceName2 = "test-listener-resource-2" - wantResourceNames := []string{listenerResourceName1, listenerResourceName2} - requestCh := make(chan struct{}, 1) - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ - OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { - if req.GetTypeUrl() != version.V3ListenerURL { - return nil - } - gotResourceNames := req.GetResourceNames() - sort.Slice(gotResourceNames, func(i, j int) bool { return req.ResourceNames[i] < req.ResourceNames[j] }) - if slices.Equal(gotResourceNames, wantResourceNames) { - // The two resource names will be part of the initial request - // and also the ACK. Hence, we need to make this write - // non-blocking. - select { - case requestCh <- struct{}{}: - default: - } - } - return nil - }, - }) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - client := createXDSClient(t, bc) - - // Configure two watchers for two different listener resources. - const routeConfigurationName1 = "test-route-configuration-resource-1" - watcher1 := newBLockingListenerWatcher() - cancel1 := xdsresource.WatchListener(client, listenerResourceName1, watcher1) - defer cancel1() - const routeConfigurationName2 = "test-route-configuration-resource-2" - watcher2 := newBLockingListenerWatcher() - cancel2 := xdsresource.WatchListener(client, listenerResourceName2, watcher2) - defer cancel2() - - // Wait for the wrapped ADS stream to be created. - var adsStream *wrappedADSStream - select { - case adsStream = <-adsStreamCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be created") - } - - // Ensure that there is a read on the stream. 
- select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be read from") - } - - // Wait for both resource names to be requested. - select { - case <-requestCh: - case <-ctx.Done(): - t.Fatal("Timed out waiting for both resource names to be requested") - } - - // Configure the listener resources on the management server. - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{ - e2e.DefaultClientListener(listenerResourceName1, routeConfigurationName1), - e2e.DefaultClientListener(listenerResourceName2, routeConfigurationName2), - }, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // At this point, we expect the management server to send both resources in - // the same response. So, both watchers would be notified at the same time, - // and no more Recv() calls should happen until both of them have invoked - // their respective onDone() callbacks. - - // The order of callback invocations among the two watchers is not - // guaranteed. So, we select on both of them and unblock the first watcher - // whose callback is invoked. - var otherWatcherUpdateCh chan struct{} - var otherWatcherDoneCh chan func() - select { - case <-watcher1.updateCh: - onDone := <-watcher1.doneNotifierCh - onDone() - otherWatcherUpdateCh = watcher2.updateCh - otherWatcherDoneCh = watcher2.doneNotifierCh - case <-watcher2.updateCh: - onDone := <-watcher2.doneNotifierCh - onDone() - otherWatcherUpdateCh = watcher1.updateCh - otherWatcherDoneCh = watcher1.doneNotifierCh - case <-ctx.Done(): - t.Fatal("Timed out waiting for update to reach first watchers") - } - - // Wait for a short duration and ensure that there is no read on the stream. 
- select { - case <-adsStream.recvCh: - t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") - case <-time.After(defaultTestShortTimeout): - } - - // Wait for the update on the second watcher and unblock it. - select { - case <-otherWatcherUpdateCh: - onDone := <-otherWatcherDoneCh - onDone() - case <-ctx.Done(): - t.Fatal("Timed out waiting for update to reach second watcher") - } - - // Ensure that there is a read on the stream, now that the previous update - // has been consumed by all watchers. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") - } -} - -// Test ADS stream flow control with a single resource that is expected to be -// NACKed by the xDS client and the watcher's ResourceError() callback is -// expected to be invoked because resource is not cached. Verifies that no -// further reads are attempted until the error is completely processed by the -// watcher. -func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Override the ADS stream creation. - adsStreamCh := overrideADSStreamCreation(t) - - // Start an xDS management server. - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - client := createXDSClient(t, bc) - - // Configure a watcher for a listener resource. - const listenerResourceName = "test-listener-resource" - watcher := newBLockingListenerWatcher() - cancel = xdsresource.WatchListener(client, listenerResourceName, watcher) - defer cancel() - - // Wait for the wrapped ADS stream to be created. 
- var adsStream *wrappedADSStream - select { - case adsStream = <-adsStreamCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be created") - } - - // Configure the management server to return a single listener resource - // which is expected to be NACKed by the client. - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{badListenerResource(t, listenerResourceName)}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Ensure that there is a read on the stream. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be read from") - } - - // Wait for the resource error to reach the watcher. - select { - case <-watcher.resourceErrCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for error to reach watcher") - } - - // Wait for a short duration and ensure that there is no read on the stream. - select { - case <-adsStream.recvCh: - t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") - case <-time.After(defaultTestShortTimeout): - } - - // Unblock one watcher. - onDone := <-watcher.doneNotifierCh - onDone() - - // Ensure that there is a read on the stream, now that the previous error - // has been consumed by the watcher. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream after all watchers have processed the previous update") - } -} - -// Test ADS stream flow control with a single resource that is deleted from the -// management server and therefore the watcher's ResourceError() -// callback is expected to be invoked. Verifies that no further reads are -// attempted until the callback is completely handled by the watcher. 
-func (s) TestADSFlowControl_ResourceDoesNotExist(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Override the ADS stream creation. - adsStreamCh := overrideADSStreamCreation(t) - - // Start an xDS management server. - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - client := createXDSClient(t, bc) - - // Configure a watcher for a listener resource. - const listenerResourceName = "test-listener-resource" - const routeConfigurationName = "test-route-configuration-resource" - watcher := newBLockingListenerWatcher() - cancel = xdsresource.WatchListener(client, listenerResourceName, watcher) - defer cancel() - - // Wait for the wrapped ADS stream to be created. - var adsStream *wrappedADSStream - select { - case adsStream = <-adsStreamCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for ADS stream to be created") - } - - // Configure the listener resource on the management server. - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Ensure that there is a read on the stream. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") - } - - // Wait for the update to reach the watcher and unblock it. 
- select { - case <-watcher.updateCh: - onDone := <-watcher.doneNotifierCh - onDone() - case <-ctx.Done(): - t.Fatalf("Timed out waiting for update to reach watcher 1") - } - - // Ensure that there is a read on the stream. - select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") - } - - // Remove the listener resource on the management server. - resources = e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Wait for the resource not found callback to be invoked. - select { - case <-watcher.resourceErrCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for resource not found callback to be invoked on the watcher") - } - - // Wait for a short duration and ensure that there is no read on the stream. - select { - case <-adsStream.recvCh: - t.Fatal("Recv() called on the ADS stream before all watchers have processed the previous update") - case <-time.After(defaultTestShortTimeout): - } - - // Unblock the watcher. - onDone := <-watcher.doneNotifierCh - onDone() - - // Ensure that there is a read on the stream. 
- select { - case <-adsStream.recvCh: - case <-ctx.Done(): - t.Fatalf("Timed out waiting for Recv() to be called on the ADS stream") - } -} diff --git a/xds/internal/xdsclient/tests/ads_stream_restart_test.go b/xds/internal/xdsclient/tests/ads_stream_restart_test.go index 522ecae6bfa5..a53f96fb6623 100644 --- a/xds/internal/xdsclient/tests/ads_stream_restart_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_restart_test.go @@ -20,8 +20,12 @@ package xdsclient_test import ( "context" + "fmt" "testing" + "time" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/google/uuid" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/xds/e2e" @@ -36,6 +40,25 @@ import ( v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" ) +// waitForResourceNames waits for the wantNames to be received on namesCh. +// Returns a non-nil error if the context expires before that. +func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error { + t.Helper() + + var lastRequestedNames []string + for ; ; <-time.After(defaultTestShortTimeout) { + select { + case <-ctx.Done(): + return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. Last requested resources: %v", wantNames, lastRequestedNames) + case gotNames := <-namesCh: + if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) { + return nil + } + lastRequestedNames = gotNames + } + } +} + // Tests that an ADS stream is restarted after a connection failure. Also // verifies that if there were any watches registered before the connection // failed, those resources are re-requested after the stream is restarted. 
diff --git a/xds/internal/xdsclient/tests/ads_stream_watch_test.go b/xds/internal/xdsclient/tests/ads_stream_watch_test.go deleted file mode 100644 index 2672b0f29820..000000000000 --- a/xds/internal/xdsclient/tests/ads_stream_watch_test.go +++ /dev/null @@ -1,209 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package xdsclient_test - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/google/uuid" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/internal/xds/bootstrap" - xdsinternal "google.golang.org/grpc/xds/internal" - "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/transport/ads" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - - v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" -) - -// Tests the state transitions of the resource specific watch state within the -// ADS stream, specifically when the stream breaks (for both resources that have -// been previously received and for resources that are yet to be received). 
-func (s) TestADS_WatchState_StreamBreaks(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Create an xDS management server with a restartable listener. - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("Failed to create a local listener for the xDS management server: %v", err) - } - lis := testutils.NewRestartableListener(l) - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis}) - - // Create an xDS client with bootstrap pointing to the above server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - client := createXDSClient(t, bc) - - // Create a watch for the first listener resource and verify that the timer - // is running and the watch state is `requested`. - const listenerName1 = "listener1" - ldsCancel1 := xdsresource.WatchListener(client, listenerName1, noopListenerWatcher{}) - defer ldsCancel1() - if err := waitForResourceWatchState(ctx, client, listenerName1, ads.ResourceWatchStateRequested, true); err != nil { - t.Fatal(err) - } - - // Configure the first resource on the management server. This should result - // in the resource being pushed to the xDS client and should result in the - // timer getting stopped and the watch state moving to `received`. - const routeConfigName = "route-config" - listenerResource1 := e2e.DefaultClientListener(listenerName1, routeConfigName) - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{listenerResource1}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatal(err) - } - if err := waitForResourceWatchState(ctx, client, listenerName1, ads.ResourceWatchStateReceived, false); err != nil { - t.Fatal(err) - } - - // Create a watch for the second listener resource and verify that the timer - // is running and the watch state is `requested`. 
- const listenerName2 = "listener2" - ldsCancel2 := xdsresource.WatchListener(client, listenerName2, noopListenerWatcher{}) - defer ldsCancel2() - if err := waitForResourceWatchState(ctx, client, listenerName2, ads.ResourceWatchStateRequested, true); err != nil { - t.Fatal(err) - } - - // Stop the server to break the ADS stream. Since the first resource was - // already received, this should not change anything for it. But for the - // second resource, it should result in the timer getting stopped and the - // watch state moving to `started`. - lis.Stop() - if err := waitForResourceWatchState(ctx, client, listenerName2, ads.ResourceWatchStateStarted, false); err != nil { - t.Fatal(err) - } - if err := verifyResourceWatchState(client, listenerName1, ads.ResourceWatchStateReceived, false); err != nil { - t.Fatal(err) - } - - // Restart the server and verify that the timer is running and the watch - // state is `requested`, for the second resource. For the first resource, - // nothing should change. - lis.Restart() - if err := waitForResourceWatchState(ctx, client, listenerName2, ads.ResourceWatchStateRequested, true); err != nil { - t.Fatal(err) - } - if err := verifyResourceWatchState(client, listenerName1, ads.ResourceWatchStateReceived, false); err != nil { - t.Fatal(err) - } - - // Configure the second resource on the management server. This should result - // in the resource being pushed to the xDS client and should result in the - // timer getting stopped and the watch state moving to `received`. 
- listenerResource2 := e2e.DefaultClientListener(listenerName2, routeConfigName) - resources = e2e.UpdateOptions{ - NodeID: nodeID, - Listeners: []*v3listenerpb.Listener{listenerResource1, listenerResource2}, - SkipValidation: true, - } - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatal(err) - } - if err := waitForResourceWatchState(ctx, client, listenerName2, ads.ResourceWatchStateReceived, false); err != nil { - t.Fatal(err) - } -} - -// Tests the behavior of the xDS client when a resource watch timer expires and -// verifies the resource watch state transitions as expected. -func (s) TestADS_WatchState_TimerFires(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Start an xDS management server. - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create an xDS client with bootstrap pointing to the above server, and a - // short resource expiry timeout. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - config, err := bootstrap.NewConfigFromContents(bc) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - WatchExpiryTimeout: defaultTestWatchExpiryTimeout, - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - // Create a watch for the first listener resource and verify that the timer - // is running and the watch state is `requested`. 
- const listenerName = "listener" - ldsCancel1 := xdsresource.WatchListener(client, listenerName, noopListenerWatcher{}) - defer ldsCancel1() - if err := waitForResourceWatchState(ctx, client, listenerName, ads.ResourceWatchStateRequested, true); err != nil { - t.Fatal(err) - } - - // Since the resource is not configured on the management server, the watch - // expiry timer is expected to fire, and the watch state should move to - // `timeout`. - if err := waitForResourceWatchState(ctx, client, listenerName, ads.ResourceWatchStateTimeout, false); err != nil { - t.Fatal(err) - } -} - -func waitForResourceWatchState(ctx context.Context, client xdsclient.XDSClient, resourceName string, wantState ads.WatchState, wantTimer bool) error { - var lastErr error - for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { - err := verifyResourceWatchState(client, resourceName, wantState, wantTimer) - if err == nil { - break - } - lastErr = err - } - if ctx.Err() != nil { - return fmt.Errorf("timeout when waiting for expected watch state for resource %q: %v", resourceName, lastErr) - } - return nil -} - -func verifyResourceWatchState(client xdsclient.XDSClient, resourceName string, wantState ads.WatchState, wantTimer bool) error { - resourceWatchStateForTesting := internal.ResourceWatchStateForTesting.(func(xdsclient.XDSClient, xdsresource.Type, string) (ads.ResourceWatchState, error)) - listenerResourceType := xdsinternal.ResourceTypeMapForTesting[version.V3ListenerURL].(xdsresource.Type) - gotState, err := resourceWatchStateForTesting(client, listenerResourceType, resourceName) - if err != nil { - return fmt.Errorf("failed to get watch state for resource %q: %v", resourceName, err) - } - if gotState.State != wantState { - return fmt.Errorf("watch state for resource %q is %v, want %v", resourceName, gotState.State, wantState) - } - if (gotState.ExpiryTimer != nil) != wantTimer { - return fmt.Errorf("expiry timer for resource %q is %t, want %t", resourceName, 
gotState.ExpiryTimer != nil, wantTimer) - } - return nil -} diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index 9118650a27c3..3df630e5be8d 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -111,8 +111,8 @@ func verifyClusterUpdate(ctx context.Context, updateCh *testutils.Channel, wantU } got := u.(clusterUpdateErrTuple) if wantUpdate.err != nil { - if gotType, wantType := xdsresource.ErrType(got.err), xdsresource.ErrType(wantUpdate.err); gotType != wantType { - return fmt.Errorf("received update with error type %v, want %v", gotType, wantType) + if got.err == nil || !strings.Contains(got.err.Error(), wantUpdate.err.Error()) { + return fmt.Errorf("update received with error: %v, want %q", got.err, wantUpdate.err) } } cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.ClusterUpdate{}, "Raw", "LBPolicy", "TelemetryLabels")} diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index d6f64077423f..d551562c1606 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -33,7 +33,7 @@ import ( "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/xds/e2e" "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" "google.golang.org/protobuf/types/known/wrapperspb" @@ -121,8 +121,8 @@ func verifyEndpointsUpdate(ctx context.Context, updateCh *testutils.Channel, wan } got := u.(endpointsUpdateErrTuple) if wantUpdate.err != nil { - if gotType, wantType := xdsresource.ErrType(got.err), xdsresource.ErrType(wantUpdate.err); gotType != wantType { - return fmt.Errorf("received 
update with error type %v, want %v", gotType, wantType) + if got.err == nil || !strings.Contains(got.err.Error(), wantUpdate.err.Error()) { + return fmt.Errorf("update received with error: %v, want %q", got.err, wantUpdate.err) } } cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.EndpointsUpdate{}, "Raw")} @@ -177,7 +177,7 @@ func (s) TestEDSWatch(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -200,7 +200,7 @@ func (s) TestEDSWatch(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -339,7 +339,7 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -355,7 +355,7 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost2, edsPort2)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -377,7 +377,7 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: 
"subzone-1", @@ -393,7 +393,7 @@ func (s) TestEDSWatch_TwoWatchesForSameResourceName(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost2, edsPort2)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -591,7 +591,7 @@ func (s) TestEDSWatch_ThreeWatchesForDifferentResourceNames(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -682,7 +682,7 @@ func (s) TestEDSWatch_ResourceCaching(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -814,7 +814,7 @@ func (s) TestEDSWatch_ValidResponseCancelsExpiryTimerBehavior(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", @@ -977,7 +977,7 @@ func (s) TestEDSWatch_PartialValid(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{fmt.Sprintf("%s:%d", edsHost1, edsPort1)}, Weight: 1}}, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", diff --git a/xds/internal/xdsclient/tests/federation_watchers_test.go b/xds/internal/xdsclient/tests/federation_watchers_test.go index 92eb48fe13fa..7f9babde5e51 100644 --- a/xds/internal/xdsclient/tests/federation_watchers_test.go +++ 
b/xds/internal/xdsclient/tests/federation_watchers_test.go @@ -26,7 +26,7 @@ import ( "github.com/google/uuid" "google.golang.org/grpc/internal/testutils/xds/e2e" "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" @@ -293,7 +293,7 @@ func (s) TestFederation_EndpointsResourceContextParamOrder(t *testing.T) { { Endpoints: []xdsresource.Endpoint{{Addresses: []string{"localhost:666"}, Weight: 1}}, Weight: 1, - ID: internal.LocalityID{ + ID: clients.Locality{ Region: "region-1", Zone: "zone-1", SubZone: "subzone-1", diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index 7e56374aa96f..ff40de28acf8 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -161,8 +161,8 @@ func verifyListenerUpdate(ctx context.Context, updateCh *testutils.Channel, want } got := u.(listenerUpdateErrTuple) if wantUpdate.err != nil { - if gotType, wantType := xdsresource.ErrType(got.err), xdsresource.ErrType(wantUpdate.err); gotType != wantType { - return fmt.Errorf("received update with error type %v, want %v", gotType, wantType) + if got.err == nil || !strings.Contains(got.err.Error(), wantUpdate.err.Error()) { + return fmt.Errorf("update received with error: %v, want %q", got.err, wantUpdate.err) } } cmpOpts := []cmp.Option{ @@ -176,21 +176,6 @@ func verifyListenerUpdate(ctx context.Context, updateCh *testutils.Channel, want return nil } -func verifyListenerError(ctx context.Context, updateCh *testutils.Channel, wantErr, wantNodeID string) error { - u, err := updateCh.Receive(ctx) - if err != nil { - return fmt.Errorf("timeout when waiting for a listener error from the management server: %v", err) - } - gotErr := u.(listenerUpdateErrTuple).err - if gotErr == nil || 
!strings.Contains(gotErr.Error(), wantErr) { - return fmt.Errorf("update received with error: %v, want %q", gotErr, wantErr) - } - if !strings.Contains(gotErr.Error(), wantNodeID) { - return fmt.Errorf("update received with error: %v, want error with node ID: %q", gotErr, wantNodeID) - } - return nil -} - func verifyErrorType(ctx context.Context, updateCh *testutils.Channel, wantErrType xdsresource.ErrorType, wantNodeID string) error { u, err := updateCh.Receive(ctx) if err != nil { @@ -1072,7 +1057,7 @@ func (s) TestLDSWatch_NACKError(t *testing.T) { } // Verify that the expected error is propagated to the existing watcher. - if err := verifyErrorType(ctx, lw.updateCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil { + if err := verifyErrorType(ctx, lw.updateCh, xdsresource.ErrorTypeUnknown, nodeID); err != nil { t.Fatal(err) } @@ -1080,7 +1065,7 @@ func (s) TestLDSWatch_NACKError(t *testing.T) { lw2 := newListenerWatcher() ldsCancel2 := xdsresource.WatchListener(client, ldsName, lw2) defer ldsCancel2() - if err := verifyErrorType(ctx, lw2.updateCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil { + if err := verifyErrorType(ctx, lw2.updateCh, xdsresource.ErrorTypeUnknown, nodeID); err != nil { t.Fatal(err) } } @@ -1152,7 +1137,7 @@ func (s) TestLDSWatch_ResourceCaching_NACKError(t *testing.T) { } // Verify that the expected error is propagated to the existing watcher. - if err := verifyErrorType(ctx, lw1.updateCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil { + if err := verifyErrorType(ctx, lw1.updateCh, xdsresource.ErrorTypeUnknown, nodeID); err != nil { t.Fatal(err) } @@ -1165,7 +1150,7 @@ func (s) TestLDSWatch_ResourceCaching_NACKError(t *testing.T) { t.Fatal(err) } // Verify that the expected error is propagated to the existing watcher. 
- if err := verifyErrorType(ctx, lw2.updateCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil { + if err := verifyErrorType(ctx, lw2.updateCh, xdsresource.ErrorTypeUnknown, nodeID); err != nil { t.Fatal(err) } } @@ -1243,7 +1228,7 @@ func (s) TestLDSWatch_PartialValid(t *testing.T) { // Verify that the expected error is propagated to the watcher which // requested for the bad resource. // Verify that the expected error is propagated to the existing watcher. - if err := verifyErrorType(ctx, lw1.updateCh, xdsresource.ErrorTypeNACKed, nodeID); err != nil { + if err := verifyErrorType(ctx, lw1.updateCh, xdsresource.ErrorTypeUnknown, nodeID); err != nil { t.Fatal(err) } diff --git a/xds/internal/xdsclient/tests/loadreport_test.go b/xds/internal/xdsclient/tests/loadreport_test.go index c249a3ace07e..a93b5bc1cbee 100644 --- a/xds/internal/xdsclient/tests/loadreport_test.go +++ b/xds/internal/xdsclient/tests/loadreport_test.go @@ -35,6 +35,7 @@ import ( "google.golang.org/grpc/internal/testutils/xds/fakeserver" "google.golang.org/grpc/internal/xds/bootstrap" "google.golang.org/grpc/status" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/protobuf/testing/protocmp" v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" @@ -44,13 +45,13 @@ import ( ) const ( - testLocality1 = `{region="test-region1", zone="", sub_zone=""}` - testLocality2 = `{region="test-region2", zone="", sub_zone=""}` - testKey1 = "test-key1" - testKey2 = "test-key2" + testKey1 = "test-key1" + testKey2 = "test-key2" ) var ( + testLocality1 = clients.Locality{Region: "test-region1"} + testLocality2 = clients.Locality{Region: "test-region2"} toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5) ignoreOrderCmpOpt = protocmp.FilterField(&v3endpointpb.ClusterStats{}, "upstream_locality_stats", cmpopts.SortSlices(func(a, b protocmp.Message) bool { @@ -143,7 +144,9 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { // Call the load reporting API to report load to the 
first management // server, and ensure that a connection to the server is created. store1, lrsCancel1 := client.ReportLoad(serverCfg1) - defer lrsCancel1() + sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + defer lrsCancel1(sCtx) if _, err := newConnChan1.Receive(ctx); err != nil { t.Fatal("Timeout when waiting for a connection to the first management server, after starting load reporting") } @@ -158,7 +161,9 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { // Call the load reporting API to report load to the second management // server, and ensure that a connection to the server is created. store2, lrsCancel2 := client.ReportLoad(serverCfg2) - defer lrsCancel2() + sCtx2, sCancel2 := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel2() + defer lrsCancel2(sCtx2) if _, err := newConnChan2.Receive(ctx); err != nil { t.Fatal("Timeout when waiting for a connection to the second management server, after starting load reporting") } @@ -171,7 +176,7 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { } // Push some loads on the received store. - store2.PerCluster("cluster", "eds").CallDropped("test") + store2.ReporterForCluster("cluster", "eds").CallDropped("test") // Ensure the initial load reporting request is received at the server. lrsServer := mgmtServer2.LRSServer @@ -226,7 +231,9 @@ func (s) TestReportLoad_ConnectionCreation(t *testing.T) { } // Cancel this load reporting stream, server should see error canceled. - lrsCancel2() + sCtx2, sCancel2 = context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel2() + lrsCancel2(sCtx2) // Server should receive a stream canceled error. There may be additional // load reports from the client in the channel. @@ -280,14 +287,14 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { } // Push some loads on the received store. 
- store1.PerCluster("cluster1", "eds1").CallDropped("test") - store1.PerCluster("cluster1", "eds1").CallStarted(testLocality1) - store1.PerCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 3.14) - store1.PerCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 2.718) - store1.PerCluster("cluster1", "eds1").CallFinished(testLocality1, nil) - store1.PerCluster("cluster1", "eds1").CallStarted(testLocality2) - store1.PerCluster("cluster1", "eds1").CallServerLoad(testLocality2, testKey2, 1.618) - store1.PerCluster("cluster1", "eds1").CallFinished(testLocality2, nil) + store1.ReporterForCluster("cluster1", "eds1").CallDropped("test") + store1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality1) + store1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 3.14) + store1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 2.718) + store1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality1, nil) + store1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality2) + store1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality2, testKey2, 1.618) + store1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality2, nil) // Ensure the initial load reporting request is received at the server. req, err := lrsServer.LRSRequestChan.Receive(ctx) @@ -367,7 +374,7 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { } // Push more loads. - store2.PerCluster("cluster2", "eds2").CallDropped("test") + store2.ReporterForCluster("cluster2", "eds2").CallDropped("test") // Ensure that loads are seen on the server. We need a loop here because // there could have been some requests from the client in the time between @@ -402,7 +409,9 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { // Cancel the first load reporting call, and ensure that the stream does not // close (because we have another call open). 
- cancel1() + sCtx1, sCancel1 := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel1() + cancel1(sCtx1) sCtx, sCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout) defer sCancel() if _, err := lrsServer.LRSStreamCloseChan.Receive(sCtx); err != context.DeadlineExceeded { @@ -410,7 +419,9 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { } // Cancel the second load reporting call, and ensure the stream is closed. - cancel2() + sCtx2, sCancel2 := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel2() + cancel2(sCtx2) if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil { t.Fatal("Timeout waiting for LRS stream to close") } @@ -422,5 +433,7 @@ func (s) TestReportLoad_StreamCreation(t *testing.T) { if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil { t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err) } - cancel3() + sCtx3, sCancel3 := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel3() + cancel3(sCtx3) } diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go deleted file mode 100644 index deaf5a96f97d..000000000000 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ /dev/null @@ -1,508 +0,0 @@ -/* - * - * Copyright 2022 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package xdsclient_test - -import ( - "context" - "encoding/json" - "fmt" - "strings" - "testing" - - "github.com/google/uuid" - "google.golang.org/grpc" - "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/internal/testutils/xds/e2e" - "google.golang.org/grpc/internal/testutils/xds/fakeserver" - "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal" - xdstestutils "google.golang.org/grpc/xds/internal/testutils" - "google.golang.org/grpc/xds/internal/xdsclient" - xdsclientinternal "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" - "google.golang.org/protobuf/types/known/anypb" - - v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" -) - -var ( - // Resource type implementations retrieved from the resource type map in the - // internal package, which is initialized when the individual resource types - // are created. - listenerResourceType = internal.ResourceTypeMapForTesting[version.V3ListenerURL].(xdsresource.Type) - routeConfigResourceType = internal.ResourceTypeMapForTesting[version.V3RouteConfigURL].(xdsresource.Type) -) - -// This route configuration watcher registers two watches corresponding to the -// names passed in at creation time on a valid update. 
-type testRouteConfigWatcher struct { - client xdsclient.XDSClient - name1, name2 string - rcw1, rcw2 *routeConfigWatcher - cancel1, cancel2 func() - updateCh *testutils.Channel -} - -func newTestRouteConfigWatcher(client xdsclient.XDSClient, name1, name2 string) *testRouteConfigWatcher { - return &testRouteConfigWatcher{ - client: client, - name1: name1, - name2: name2, - rcw1: newRouteConfigWatcher(), - rcw2: newRouteConfigWatcher(), - updateCh: testutils.NewChannel(), - } -} - -func (rw *testRouteConfigWatcher) ResourceChanged(update *xdsresource.RouteConfigResourceData, onDone func()) { - rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) - - rw.cancel1 = xdsresource.WatchRouteConfig(rw.client, rw.name1, rw.rcw1) - rw.cancel2 = xdsresource.WatchRouteConfig(rw.client, rw.name2, rw.rcw2) - onDone() -} - -func (rw *testRouteConfigWatcher) ResourceError(err error, onDone func()) { - // When used with a go-control-plane management server that continuously - // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in AmbientError() simplifies tests which will have - // access to the most recently received error. - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) - onDone() -} - -func (rw *testRouteConfigWatcher) AmbientError(err error, onDone func()) { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) - onDone() -} - -func (rw *testRouteConfigWatcher) cancel() { - rw.cancel1() - rw.cancel2() -} - -// TestWatchCallAnotherWatch tests the scenario where a watch is registered for -// a resource, and more watches are registered from the first watch's callback. -// The test verifies that this scenario does not lead to a deadlock. -func (s) TestWatchCallAnotherWatch(t *testing.T) { - // Start an xDS management server and set the option to allow it to respond - // to requests which only specify a subset of the configured resources. 
- mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true}) - - nodeID := uuid.New().String() - authority := makeAuthorityName(t.Name()) - bc, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ - Servers: []byte(fmt.Sprintf(`[{ - "server_uri": %q, - "channel_creds": [{"type": "insecure"}] - }]`, mgmtServer.Address)), - Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), - Authorities: map[string]json.RawMessage{ - // Xdstp style resource names used in this test use a slash removed - // version of t.Name as their authority, and the empty config - // results in the top-level xds server configuration being used for - // this authority. - authority: []byte(`{}`), - }, - }) - if err != nil { - t.Fatalf("Failed to create bootstrap configuration: %v", err) - } - - // Create an xDS client with the above bootstrap contents. - config, err := bootstrap.NewConfigFromContents(bc) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - // Configure the management server to respond with route config resources. 
- ldsNameNewStyle := makeNewStyleLDSName(authority) - rdsNameNewStyle := makeNewStyleRDSName(authority) - resources := e2e.UpdateOptions{ - NodeID: nodeID, - Routes: []*v3routepb.RouteConfiguration{ - e2e.DefaultRouteConfig(rdsName, ldsName, cdsName), - e2e.DefaultRouteConfig(rdsNameNewStyle, ldsNameNewStyle, cdsName), - }, - SkipValidation: true, - } - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - if err := mgmtServer.Update(ctx, resources); err != nil { - t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) - } - - // Create a route configuration watcher that registers two more watches from - // the OnUpdate callback: - // - one for the same resource name as this watch, which would be - // satisfied from xdsClient cache - // - the other for a different resource name, which would be - // satisfied from the server - rw := newTestRouteConfigWatcher(client, rdsName, rdsNameNewStyle) - defer rw.cancel() - rdsCancel := xdsresource.WatchRouteConfig(client, rdsName, rw) - defer rdsCancel() - - // Verify the contents of the received update for the all watchers. 
- wantUpdate12 := routeConfigUpdateErrTuple{ - update: xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{ - { - Domains: []string{ldsName}, - Routes: []*xdsresource.Route{ - { - Prefix: newStringP("/"), - ActionType: xdsresource.RouteActionRoute, - WeightedClusters: map[string]xdsresource.WeightedCluster{cdsName: {Weight: 100}}, - }, - }, - }, - }, - }, - } - wantUpdate3 := routeConfigUpdateErrTuple{ - update: xdsresource.RouteConfigUpdate{ - VirtualHosts: []*xdsresource.VirtualHost{ - { - Domains: []string{ldsNameNewStyle}, - Routes: []*xdsresource.Route{ - { - Prefix: newStringP("/"), - ActionType: xdsresource.RouteActionRoute, - WeightedClusters: map[string]xdsresource.WeightedCluster{cdsName: {Weight: 100}}, - }, - }, - }, - }, - }, - } - if err := verifyRouteConfigUpdate(ctx, rw.updateCh, wantUpdate12); err != nil { - t.Fatal(err) - } - if err := verifyRouteConfigUpdate(ctx, rw.rcw1.updateCh, wantUpdate12); err != nil { - t.Fatal(err) - } - if err := verifyRouteConfigUpdate(ctx, rw.rcw2.updateCh, wantUpdate3); err != nil { - t.Fatal(err) - } -} - -// TestNodeProtoSentOnlyInFirstRequest verifies that a non-empty node proto gets -// sent only on the first discovery request message on the ADS stream. -// -// It also verifies the same behavior holds after a stream restart. -func (s) TestNodeProtoSentOnlyInFirstRequest(t *testing.T) { - // Create a restartable listener which can close existing connections. - l, err := testutils.LocalTCPListener() - if err != nil { - t.Fatalf("testutils.LocalTCPListener() failed: %v", err) - } - lis := testutils.NewRestartableListener(l) - - // Start a fake xDS management server with the above restartable listener. - // - // We are unable to use the go-control-plane server here, because it caches - // the node proto received in the first request message and adds it to - // subsequent requests before invoking the OnStreamRequest() callback. - // Therefore we cannot verify what is sent by the xDS client. 
- mgmtServer, cleanup, err := fakeserver.StartServer(lis) - if err != nil { - t.Fatalf("Failed to start fake xDS server: %v", err) - } - defer cleanup() - - // Create a bootstrap file in a temporary directory. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - config, err := bootstrap.NewConfigFromContents(bc) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - const ( - serviceName = "my-service-client-side-xds" - routeConfigName = "route-" + serviceName - clusterName = "cluster-" + serviceName - ) - - // Register a watch for the Listener resource. - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - watcher := xdstestutils.NewTestResourceWatcher() - client.WatchResource(listenerResourceType, serviceName, watcher) - - // Ensure the watch results in a discovery request with an empty node proto. - if err := readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } - - // Configure a listener resource on the fake xDS server. - lisAny, err := anypb.New(e2e.DefaultClientListener(serviceName, routeConfigName)) - if err != nil { - t.Fatalf("Failed to marshal listener resource into an Any proto: %v", err) - } - mgmtServer.XDSResponseChan <- &fakeserver.Response{ - Resp: &v3discoverypb.DiscoveryResponse{ - TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", - VersionInfo: "1", - Resources: []*anypb.Any{lisAny}, - }, - } - - // The xDS client is expected to ACK the Listener resource. The discovery - // request corresponding to the ACK must contain a nil node proto. 
- if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } - - // Register a watch for a RouteConfiguration resource. - client.WatchResource(routeConfigResourceType, routeConfigName, watcher) - - // Ensure the watch results in a discovery request with an empty node proto. - if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } - - // Configure the route configuration resource on the fake xDS server. - rcAny, err := anypb.New(e2e.DefaultRouteConfig(routeConfigName, serviceName, clusterName)) - if err != nil { - t.Fatalf("Failed to marshal route configuration resource into an Any proto: %v", err) - } - mgmtServer.XDSResponseChan <- &fakeserver.Response{ - Resp: &v3discoverypb.DiscoveryResponse{ - TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", - VersionInfo: "1", - Resources: []*anypb.Any{rcAny}, - }, - } - - // Ensure the discovery request for the ACK contains an empty node proto. - if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } - - // Stop the management server and expect the error callback to be invoked. - lis.Stop() - select { - case <-ctx.Done(): - t.Fatal("Timeout when waiting for the connection error to be propagated to the watcher") - case <-watcher.AmbientErrorCh: - } - - // Restart the management server. - lis.Restart() - - // The xDS client is expected to re-request previously requested resources. - // Hence, we expect two DiscoveryRequest messages (one for the Listener and - // one for the RouteConfiguration resource). The first message should contain - // a non-nil node proto and the second should contain a nil-proto. - // - // And since we don't push any responses on the response channel of the fake - // server, we do not expect any ACKs here. 
- if err := readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } - if err := readDiscoveryResponseAndCheckForEmptyNodeProto(ctx, mgmtServer.XDSRequestChan); err != nil { - t.Fatal(err) - } -} - -// readDiscoveryResponseAndCheckForEmptyNodeProto reads a discovery request -// message out of the provided reqCh. It returns an error if it fails to read a -// message before the context deadline expires, or if the read message contains -// a non-empty node proto. -func readDiscoveryResponseAndCheckForEmptyNodeProto(ctx context.Context, reqCh *testutils.Channel) error { - v, err := reqCh.Receive(ctx) - if err != nil { - return fmt.Errorf("Timeout when waiting for a DiscoveryRequest message") - } - req := v.(*fakeserver.Request).Req.(*v3discoverypb.DiscoveryRequest) - if node := req.GetNode(); node != nil { - return fmt.Errorf("Node proto received in DiscoveryRequest message is %v, want empty node proto", node) - } - return nil -} - -// readDiscoveryResponseAndCheckForNonEmptyNodeProto reads a discovery request -// message out of the provided reqCh. It returns an error if it fails to read a -// message before the context deadline expires, or if the read message contains -// an empty node proto. 
-func readDiscoveryResponseAndCheckForNonEmptyNodeProto(ctx context.Context, reqCh *testutils.Channel) error { - v, err := reqCh.Receive(ctx) - if err != nil { - return fmt.Errorf("Timeout when waiting for a DiscoveryRequest message") - } - req := v.(*fakeserver.Request).Req.(*v3discoverypb.DiscoveryRequest) - if node := req.GetNode(); node == nil { - return fmt.Errorf("Empty node proto received in DiscoveryRequest message, want non-empty node proto") - } - return nil -} - -type testRouteConfigResourceType struct{} - -func (testRouteConfigResourceType) TypeURL() string { return version.V3RouteConfigURL } -func (testRouteConfigResourceType) TypeName() string { return "RouteConfigResource" } -func (testRouteConfigResourceType) AllResourcesRequiredInSotW() bool { return false } -func (testRouteConfigResourceType) Decode(*xdsresource.DecodeOptions, *anypb.Any) (*xdsresource.DecodeResult, error) { - return nil, nil -} - -// Tests that the errors returned by the xDS client when watching a resource -// contain the node ID that was used to create the client. This test covers two -// scenarios: -// -// 1. When a watch is registered for an already registered resource type, but -// this time with a different implementation, -// 2. When a watch is registered for a resource name whose authority is not -// found in the bootstrap configuration. -func (s) TestWatchErrorsContainNodeID(t *testing.T) { - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. 
- config, err := bootstrap.NewConfigFromContents(bc) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - t.Run("Multiple_ResourceType_Implementations", func(t *testing.T) { - const routeConfigName = "route-config-name" - watcher := xdstestutils.NewTestResourceWatcher() - client.WatchResource(routeConfigResourceType, routeConfigName, watcher) - - sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer sCancel() - select { - case <-sCtx.Done(): - case <-watcher.UpdateCh: - t.Fatal("Unexpected resource update") - case <-watcher.AmbientErrorCh: - t.Fatal("Unexpected resource error") - case <-watcher.ResourceErrorCh: - t.Fatal("Unexpected resource does not exist") - } - - client.WatchResource(testRouteConfigResourceType{}, routeConfigName, watcher) - select { - case <-ctx.Done(): - t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.AmbientErrorCh: - if err == nil || !strings.Contains(err.Error(), nodeID) { - t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, nodeID) - } - } - }) - - t.Run("Missing_Authority", func(t *testing.T) { - const routeConfigName = "xdstp://nonexistant-authority/envoy.config.route.v3.RouteConfiguration/route-config-name" - watcher := xdstestutils.NewTestResourceWatcher() - client.WatchResource(routeConfigResourceType, routeConfigName, watcher) - - select { - case <-ctx.Done(): - t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.AmbientErrorCh: - if err == nil || !strings.Contains(err.Error(), nodeID) { - t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, 
nodeID) - } - } - }) -} - -// Tests that the errors returned by the xDS client when watching a resource -// contain the node ID when channel creation to the management server fails. -func (s) TestWatchErrorsContainNodeID_ChannelCreationFailure(t *testing.T) { - mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{}) - - // Create bootstrap configuration pointing to the above management server. - nodeID := uuid.New().String() - bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address) - - // Create an xDS client with the above bootstrap contents. - config, err := bootstrap.NewConfigFromContents(bc) - if err != nil { - t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bc), err) - } - pool := xdsclient.NewPool(config) - client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ - Name: t.Name(), - }) - if err != nil { - t.Fatalf("Failed to create xDS client: %v", err) - } - defer close() - - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) - defer cancel() - - // Override the xDS channel dialer with one that always fails. 
- origDialer := xdsclientinternal.GRPCNewClient - xdsclientinternal.GRPCNewClient = func(string, ...grpc.DialOption) (*grpc.ClientConn, error) { - return nil, fmt.Errorf("failed to create channel") - } - defer func() { xdsclientinternal.GRPCNewClient = origDialer }() - - const routeConfigName = "route-config-name" - watcher := xdstestutils.NewTestResourceWatcher() - client.WatchResource(routeConfigResourceType, routeConfigName, watcher) - - select { - case <-ctx.Done(): - t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.AmbientErrorCh: - if err == nil || !strings.Contains(err.Error(), nodeID) { - t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, nodeID) - } - } -} diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index ad5ead26f175..a650bd7c49aa 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -122,8 +122,8 @@ func verifyRouteConfigUpdate(ctx context.Context, updateCh *testutils.Channel, w } got := u.(routeConfigUpdateErrTuple) if wantUpdate.err != nil { - if gotType, wantType := xdsresource.ErrType(got.err), xdsresource.ErrType(wantUpdate.err); gotType != wantType { - return fmt.Errorf("received update with error type %v, want %v", gotType, wantType) + if got.err == nil || !strings.Contains(got.err.Error(), wantUpdate.err.Error()) { + return fmt.Errorf("update received with error: %v, want %q", got.err, wantUpdate.err) } } cmpOpts := []cmp.Option{cmpopts.EquateEmpty(), cmpopts.IgnoreFields(xdsresource.RouteConfigUpdate{}, "Raw")} diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index a9fce90c8756..251094084d8b 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -33,7 +33,7 @@ import ( 
"google.golang.org/grpc/internal/testutils/xds/e2e" "google.golang.org/grpc/internal/testutils/xds/fakeserver" "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" "google.golang.org/protobuf/proto" @@ -161,7 +161,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ListenerResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -177,7 +177,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ListenerResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -194,7 +194,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3routepb.RouteConfiguration{})}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ListenerResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -422,7 +422,7 @@ func (s) 
TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -438,7 +438,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -455,7 +455,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3clusterpb.Cluster{})}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -675,7 +675,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: 
"type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -691,7 +691,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -708,7 +708,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3endpointpb.ClusterLoadAssignment{})}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -986,7 +986,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1002,7 +1002,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, 
"EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1019,7 +1019,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{})}, }, - wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), + wantErr: fmt.Sprintf("xds: resource %q of type %q has been removed", resourceName1, "EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1090,13 +1090,13 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{"addr1:314"}, Weight: 1}}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, { Endpoints: []xdsresource.Endpoint{{Addresses: []string{"addr2:159"}, Weight: 1}}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, @@ -1124,13 +1124,13 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Localities: []xdsresource.Locality{ { Endpoints: []xdsresource.Endpoint{{Addresses: []string{"addr1:314"}, Weight: 1}}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, { Endpoints: []xdsresource.Endpoint{{Addresses: []string{"addr2:159"}, Weight: 1}}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, diff --git a/xds/internal/xdsclient/transport/ads/ads_stream.go b/xds/internal/xdsclient/transport/ads/ads_stream.go deleted file mode 100644 index fc41b38edade..000000000000 
--- a/xds/internal/xdsclient/transport/ads/ads_stream.go +++ /dev/null @@ -1,825 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package ads provides the implementation of an ADS (Aggregated Discovery -// Service) stream for the xDS client. -package ads - -import ( - "context" - "fmt" - "sync" - "sync/atomic" - "time" - - "google.golang.org/grpc/codes" - "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/internal/backoff" - "google.golang.org/grpc/internal/buffer" - igrpclog "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/pretty" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" - "google.golang.org/protobuf/types/known/anypb" - - v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" - v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - statuspb "google.golang.org/genproto/googleapis/rpc/status" -) - -// Any per-RPC level logs which print complete request or response messages -// should be gated at this verbosity level. Other per-RPC level logs which print -// terse output should be at `INFO` and verbosity 2. -const perRPCVerbosityLevel = 9 - -// Response represents a response received on the ADS stream. It contains the -// type URL, version, and resources for the response. 
-type Response struct { - TypeURL string - Version string - Resources []*anypb.Any -} - -// DataAndErrTuple is a struct that holds a resource and an error. It is used to -// return a resource and any associated error from a function. -type DataAndErrTuple struct { - Resource xdsresource.ResourceData - Err error -} - -// StreamEventHandler is an interface that defines the callbacks for events that -// occur on the ADS stream. Methods on this interface may be invoked -// concurrently and implementations need to handle them in a thread-safe manner. -type StreamEventHandler interface { - OnADSStreamError(error) // Called when the ADS stream breaks. - OnADSWatchExpiry(xdsresource.Type, string) // Called when the watch timer expires for a resource. - OnADSResponse(Response, func()) ([]string, error) // Called when a response is received on the ADS stream. -} - -// WatchState is a enum that describes the watch state of a particular -// resource. -type WatchState int - -const ( - // ResourceWatchStateStarted is the state where a watch for a resource was - // started, but a request asking for that resource is yet to be sent to the - // management server. - ResourceWatchStateStarted WatchState = iota - // ResourceWatchStateRequested is the state when a request has been sent for - // the resource being watched. - ResourceWatchStateRequested - // ResourceWatchStateReceived is the state when a response has been received - // for the resource being watched. - ResourceWatchStateReceived - // ResourceWatchStateTimeout is the state when the watch timer associated - // with the resource expired because no response was received. - ResourceWatchStateTimeout -) - -// ResourceWatchState is the state corresponding to a resource being watched. -type ResourceWatchState struct { - State WatchState // Watch state of the resource. - ExpiryTimer *time.Timer // Timer for the expiry of the watch. -} - -// State corresponding to a resource type. 
-type resourceTypeState struct { - version string // Last acked version. Should not be reset when the stream breaks. - nonce string // Last received nonce. Should be reset when the stream breaks. - bufferedRequests chan struct{} // Channel to buffer requests when writing is blocked. - subscribedResources map[string]*ResourceWatchState // Map of subscribed resource names to their state. - pendingWrite bool // True if there is a pending write for this resource type. -} - -// StreamImpl provides the functionality associated with an ADS (Aggregated -// Discovery Service) stream on the client side. It manages the lifecycle of the -// ADS stream, including creating the stream, sending requests, and handling -// responses. It also handles flow control and retries for the stream. -type StreamImpl struct { - // The following fields are initialized from arguments passed to the - // constructor and are read-only afterwards, and hence can be accessed - // without a mutex. - transport transport.Transport // Transport to use for ADS stream. - eventHandler StreamEventHandler // Callbacks into the xdsChannel. - backoff func(int) time.Duration // Backoff for retries, after stream failures. - nodeProto *v3corepb.Node // Identifies the gRPC application. - watchExpiryTimeout time.Duration // Resource watch expiry timeout - logger *igrpclog.PrefixLogger - - // The following fields are initialized in the constructor and are not - // written to afterwards, and hence can be accessed without a mutex. - streamCh chan transport.StreamingCall // New ADS streams are pushed here. - requestCh *buffer.Unbounded // Subscriptions and unsubscriptions are pushed here. - runnerDoneCh chan struct{} // Notify completion of runner goroutine. - cancel context.CancelFunc // To cancel the context passed to the runner goroutine. - - // Guards access to the below fields (and to the contents of the map). 
- mu sync.Mutex - resourceTypeState map[xdsresource.Type]*resourceTypeState // Map of resource types to their state. - fc *adsFlowControl // Flow control for ADS stream. - firstRequest bool // False after the first request is sent out. -} - -// StreamOpts contains the options for creating a new ADS Stream. -type StreamOpts struct { - Transport transport.Transport // xDS transport to create the stream on. - EventHandler StreamEventHandler // Callbacks for stream events. - Backoff func(int) time.Duration // Backoff for retries, after stream failures. - NodeProto *v3corepb.Node // Node proto to identify the gRPC application. - WatchExpiryTimeout time.Duration // Resource watch expiry timeout. - LogPrefix string // Prefix to be used for log messages. -} - -// NewStreamImpl initializes a new StreamImpl instance using the given -// parameters. It also launches goroutines responsible for managing reads and -// writes for messages of the underlying stream. -func NewStreamImpl(opts StreamOpts) *StreamImpl { - s := &StreamImpl{ - transport: opts.Transport, - eventHandler: opts.EventHandler, - backoff: opts.Backoff, - nodeProto: opts.NodeProto, - watchExpiryTimeout: opts.WatchExpiryTimeout, - - streamCh: make(chan transport.StreamingCall, 1), - requestCh: buffer.NewUnbounded(), - runnerDoneCh: make(chan struct{}), - resourceTypeState: make(map[xdsresource.Type]*resourceTypeState), - } - - l := grpclog.Component("xds") - s.logger = igrpclog.NewPrefixLogger(l, opts.LogPrefix+fmt.Sprintf("[ads-stream %p] ", s)) - - ctx, cancel := context.WithCancel(context.Background()) - s.cancel = cancel - go s.runner(ctx) - return s -} - -// Stop blocks until the stream is closed and all spawned goroutines exit. -func (s *StreamImpl) Stop() { - s.cancel() - s.requestCh.Close() - <-s.runnerDoneCh - s.logger.Infof("Stopping ADS stream") -} - -// Subscribe subscribes to the given resource. It is assumed that multiple -// subscriptions for the same resource is deduped at the caller. 
A discovery -// request is sent out on the underlying stream for the resource type when there -// is sufficient flow control quota. -func (s *StreamImpl) Subscribe(typ xdsresource.Type, name string) { - if s.logger.V(2) { - s.logger.Infof("Subscribing to resource %q of type %q", name, typ.TypeName()) - } - - s.mu.Lock() - defer s.mu.Unlock() - - state, ok := s.resourceTypeState[typ] - if !ok { - // An entry in the type state map is created as part of the first - // subscription request for this type. - state = &resourceTypeState{ - subscribedResources: make(map[string]*ResourceWatchState), - bufferedRequests: make(chan struct{}, 1), - } - s.resourceTypeState[typ] = state - } - - // Create state for the newly subscribed resource. The watch timer will - // be started when a request for this resource is actually sent out. - state.subscribedResources[name] = &ResourceWatchState{State: ResourceWatchStateStarted} - state.pendingWrite = true - - // Send a request for the resource type with updated subscriptions. - s.requestCh.Put(typ) -} - -// Unsubscribe cancels the subscription to the given resource. It is a no-op if -// the given resource does not exist. The watch expiry timer associated with the -// resource is stopped if one is active. A discovery request is sent out on the -// stream for the resource type when there is sufficient flow control quota. -func (s *StreamImpl) Unsubscribe(typ xdsresource.Type, name string) { - if s.logger.V(2) { - s.logger.Infof("Unsubscribing to resource %q of type %q", name, typ.TypeName()) - } - - s.mu.Lock() - defer s.mu.Unlock() - - state, ok := s.resourceTypeState[typ] - if !ok { - return - } - - rs, ok := state.subscribedResources[name] - if !ok { - return - } - if rs.ExpiryTimer != nil { - rs.ExpiryTimer.Stop() - } - delete(state.subscribedResources, name) - state.pendingWrite = true - - // Send a request for the resource type with updated subscriptions. 
- s.requestCh.Put(typ) -} - -// runner is a long-running goroutine that handles the lifecycle of the ADS -// stream. It spwans another goroutine to handle writes of discovery request -// messages on the stream. Whenever an existing stream fails, it performs -// exponential backoff (if no messages were received on that stream) before -// creating a new stream. -func (s *StreamImpl) runner(ctx context.Context) { - defer close(s.runnerDoneCh) - - go s.send(ctx) - - runStreamWithBackoff := func() error { - stream, err := s.transport.CreateStreamingCall(ctx, "/envoy.service.discovery.v3.AggregatedDiscoveryService/StreamAggregatedResources") - if err != nil { - s.logger.Warningf("Failed to create a new ADS streaming RPC: %v", err) - s.onError(err, false) - return nil - } - if s.logger.V(2) { - s.logger.Infof("ADS stream created") - } - - s.mu.Lock() - // Flow control is a property of the underlying streaming RPC call and - // needs to be initialized everytime a new one is created. - s.fc = newADSFlowControl(s.logger) - s.firstRequest = true - s.mu.Unlock() - - // Ensure that the most recently created stream is pushed on the - // channel for the `send` goroutine to consume. - select { - case <-s.streamCh: - default: - } - s.streamCh <- stream - - // Backoff state is reset upon successful receipt of at least one - // message from the server. - if s.recv(ctx, stream) { - return backoff.ErrResetBackoff - } - return nil - } - backoff.RunF(ctx, runStreamWithBackoff, s.backoff) -} - -// send is a long running goroutine that handles sending discovery requests for -// two scenarios: -// - a new subscription or unsubscription request is received -// - a new stream is created after the previous one failed -func (s *StreamImpl) send(ctx context.Context) { - // Stores the most recent stream instance received on streamCh. 
- var stream transport.StreamingCall - for { - select { - case <-ctx.Done(): - return - case stream = <-s.streamCh: - if err := s.sendExisting(stream); err != nil { - // Send failed, clear the current stream. Attempt to resend will - // only be made after a new stream is created. - stream = nil - continue - } - case req, ok := <-s.requestCh.Get(): - if !ok { - return - } - s.requestCh.Load() - - typ := req.(xdsresource.Type) - if err := s.sendNew(stream, typ); err != nil { - stream = nil - continue - } - } - } -} - -// sendNew attempts to send a discovery request based on a new subscription or -// unsubscription. If there is no flow control quota, the request is buffered -// and will be sent later. This method also starts the watch expiry timer for -// resources that were sent in the request for the first time, i.e. their watch -// state is `watchStateStarted`. -func (s *StreamImpl) sendNew(stream transport.StreamingCall, typ xdsresource.Type) error { - s.mu.Lock() - defer s.mu.Unlock() - - // If there's no stream yet, skip the request. This request will be resent - // when a new stream is created. If no stream is created, the watcher will - // timeout (same as server not sending response back). - if stream == nil { - return nil - } - - // If local processing of the most recently received response is not yet - // complete, i.e. fc.pending == true, queue this write and return early. - // This allows us to batch writes for requests which are generated as part - // of local processing of a received response. - state := s.resourceTypeState[typ] - if s.fc.pending.Load() { - select { - case state.bufferedRequests <- struct{}{}: - default: - } - return nil - } - - return s.sendMessageIfWritePendingLocked(stream, typ, state) -} - -// sendExisting sends out discovery requests for existing resources when -// recovering from a broken stream. -// -// The stream argument is guaranteed to be non-nil. 
-func (s *StreamImpl) sendExisting(stream transport.StreamingCall) error { - s.mu.Lock() - defer s.mu.Unlock() - - for typ, state := range s.resourceTypeState { - // Reset only the nonces map when the stream restarts. - // - // xDS spec says the following. See section: - // https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version - // - // Note that the version for a resource type is not a property of an - // individual xDS stream but rather a property of the resources - // themselves. If the stream becomes broken and the client creates a new - // stream, the client’s initial request on the new stream should - // indicate the most recent version seen by the client on the previous - // stream - state.nonce = "" - - if len(state.subscribedResources) == 0 { - continue - } - - state.pendingWrite = true - if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { - return err - } - } - return nil -} - -// sendBuffered sends out discovery requests for resources that were buffered -// when they were subscribed to, because local processing of the previously -// received response was not yet complete. -// -// The stream argument is guaranteed to be non-nil. -func (s *StreamImpl) sendBuffered(stream transport.StreamingCall) error { - s.mu.Lock() - defer s.mu.Unlock() - - for typ, state := range s.resourceTypeState { - select { - case <-state.bufferedRequests: - if err := s.sendMessageIfWritePendingLocked(stream, typ, state); err != nil { - return err - } - default: - // No buffered request. - continue - } - } - return nil -} - -// sendMessageIfWritePendingLocked attempts to sends a discovery request to the -// server, if there is a pending write for the given resource type. -// -// If the request is successfully sent, the pending write field is cleared and -// watch timers are started for the resources in the request. -// -// Caller needs to hold c.mu. 
-func (s *StreamImpl) sendMessageIfWritePendingLocked(stream transport.StreamingCall, typ xdsresource.Type, state *resourceTypeState) error { - if !state.pendingWrite { - if s.logger.V(2) { - s.logger.Infof("Skipping sending request for type %q, because all subscribed resources were already sent", typ.TypeURL()) - } - return nil - } - - names := resourceNames(state.subscribedResources) - if err := s.sendMessageLocked(stream, names, typ.TypeURL(), state.version, state.nonce, nil); err != nil { - return err - } - state.pendingWrite = false - - // Drain the buffered requests channel because we just sent a request for this - // resource type. - select { - case <-state.bufferedRequests: - default: - } - - s.startWatchTimersLocked(typ, names) - return nil -} - -// sendMessageLocked sends a discovery request to the server, populating the -// different fields of the message with the given parameters. Returns a non-nil -// error if the request could not be sent. -// -// Caller needs to hold c.mu. -func (s *StreamImpl) sendMessageLocked(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) error { - req := &v3discoverypb.DiscoveryRequest{ - ResourceNames: names, - TypeUrl: url, - VersionInfo: version, - ResponseNonce: nonce, - } - - // The xDS protocol only requires that we send the node proto in the first - // discovery request on every stream. Sending the node proto in every - // request wastes CPU resources on the client and the server. 
- if s.firstRequest { - req.Node = s.nodeProto - } - - if nackErr != nil { - req.ErrorDetail = &statuspb.Status{ - Code: int32(codes.InvalidArgument), Message: nackErr.Error(), - } - } - - if err := stream.Send(req); err != nil { - s.logger.Warningf("Sending ADS request for type %q, resources: %v, version: %q, nonce: %q failed: %v", url, names, version, nonce, err) - return err - } - s.firstRequest = false - - if s.logger.V(perRPCVerbosityLevel) { - s.logger.Infof("ADS request sent: %v", pretty.ToJSON(req)) - } else if s.logger.V(2) { - s.logger.Warningf("ADS request sent for type %q, resources: %v, version: %q, nonce: %q", url, names, version, nonce) - } - return nil -} - -// recv is responsible for receiving messages from the ADS stream. -// -// It performs the following actions: -// - Waits for local flow control to be available before sending buffered -// requests, if any. -// - Receives a message from the ADS stream. If an error is encountered here, -// it is handled by the onError method which propagates the error to all -// watchers. -// - Invokes the event handler's OnADSResponse method to process the message. -// - Sends an ACK or NACK to the server based on the response. -// -// It returns a boolean indicating whether at least one message was received -// from the server. -func (s *StreamImpl) recv(ctx context.Context, stream transport.StreamingCall) bool { - msgReceived := false - for { - // Wait for ADS stream level flow control to be available, and send out - // a request if anything was buffered while we were waiting for local - // processing of the previous response to complete. 
- if !s.fc.wait(ctx) { - if s.logger.V(2) { - s.logger.Infof("ADS stream context canceled") - } - return msgReceived - } - s.sendBuffered(stream) - - resources, url, version, nonce, err := s.recvMessage(stream) - if err != nil { - s.onError(err, msgReceived) - s.logger.Warningf("ADS stream closed: %v", err) - return msgReceived - } - msgReceived = true - - // Invoke the onResponse event handler to parse the incoming message and - // decide whether to send an ACK or NACK. - resp := Response{ - Resources: resources, - TypeURL: url, - Version: version, - } - var resourceNames []string - var nackErr error - s.fc.setPending() - resourceNames, nackErr = s.eventHandler.OnADSResponse(resp, s.fc.onDone) - if xdsresource.ErrType(nackErr) == xdsresource.ErrorTypeResourceTypeUnsupported { - // Based on gRFC A27, a general guiding principle is that if the - // server sends something the client didn't actually subscribe to, - // then the client ignores it. Here, we have received a response - // with resources of a type that we don't know about. - // - // Sending a NACK doesn't really seem appropriate here, since we're - // not actually validating what the server sent and therefore don't - // know that it's invalid. But we shouldn't ACK either, because we - // don't know that it is valid. 
- s.logger.Warningf("%v", nackErr) - continue - } - - s.onRecv(stream, resourceNames, url, version, nonce, nackErr) - } -} - -func (s *StreamImpl) recvMessage(stream transport.StreamingCall) (resources []*anypb.Any, url, version, nonce string, err error) { - r, err := stream.Recv() - if err != nil { - return nil, "", "", "", err - } - resp, ok := r.(*v3discoverypb.DiscoveryResponse) - if !ok { - s.logger.Infof("Message received on ADS stream of unexpected type: %T", r) - return nil, "", "", "", fmt.Errorf("unexpected message type %T", r) - } - - if s.logger.V(perRPCVerbosityLevel) { - s.logger.Infof("ADS response received: %v", pretty.ToJSON(resp)) - } else if s.logger.V(2) { - s.logger.Infof("ADS response received for type %q, version %q, nonce %q", resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce()) - } - return resp.GetResources(), resp.GetTypeUrl(), resp.GetVersionInfo(), resp.GetNonce(), nil -} - -// onRecv is invoked when a response is received from the server. The arguments -// passed to this method correspond to the most recently received response. -// -// It performs the following actions: -// - updates resource type specific state -// - updates resource specific state for resources in the response -// - sends an ACK or NACK to the server based on the response -func (s *StreamImpl) onRecv(stream transport.StreamingCall, names []string, url, version, nonce string, nackErr error) { - s.mu.Lock() - defer s.mu.Unlock() - - // Lookup the resource type specific state based on the type URL. - var typ xdsresource.Type - for t := range s.resourceTypeState { - if t.TypeURL() == url { - typ = t - break - } - } - typeState, ok := s.resourceTypeState[typ] - if !ok { - s.logger.Warningf("ADS stream received a response for type %q, but no state exists for it", url) - return - } - - // Update the resource type specific state. 
This includes: - // - updating the nonce unconditionally - // - updating the version only if the response is to be ACKed - previousVersion := typeState.version - typeState.nonce = nonce - if nackErr == nil { - typeState.version = version - } - - // Update the resource specific state. For all resources received as - // part of this response that are in state `started` or `requested`, - // this includes: - // - setting the watch state to watchstateReceived - // - stopping the expiry timer, if one exists - for _, name := range names { - rs, ok := typeState.subscribedResources[name] - if !ok { - s.logger.Warningf("ADS stream received a response for resource %q, but no state exists for it", name) - continue - } - if ws := rs.State; ws == ResourceWatchStateStarted || ws == ResourceWatchStateRequested { - rs.State = ResourceWatchStateReceived - if rs.ExpiryTimer != nil { - rs.ExpiryTimer.Stop() - rs.ExpiryTimer = nil - } - } - } - - // Send an ACK or NACK. - subscribedResourceNames := resourceNames(typeState.subscribedResources) - if nackErr != nil { - s.logger.Warningf("Sending NACK for resource type: %q, version: %q, nonce: %q, reason: %v", url, version, nonce, nackErr) - s.sendMessageLocked(stream, subscribedResourceNames, url, previousVersion, nonce, nackErr) - return - } - - if s.logger.V(2) { - s.logger.Infof("Sending ACK for resource type: %q, version: %q, nonce: %q", url, version, nonce) - } - s.sendMessageLocked(stream, subscribedResourceNames, url, version, nonce, nil) -} - -// onError is called when an error occurs on the ADS stream. It stops any -// outstanding resource timers and resets the watch state to started for any -// resources that were in the requested state. It also handles the case where -// the ADS stream was closed after receiving a response, which is not -// considered an error. 
-func (s *StreamImpl) onError(err error, msgReceived bool) { - // For resources that been requested but not yet responded to by the - // management server, stop the resource timers and reset the watch state to - // watchStateStarted. This is because we don't want the expiry timer to be - // running when we don't have a stream open to the management server. - s.mu.Lock() - for _, state := range s.resourceTypeState { - for _, rs := range state.subscribedResources { - if rs.State != ResourceWatchStateRequested { - continue - } - if rs.ExpiryTimer != nil { - rs.ExpiryTimer.Stop() - rs.ExpiryTimer = nil - } - rs.State = ResourceWatchStateStarted - } - } - s.mu.Unlock() - - // Note that we do not consider it an error if the ADS stream was closed - // after having received a response on the stream. This is because there - // are legitimate reasons why the server may need to close the stream during - // normal operations, such as needing to rebalance load or the underlying - // connection hitting its max connection age limit. - // (see [gRFC A9](https://github.com/grpc/proposal/blob/master/A9-server-side-conn-mgt.md)). - if msgReceived { - err = xdsresource.NewError(xdsresource.ErrTypeStreamFailedAfterRecv, err.Error()) - } - - s.eventHandler.OnADSStreamError(err) -} - -// startWatchTimersLocked starts the expiry timers for the given resource names -// of the specified resource type. For each resource name, if the resource -// watch state is in the "started" state, it transitions the state to -// "requested" and starts an expiry timer. When the timer expires, the resource -// watch state is set to "timeout" and the event handler callback is called. -// -// The caller must hold the s.mu lock. 
-func (s *StreamImpl) startWatchTimersLocked(typ xdsresource.Type, names []string) { - typeState := s.resourceTypeState[typ] - for _, name := range names { - resourceState, ok := typeState.subscribedResources[name] - if !ok { - continue - } - if resourceState.State != ResourceWatchStateStarted { - continue - } - resourceState.State = ResourceWatchStateRequested - - rs := resourceState - resourceState.ExpiryTimer = time.AfterFunc(s.watchExpiryTimeout, func() { - s.mu.Lock() - rs.State = ResourceWatchStateTimeout - rs.ExpiryTimer = nil - s.mu.Unlock() - s.eventHandler.OnADSWatchExpiry(typ, name) - }) - } -} - -func resourceNames(m map[string]*ResourceWatchState) []string { - ret := make([]string, len(m)) - idx := 0 - for name := range m { - ret[idx] = name - idx++ - } - return ret -} - -// TriggerResourceNotFoundForTesting triggers a resource not found event for the -// given resource type and name. This is intended for testing purposes only, to -// simulate a resource not found scenario. -func (s *StreamImpl) TriggerResourceNotFoundForTesting(typ xdsresource.Type, resourceName string) { - s.mu.Lock() - - state, ok := s.resourceTypeState[typ] - if !ok { - s.mu.Unlock() - return - } - resourceState, ok := state.subscribedResources[resourceName] - if !ok { - s.mu.Unlock() - return - } - - if s.logger.V(2) { - s.logger.Infof("Triggering resource not found for type: %s, resource name: %s", typ.TypeName(), resourceName) - } - resourceState.State = ResourceWatchStateTimeout - if resourceState.ExpiryTimer != nil { - resourceState.ExpiryTimer.Stop() - resourceState.ExpiryTimer = nil - } - s.mu.Unlock() - go s.eventHandler.OnADSWatchExpiry(typ, resourceName) -} - -// ResourceWatchStateForTesting returns the ResourceWatchState for the given -// resource type and name. This is intended for testing purposes only, to -// inspect the internal state of the ADS stream. 
-func (s *StreamImpl) ResourceWatchStateForTesting(typ xdsresource.Type, resourceName string) (ResourceWatchState, error) { - s.mu.Lock() - defer s.mu.Unlock() - - state, ok := s.resourceTypeState[typ] - if !ok { - return ResourceWatchState{}, fmt.Errorf("unknown resource type: %v", typ) - } - resourceState, ok := state.subscribedResources[resourceName] - if !ok { - return ResourceWatchState{}, fmt.Errorf("unknown resource name: %v", resourceName) - } - return *resourceState, nil -} - -// adsFlowControl implements ADS stream level flow control that enables the -// transport to block the reading of the next message off of the stream until -// the previous update is consumed by all watchers. -// -// The lifetime of the flow control is tied to the lifetime of the stream. -type adsFlowControl struct { - logger *igrpclog.PrefixLogger - - // Whether the most recent update is pending consumption by all watchers. - pending atomic.Bool - // Channel used to notify when all the watchers have consumed the most - // recent update. Wait() blocks on reading a value from this channel. - readyCh chan struct{} -} - -// newADSFlowControl returns a new adsFlowControl. -func newADSFlowControl(logger *igrpclog.PrefixLogger) *adsFlowControl { - return &adsFlowControl{ - logger: logger, - readyCh: make(chan struct{}, 1), - } -} - -// setPending changes the internal state to indicate that there is an update -// pending consumption by all watchers. -func (fc *adsFlowControl) setPending() { - fc.pending.Store(true) -} - -// wait blocks until all the watchers have consumed the most recent update and -// returns true. If the context expires before that, it returns false. -func (fc *adsFlowControl) wait(ctx context.Context) bool { - // If there is no pending update, there is no need to block. 
- if !fc.pending.Load() { - // If all watchers finished processing the most recent update before the - // `recv` goroutine made the next call to `Wait()`, there would be an - // entry in the readyCh channel that needs to be drained to ensure that - // the next call to `Wait()` doesn't unblock before it actually should. - select { - case <-fc.readyCh: - default: - } - return true - } - - select { - case <-ctx.Done(): - return false - case <-fc.readyCh: - return true - } -} - -// onDone indicates that all watchers have consumed the most recent update. -func (fc *adsFlowControl) onDone() { - select { - // Writes to the readyCh channel should not block ideally. The default - // branch here is to appease the paranoid mind. - case fc.readyCh <- struct{}{}: - default: - if fc.logger.V(2) { - fc.logger.Infof("ADS stream flow control readyCh is full") - } - } - fc.pending.Store(false) -} diff --git a/xds/internal/xdsclient/transport/grpctransport/grpctransport.go b/xds/internal/xdsclient/transport/grpctransport/grpctransport.go deleted file mode 100644 index fb740ade1395..000000000000 --- a/xds/internal/xdsclient/transport/grpctransport/grpctransport.go +++ /dev/null @@ -1,138 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package grpctransport provides an implementation of the transport interface -// using gRPC. 
-package grpctransport - -import ( - "context" - "fmt" - "time" - - "google.golang.org/grpc" - "google.golang.org/grpc/keepalive" - "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - - v3adsgrpc "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - v3adspb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - v3lrsgrpc "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3" - v3lrspb "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3" -) - -func init() { - internal.GRPCNewClient = grpc.NewClient - internal.NewADSStream = func(ctx context.Context, cc *grpc.ClientConn) (v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient, error) { - return v3adsgrpc.NewAggregatedDiscoveryServiceClient(cc).StreamAggregatedResources(ctx) - } -} - -// Builder provides a way to build a gRPC-based transport to an xDS server. -type Builder struct{} - -// Build creates a new gRPC-based transport to an xDS server using the provided -// options. This involves creating a grpc.ClientConn to the server identified by -// the server URI in the provided options. -func (b *Builder) Build(opts transport.BuildOptions) (transport.Transport, error) { - if opts.ServerConfig == nil { - return nil, fmt.Errorf("ServerConfig field in opts cannot be nil") - } - - // NOTE: The bootstrap package ensures that the server_uri and credentials - // inside the server config are always populated. If we end up using a - // different type in BuildOptions to specify the server configuration, we - // must ensure that those fields are not empty before proceeding. - - // Dial the xDS management server with dial options specified by the server - // configuration and a static keepalive configuration that is common across - // gRPC language implementations. 
- kpCfg := grpc.WithKeepaliveParams(keepalive.ClientParameters{ - Time: 5 * time.Minute, - Timeout: 20 * time.Second, - }) - dopts := append(opts.ServerConfig.DialOptions(), kpCfg) - dialer := internal.GRPCNewClient.(func(string, ...grpc.DialOption) (*grpc.ClientConn, error)) - cc, err := dialer(opts.ServerConfig.ServerURI(), dopts...) - if err != nil { - // An error from a non-blocking dial indicates something serious. - return nil, fmt.Errorf("failed to create a grpc transport to the management server %q: %v", opts.ServerConfig.ServerURI(), err) - } - cc.Connect() - - return &grpcTransport{cc: cc}, nil -} - -type grpcTransport struct { - cc *grpc.ClientConn -} - -func (g *grpcTransport) CreateStreamingCall(ctx context.Context, method string) (transport.StreamingCall, error) { - switch method { - case v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResources_FullMethodName: - return g.newADSStreamingCall(ctx) - case v3lrsgrpc.LoadReportingService_StreamLoadStats_FullMethodName: - return g.newLRSStreamingCall(ctx) - default: - return nil, fmt.Errorf("unsupported method: %v", method) - } -} - -func (g *grpcTransport) newADSStreamingCall(ctx context.Context) (transport.StreamingCall, error) { - newStream := internal.NewADSStream.(func(context.Context, *grpc.ClientConn) (v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient, error)) - stream, err := newStream(ctx, g.cc) - if err != nil { - return nil, fmt.Errorf("failed to create an ADS stream: %v", err) - } - return &adsStream{stream: stream}, nil -} - -func (g *grpcTransport) newLRSStreamingCall(ctx context.Context) (transport.StreamingCall, error) { - stream, err := v3lrsgrpc.NewLoadReportingServiceClient(g.cc).StreamLoadStats(ctx) - if err != nil { - return nil, fmt.Errorf("failed to create an LRS stream: %v", err) - } - return &lrsStream{stream: stream}, nil -} - -func (g *grpcTransport) Close() error { - return g.cc.Close() -} - -type adsStream struct { - stream 
v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient -} - -func (a *adsStream) Send(msg any) error { - return a.stream.Send(msg.(*v3adspb.DiscoveryRequest)) -} - -func (a *adsStream) Recv() (any, error) { - return a.stream.Recv() -} - -type lrsStream struct { - stream v3lrsgrpc.LoadReportingService_StreamLoadStatsClient -} - -func (l *lrsStream) Send(msg any) error { - return l.stream.Send(msg.(*v3lrspb.LoadStatsRequest)) -} - -func (l *lrsStream) Recv() (any, error) { - return l.stream.Recv() -} diff --git a/xds/internal/xdsclient/transport/grpctransport/grpctransport_ext_test.go b/xds/internal/xdsclient/transport/grpctransport/grpctransport_ext_test.go deleted file mode 100644 index 2e375f0b5ac1..000000000000 --- a/xds/internal/xdsclient/transport/grpctransport/grpctransport_ext_test.go +++ /dev/null @@ -1,91 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package grpctransport_test - -import ( - "testing" - - "google.golang.org/grpc" - "google.golang.org/grpc/internal/grpctest" - internalbootstrap "google.golang.org/grpc/internal/xds/bootstrap" - "google.golang.org/grpc/xds/internal/xdsclient/internal" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/grpc/xds/internal/xdsclient/transport/grpctransport" -) - -type s struct { - grpctest.Tester -} - -func Test(t *testing.T) { - grpctest.RunSubTests(t, s{}) -} - -// Tests that the grpctransport.Builder creates a new grpc.ClientConn every time -// Build() is called. -func (s) TestBuild_CustomDialer(t *testing.T) { - // Override the dialer with a custom one. - customDialerCalled := false - origDialer := internal.GRPCNewClient - internal.GRPCNewClient = func(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { - customDialerCalled = true - return grpc.NewClient(target, opts...) - } - defer func() { internal.GRPCNewClient = origDialer }() - - serverCfg, err := internalbootstrap.ServerConfigForTesting(internalbootstrap.ServerConfigTestingOptions{URI: "server-address"}) - if err != nil { - t.Fatalf("Failed to create server config for testing: %v", err) - } - - // Create a new transport and ensure that the custom dialer was called. - opts := transport.BuildOptions{ServerConfig: serverCfg} - builder := &grpctransport.Builder{} - tr, err := builder.Build(opts) - if err != nil { - t.Fatalf("Builder.Build(%+v) failed: %v", opts, err) - } - defer tr.Close() - - if !customDialerCalled { - t.Fatalf("Builder.Build(%+v): custom dialer called = false, want true", opts) - } - customDialerCalled = false - - // Create another transport and ensure that the custom dialer was called. 
- tr, err = builder.Build(opts) - if err != nil { - t.Fatalf("Builder.Build(%+v) failed: %v", opts, err) - } - defer tr.Close() - - if !customDialerCalled { - t.Fatalf("Builder.Build(%+v): custom dialer called = false, want true", opts) - } -} - -// Tests that the grpctransport.Builder fails to build a transport when the -// provided BuildOptions do not contain a ServerConfig. -func (s) TestBuild_EmptyServerConfig(t *testing.T) { - builder := &grpctransport.Builder{} - opts := transport.BuildOptions{} - if tr, err := builder.Build(opts); err == nil { - tr.Close() - t.Fatalf("Builder.Build(%+v) succeeded when expected to fail", opts) - } -} diff --git a/xds/internal/xdsclient/transport/lrs/lrs_stream.go b/xds/internal/xdsclient/transport/lrs/lrs_stream.go deleted file mode 100644 index 7260816b671d..000000000000 --- a/xds/internal/xdsclient/transport/lrs/lrs_stream.go +++ /dev/null @@ -1,339 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Package lrs provides the implementation of an LRS (Load Reporting Service) -// stream for the xDS client. 
-package lrs - -import ( - "context" - "fmt" - "io" - "sync" - "time" - - "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/internal/backoff" - igrpclog "google.golang.org/grpc/internal/grpclog" - "google.golang.org/grpc/internal/pretty" - "google.golang.org/grpc/xds/internal" - "google.golang.org/grpc/xds/internal/xdsclient/load" - "google.golang.org/grpc/xds/internal/xdsclient/transport" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/durationpb" - - v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" - v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" - v3lrspb "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3" -) - -// Any per-RPC level logs which print complete request or response messages -// should be gated at this verbosity level. Other per-RPC level logs which print -// terse output should be at `INFO` and verbosity 2. -const perRPCVerbosityLevel = 9 - -// StreamImpl provides all the functionality associated with an LRS (Load Reporting -// Service) stream on the client-side. It manages the lifecycle of the LRS stream, -// including starting, stopping, and retrying the stream. It also provides a -// load.Store that can be used to report load, and a cleanup function that should -// be called when the load reporting is no longer needed. -type StreamImpl struct { - // The following fields are initialized when a Stream instance is created - // and are read-only afterwards, and hence can be accessed without a mutex. - transport transport.Transport // Transport to use for LRS stream. - backoff func(int) time.Duration // Backoff for retries, after stream failures. - nodeProto *v3corepb.Node // Identifies the gRPC application. - doneCh chan struct{} // To notify exit of LRS goroutine. - logger *igrpclog.PrefixLogger - - // Guards access to the below fields. - mu sync.Mutex - cancelStream context.CancelFunc // Cancel the stream. If nil, the stream is not active. 
- refCount int // Number of interested parties. - lrsStore *load.Store // Store returned to user for pushing loads. -} - -// StreamOpts holds the options for creating an lrsStream. -type StreamOpts struct { - Transport transport.Transport // xDS transport to create the stream on. - Backoff func(int) time.Duration // Backoff for retries, after stream failures. - NodeProto *v3corepb.Node // Node proto to identify the gRPC application. - LogPrefix string // Prefix to be used for log messages. -} - -// NewStreamImpl creates a new StreamImpl with the provided options. -// -// The actual streaming RPC call is initiated when the first call to ReportLoad -// is made, and is terminated when the last call to ReportLoad is canceled. -func NewStreamImpl(opts StreamOpts) *StreamImpl { - lrs := &StreamImpl{ - transport: opts.Transport, - backoff: opts.Backoff, - nodeProto: opts.NodeProto, - lrsStore: load.NewStore(), - } - - l := grpclog.Component("xds") - lrs.logger = igrpclog.NewPrefixLogger(l, opts.LogPrefix+fmt.Sprintf("[lrs-stream %p] ", lrs)) - return lrs -} - -// ReportLoad returns a load.Store that can be used to report load, and a -// cleanup function that should be called when the load reporting is no longer -// needed. -// -// The first call to ReportLoad sets the reference count to one, and starts the -// LRS streaming call. Subsequent calls increment the reference count and return -// the same load.Store. -// -// The cleanup function decrements the reference count and stops the LRS stream -// when the last reference is removed. 
-func (lrs *StreamImpl) ReportLoad() (*load.Store, func()) { - lrs.mu.Lock() - defer lrs.mu.Unlock() - - cleanup := sync.OnceFunc(func() { - lrs.mu.Lock() - defer lrs.mu.Unlock() - - if lrs.refCount == 0 { - lrs.logger.Errorf("Attempting to stop already stopped StreamImpl") - return - } - lrs.refCount-- - if lrs.refCount != 0 { - return - } - - if lrs.cancelStream == nil { - // It is possible that Stop() is called before the cleanup function - // is called, thereby setting cancelStream to nil. Hence we need a - // nil check here bofore invoking the cancel function. - return - } - lrs.cancelStream() - lrs.cancelStream = nil - lrs.logger.Infof("Stopping StreamImpl") - }) - - if lrs.refCount != 0 { - lrs.refCount++ - return lrs.lrsStore, cleanup - } - - lrs.refCount++ - ctx, cancel := context.WithCancel(context.Background()) - lrs.cancelStream = cancel - lrs.doneCh = make(chan struct{}) - go lrs.runner(ctx) - return lrs.lrsStore, cleanup -} - -// runner is responsible for managing the lifetime of an LRS streaming call. It -// creates the stream, sends the initial LoadStatsRequest, receives the first -// LoadStatsResponse, and then starts a goroutine to periodically send -// LoadStatsRequests. The runner will restart the stream if it encounters any -// errors. -func (lrs *StreamImpl) runner(ctx context.Context) { - defer close(lrs.doneCh) - - // This feature indicates that the client supports the - // LoadStatsResponse.send_all_clusters field in the LRS response. - node := proto.Clone(lrs.nodeProto).(*v3corepb.Node) - node.ClientFeatures = append(node.ClientFeatures, "envoy.lrs.supports_send_all_clusters") - - runLoadReportStream := func() error { - // streamCtx is created and canceled in case we terminate the stream - // early for any reason, to avoid gRPC-Go leaking the RPC's monitoring - // goroutine. 
- streamCtx, cancel := context.WithCancel(ctx) - defer cancel() - - stream, err := lrs.transport.CreateStreamingCall(streamCtx, "/envoy.service.load_stats.v3.LoadReportingService/StreamLoadStats") - if err != nil { - lrs.logger.Warningf("Failed to create new LRS streaming RPC: %v", err) - return nil - } - if lrs.logger.V(2) { - lrs.logger.Infof("LRS stream created") - } - - if err := lrs.sendFirstLoadStatsRequest(stream, node); err != nil { - lrs.logger.Warningf("Sending first LRS request failed: %v", err) - return nil - } - - clusters, interval, err := lrs.recvFirstLoadStatsResponse(stream) - if err != nil { - lrs.logger.Warningf("Reading from LRS streaming RPC failed: %v", err) - return nil - } - - // We reset backoff state when we successfully receive at least one - // message from the server. - lrs.sendLoads(streamCtx, stream, clusters, interval) - return backoff.ErrResetBackoff - } - backoff.RunF(ctx, runLoadReportStream, lrs.backoff) -} - -// sendLoads is responsible for periodically sending load reports to the LRS -// server at the specified interval for the specified clusters, until the passed -// in context is canceled. 
-func (lrs *StreamImpl) sendLoads(ctx context.Context, stream transport.StreamingCall, clusterNames []string, interval time.Duration) { - tick := time.NewTicker(interval) - defer tick.Stop() - for { - select { - case <-tick.C: - case <-ctx.Done(): - return - } - if err := lrs.sendLoadStatsRequest(stream, lrs.lrsStore.Stats(clusterNames)); err != nil { - lrs.logger.Warningf("Writing to LRS stream failed: %v", err) - return - } - } -} - -func (lrs *StreamImpl) sendFirstLoadStatsRequest(stream transport.StreamingCall, node *v3corepb.Node) error { - req := &v3lrspb.LoadStatsRequest{Node: node} - if lrs.logger.V(perRPCVerbosityLevel) { - lrs.logger.Infof("Sending initial LoadStatsRequest: %s", pretty.ToJSON(req)) - } - err := stream.Send(req) - if err == io.EOF { - return getStreamError(stream) - } - return err -} - -// recvFirstLoadStatsResponse receives the first LoadStatsResponse from the LRS -// server. Returns the following: -// - a list of cluster names requested by the server or an empty slice if the -// server requested for load from all clusters -// - the load reporting interval, and -// - any error encountered -func (lrs *StreamImpl) recvFirstLoadStatsResponse(stream transport.StreamingCall) ([]string, time.Duration, error) { - r, err := stream.Recv() - if err != nil { - return nil, 0, fmt.Errorf("lrs: failed to receive first LoadStatsResponse: %v", err) - } - resp, ok := r.(*v3lrspb.LoadStatsResponse) - if !ok { - return nil, time.Duration(0), fmt.Errorf("lrs: unexpected message type %T", r) - } - if lrs.logger.V(perRPCVerbosityLevel) { - lrs.logger.Infof("Received first LoadStatsResponse: %s", pretty.ToJSON(resp)) - } - - interval := resp.GetLoadReportingInterval() - if err := interval.CheckValid(); err != nil { - return nil, 0, fmt.Errorf("lrs: invalid load_reporting_interval: %v", err) - } - loadReportingInterval := interval.AsDuration() - - clusters := resp.Clusters - if resp.SendAllClusters { - // Return an empty slice to send stats for all clusters. 
- clusters = []string{} - } - - return clusters, loadReportingInterval, nil -} - -func (lrs *StreamImpl) sendLoadStatsRequest(stream transport.StreamingCall, loads []*load.Data) error { - clusterStats := make([]*v3endpointpb.ClusterStats, 0, len(loads)) - for _, sd := range loads { - droppedReqs := make([]*v3endpointpb.ClusterStats_DroppedRequests, 0, len(sd.Drops)) - for category, count := range sd.Drops { - droppedReqs = append(droppedReqs, &v3endpointpb.ClusterStats_DroppedRequests{ - Category: category, - DroppedCount: count, - }) - } - localityStats := make([]*v3endpointpb.UpstreamLocalityStats, 0, len(sd.LocalityStats)) - for l, localityData := range sd.LocalityStats { - lid, err := internal.LocalityIDFromString(l) - if err != nil { - return err - } - loadMetricStats := make([]*v3endpointpb.EndpointLoadMetricStats, 0, len(localityData.LoadStats)) - for name, loadData := range localityData.LoadStats { - loadMetricStats = append(loadMetricStats, &v3endpointpb.EndpointLoadMetricStats{ - MetricName: name, - NumRequestsFinishedWithMetric: loadData.Count, - TotalMetricValue: loadData.Sum, - }) - } - localityStats = append(localityStats, &v3endpointpb.UpstreamLocalityStats{ - Locality: &v3corepb.Locality{ - Region: lid.Region, - Zone: lid.Zone, - SubZone: lid.SubZone, - }, - TotalSuccessfulRequests: localityData.RequestStats.Succeeded, - TotalRequestsInProgress: localityData.RequestStats.InProgress, - TotalErrorRequests: localityData.RequestStats.Errored, - TotalIssuedRequests: localityData.RequestStats.Issued, - LoadMetricStats: loadMetricStats, - UpstreamEndpointStats: nil, // TODO: populate for per endpoint loads. 
- }) - } - - clusterStats = append(clusterStats, &v3endpointpb.ClusterStats{ - ClusterName: sd.Cluster, - ClusterServiceName: sd.Service, - UpstreamLocalityStats: localityStats, - TotalDroppedRequests: sd.TotalDrops, - DroppedRequests: droppedReqs, - LoadReportInterval: durationpb.New(sd.ReportInterval), - }) - } - - req := &v3lrspb.LoadStatsRequest{ClusterStats: clusterStats} - if lrs.logger.V(perRPCVerbosityLevel) { - lrs.logger.Infof("Sending LRS loads: %s", pretty.ToJSON(req)) - } - err := stream.Send(req) - if err == io.EOF { - return getStreamError(stream) - } - return err -} - -func getStreamError(stream transport.StreamingCall) error { - for { - if _, err := stream.Recv(); err != nil { - return err - } - } -} - -// Stop blocks until the stream is closed and all spawned goroutines exit. -func (lrs *StreamImpl) Stop() { - lrs.mu.Lock() - defer lrs.mu.Unlock() - - if lrs.cancelStream == nil { - return - } - lrs.cancelStream() - lrs.cancelStream = nil - lrs.logger.Infof("Stopping LRS stream") - <-lrs.doneCh -} diff --git a/xds/internal/xdsclient/transport/transport_interface.go b/xds/internal/xdsclient/transport/transport_interface.go deleted file mode 100644 index 48ce82a06e9d..000000000000 --- a/xds/internal/xdsclient/transport/transport_interface.go +++ /dev/null @@ -1,64 +0,0 @@ -/* - * - * Copyright 2024 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Package transport defines the interface that describe the functionality -// required to communicate with an xDS server using streaming calls. -package transport - -import ( - "context" - - "google.golang.org/grpc/internal/xds/bootstrap" -) - -// Builder is an interface for building a new xDS transport. -type Builder interface { - // Build creates a new xDS transport with the provided options. - Build(opts BuildOptions) (Transport, error) -} - -// BuildOptions contains the options for building a new xDS transport. -type BuildOptions struct { - // ServerConfig contains the configuration that controls how the transport - // interacts with the xDS server. This includes the server URI and the - // credentials to use to connect to the server, among other things. - ServerConfig *bootstrap.ServerConfig -} - -// Transport provides the functionality to communicate with an xDS server using -// streaming calls. -type Transport interface { - // CreateStreamingCall creates a new streaming call to the xDS server for the - // specified method name. The returned StreamingCall interface can be used to - // send and receive messages on the stream. - CreateStreamingCall(context.Context, string) (StreamingCall, error) - - // Close closes the underlying connection and cleans up any resources used by the - // Transport. - Close() error -} - -// StreamingCall is an interface that provides a way to send and receive -// messages on a stream. The methods accept or return any.Any messages instead -// of concrete types to allow this interface to be used for both ADS and LRS. -type StreamingCall interface { - // Send sends the provided message on the stream. - Send(any) error - - // Recv block until the next message is received on the stream. 
- Recv() (any, error) -} diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index 3d85c31ff433..a3736bc8d320 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -19,6 +19,8 @@ package xdsresource import ( "google.golang.org/grpc/internal/pretty" + "google.golang.org/grpc/internal/xds/bootstrap" + xdsclient "google.golang.org/grpc/xds/internal/clients/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -150,3 +152,9 @@ func WatchCluster(p Producer, name string, w ClusterWatcher) (cancel func()) { delegator := &delegatingClusterWatcher{watcher: w} return p.WatchResource(clusterType, name, delegator) } + +// NewGenericClusterResourceTypeDecoder returns a xdsclient.Decoder that +// wraps the xdsresource.clusterType. 
+func NewGenericClusterResourceTypeDecoder(bc *bootstrap.Config, gServerCfgMap map[xdsclient.ServerConfig]*bootstrap.ServerConfig) xdsclient.Decoder { + return &genericResourceTypeDecoder{resourceType: clusterType, bootstrapConfig: bc, gServerConfigMap: gServerCfgMap} +} diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index de574dd8d345..5d2a6753ed37 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -19,6 +19,7 @@ package xdsresource import ( "google.golang.org/grpc/internal/pretty" + xdsclient "google.golang.org/grpc/xds/internal/clients/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -146,3 +147,9 @@ func WatchEndpoints(p Producer, name string, w EndpointsWatcher) (cancel func()) delegator := &delegatingEndpointsWatcher{watcher: w} return p.WatchResource(endpointsType, name, delegator) } + +// NewGenericEndpointsResourceTypeDecoder returns a xdsclient.Decoder that +// wraps the xdsresource.endpointsType. 
+func NewGenericEndpointsResourceTypeDecoder() xdsclient.Decoder { + return &genericResourceTypeDecoder{resourceType: endpointsType} +} diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 0f49e6c56a3a..81786c01d243 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -22,6 +22,7 @@ import ( "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/internal/xds/bootstrap" + xdsclient "google.golang.org/grpc/xds/internal/clients/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -103,7 +104,6 @@ func (listenerResourceType) Decode(opts *DecodeOptions, resource *anypb.Any) (*D } return &DecodeResult{Name: name, Resource: &ListenerResourceData{Resource: listener}}, nil - } // ListenerResourceData wraps the configuration of a Listener resource as @@ -127,7 +127,6 @@ func (l *ListenerResourceData) RawEqual(other ResourceData) bool { return false } return proto.Equal(l.Resource.Raw, other.Raw()) - } // ToJSON returns a JSON string representation of the resource data. @@ -182,3 +181,9 @@ func WatchListener(p Producer, name string, w ListenerWatcher) (cancel func()) { delegator := &delegatingListenerWatcher{watcher: w} return p.WatchResource(listenerType, name, delegator) } + +// NewGenericListenerResourceTypeDecoder returns a xdsclient.Decoder that wraps +// the xdsresource.listenerType. 
+func NewGenericListenerResourceTypeDecoder(bc *bootstrap.Config) xdsclient.Decoder { + return &genericResourceTypeDecoder{resourceType: listenerType, bootstrapConfig: bc} +} diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index c22c5a6a3a84..8e40868f8a95 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -25,8 +25,11 @@ package xdsresource import ( + "fmt" + "google.golang.org/grpc/internal/xds/bootstrap" xdsinternal "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/types/known/anypb" ) @@ -167,3 +170,116 @@ func (r resourceTypeState) TypeName() string { func (r resourceTypeState) AllResourcesRequiredInSotW() bool { return r.allResourcesRequiredInSotW } + +// genericResourceTypeDecoder wraps an xdsresource.Type and implements +// xdsclient.Decoder. +// +// TODO: #8313 - Delete this once the internal xdsclient usages are updated +// to use the generic xdsclient.ResourceType interface directly. +type genericResourceTypeDecoder struct { + resourceType Type + bootstrapConfig *bootstrap.Config + gServerConfigMap map[xdsclient.ServerConfig]*bootstrap.ServerConfig +} + +// Decode deserialize and validate resource bytes of an xDS resource received +// from the xDS management server. 
+func (gd *genericResourceTypeDecoder) Decode(resourceBytes []byte, gOpts xdsclient.DecodeOptions) (*xdsclient.DecodeResult, error) { + raw := &anypb.Any{TypeUrl: gd.resourceType.TypeURL(), Value: resourceBytes} + opts := &DecodeOptions{BootstrapConfig: gd.bootstrapConfig} + if gOpts.ServerConfig != nil { + opts.ServerConfig = gd.gServerConfigMap[*gOpts.ServerConfig] + } + + result, err := gd.resourceType.Decode(opts, raw) + if result == nil { + return nil, err + } + if err != nil { + return &xdsclient.DecodeResult{Name: result.Name}, err + } + + return &xdsclient.DecodeResult{Name: result.Name, Resource: &genericResourceData{resourceData: result.Resource}}, nil +} + +// genericResourceData embed an xdsresource.ResourceData and implements +// xdsclient.ResourceData. +// +// TODO: #8313 - Delete this once the internal xdsclient usages are updated +// to use the generic xdsclient.ResourceData interface directly. +type genericResourceData struct { + resourceData ResourceData +} + +// Equal returns true if the passed in xdsclient.ResourceData +// is equal to that of the receiver. +func (grd *genericResourceData) Equal(other xdsclient.ResourceData) bool { + if other == nil { + return false + } + otherResourceData, ok := other.(*genericResourceData) + if !ok { + return false + } + return grd.resourceData.RawEqual(otherResourceData.resourceData) +} + +// Bytes returns the underlying raw bytes of the wrapped resource. +func (grd *genericResourceData) Bytes() []byte { + rawAny := grd.resourceData.Raw() + if rawAny == nil { + return nil + } + return rawAny.Value +} + +// genericResourceWatcher wraps xdsresource.ResourceWatcher and implements +// xdsclient.ResourceWatcher. +// +// TODO: #8313 - Delete this once the internal xdsclient usages are updated +// to use the generic xdsclient.ResourceWatcher interface directly. 
+type genericResourceWatcher struct { + xdsResourceWatcher ResourceWatcher +} + +// ResourceChanged indicates a new version of the wrapped resource is +// available. +func (gw *genericResourceWatcher) ResourceChanged(gData xdsclient.ResourceData, done func()) { + if gData == nil { + gw.xdsResourceWatcher.ResourceChanged(nil, done) + return + } + + grd, ok := gData.(*genericResourceData) + if !ok { + err := fmt.Errorf("genericResourceWatcher received unexpected xdsclient.ResourceData type %T, want *genericResourceData", gData) + gw.xdsResourceWatcher.ResourceError(err, done) + return + } + gw.xdsResourceWatcher.ResourceChanged(grd.resourceData, done) +} + +// ResourceError indicates an error occurred while trying to fetch or +// decode the associated wrapped resource. The previous version of the +// wrapped resource should be considered invalid. +func (gw *genericResourceWatcher) ResourceError(err error, done func()) { + gw.xdsResourceWatcher.ResourceError(err, done) +} + +// AmbientError indicates an error occurred after a resource has been +// received that should not modify the use of that wrapped resource but may +// provide useful information about the state of the XDSClient for debugging +// purposes. The previous version of the wrapped resource should still be +// considered valid. +func (gw *genericResourceWatcher) AmbientError(err error, done func()) { + gw.xdsResourceWatcher.AmbientError(err, done) +} + +// GenericResourceWatcher returns a xdsclient.ResourceWatcher that wraps an +// xdsresource.ResourceWatcher to make it compatible with xdsclient.ResourceWatcher. 
+func GenericResourceWatcher(xdsResourceWatcher ResourceWatcher) xdsclient.ResourceWatcher { + if xdsResourceWatcher == nil { + return nil + } + return &genericResourceWatcher{xdsResourceWatcher: xdsResourceWatcher} +} diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index c292b1b8ef2c..c0e8e6e03184 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -19,6 +19,7 @@ package xdsresource import ( "google.golang.org/grpc/internal/pretty" + xdsclient "google.golang.org/grpc/xds/internal/clients/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -148,3 +149,9 @@ func WatchRouteConfig(p Producer, name string, w RouteConfigWatcher) (cancel fun delegator := &delegatingRouteConfigWatcher{watcher: w} return p.WatchResource(routeConfigType, name, delegator) } + +// NewGenericRouteConfigResourceTypeDecoder returns a xdsclient.Decoder that +// wraps the xdsresource.routeConfigType. +func NewGenericRouteConfigResourceTypeDecoder() xdsclient.Decoder { + return &genericResourceTypeDecoder{resourceType: routeConfigType} +} diff --git a/xds/internal/xdsclient/xdsresource/type_eds.go b/xds/internal/xdsclient/xdsresource/type_eds.go index a7eab2361d31..12294626f392 100644 --- a/xds/internal/xdsclient/xdsresource/type_eds.go +++ b/xds/internal/xdsclient/xdsresource/type_eds.go @@ -18,7 +18,7 @@ package xdsresource import ( - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/protobuf/types/known/anypb" ) @@ -58,7 +58,7 @@ type Endpoint struct { // Locality contains information of a locality. 
type Locality struct { Endpoints []Endpoint - ID internal.LocalityID + ID clients.Locality Priority uint32 Weight uint32 } diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_eds.go b/xds/internal/xdsclient/xdsresource/unmarshal_eds.go index 8a7397d160a3..d56b42dd360e 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_eds.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_eds.go @@ -29,6 +29,7 @@ import ( "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) @@ -164,12 +165,12 @@ func parseEDSRespProto(m *v3endpointpb.ClusterLoadAssignment) (EndpointsUpdate, localitiesWithPriority = make(map[string]bool) priorities[priority] = localitiesWithPriority } - lid := internal.LocalityID{ + lid := clients.Locality{ Region: l.Region, Zone: l.Zone, SubZone: l.SubZone, } - lidStr := lid.ToString() + lidStr := internal.LocalityString(lid) // "Since an xDS configuration can place a given locality under multiple // priorities, it is possible to see locality weight attributes with diff --git a/xds/internal/xdsclient/xdsresource/unmarshal_eds_test.go b/xds/internal/xdsclient/xdsresource/unmarshal_eds_test.go index ae4f639d3ccc..f38f696178e5 100644 --- a/xds/internal/xdsclient/xdsresource/unmarshal_eds_test.go +++ b/xds/internal/xdsclient/xdsresource/unmarshal_eds_test.go @@ -32,7 +32,7 @@ import ( "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/pretty" "google.golang.org/grpc/internal/testutils" - "google.golang.org/grpc/xds/internal" + "google.golang.org/grpc/xds/internal/clients" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -149,7 +149,7 @@ func (s) TestEDSParseRespProto(t *testing.T) { HealthStatus: EndpointHealthStatusUnhealthy, 
Weight: 271, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, @@ -159,7 +159,7 @@ func (s) TestEDSParseRespProto(t *testing.T) { HealthStatus: EndpointHealthStatusDraining, Weight: 828, }}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, @@ -191,7 +191,7 @@ func (s) TestEDSParseRespProto(t *testing.T) { HealthStatus: EndpointHealthStatusUnhealthy, Weight: 271, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, @@ -201,7 +201,7 @@ func (s) TestEDSParseRespProto(t *testing.T) { HealthStatus: EndpointHealthStatusDraining, Weight: 828, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 0, Weight: 1, }, @@ -302,7 +302,7 @@ func (s) TestEDSParseRespProtoAdditionalAddrs(t *testing.T) { HealthStatus: EndpointHealthStatusUnhealthy, Weight: 271, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, @@ -312,7 +312,7 @@ func (s) TestEDSParseRespProtoAdditionalAddrs(t *testing.T) { HealthStatus: EndpointHealthStatusHealthy, Weight: 828, }}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, @@ -522,7 +522,7 @@ func (s) TestUnmarshalEndpoints(t *testing.T) { HealthStatus: EndpointHealthStatusUnhealthy, Weight: 271, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, @@ -532,7 +532,7 @@ func (s) TestUnmarshalEndpoints(t *testing.T) { HealthStatus: EndpointHealthStatusDraining, Weight: 828, }}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, @@ -553,7 +553,7 @@ func (s) TestUnmarshalEndpoints(t 
*testing.T) { HealthStatus: EndpointHealthStatusUnhealthy, Weight: 271, }}, - ID: internal.LocalityID{SubZone: "locality-1"}, + ID: clients.Locality{SubZone: "locality-1"}, Priority: 1, Weight: 1, }, @@ -563,7 +563,7 @@ func (s) TestUnmarshalEndpoints(t *testing.T) { HealthStatus: EndpointHealthStatusDraining, Weight: 828, }}, - ID: internal.LocalityID{SubZone: "locality-2"}, + ID: clients.Locality{SubZone: "locality-2"}, Priority: 0, Weight: 1, }, diff --git a/xds/server_resource_ext_test.go b/xds/server_resource_ext_test.go index 364d27a0f2f4..b396eb9ffc6c 100644 --- a/xds/server_resource_ext_test.go +++ b/xds/server_resource_ext_test.go @@ -25,6 +25,7 @@ import ( "net" "strings" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/uuid" @@ -32,14 +33,11 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/internal" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/internal/testutils/xds/e2e" "google.golang.org/grpc/internal/xds/bootstrap" "google.golang.org/grpc/xds" - xdsinternal "google.golang.org/grpc/xds/internal" "google.golang.org/grpc/xds/internal/xdsclient" - "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version" "google.golang.org/protobuf/types/known/wrapperspb" @@ -56,12 +54,13 @@ import ( // Tests the case where an LDS points to an RDS which returns resource not // found. Before getting the resource not found, the xDS Server has not received // all configuration needed, so it should Accept and Close any new connections. -// After it has received the resource not found error, the server should move to -// serving, successfully Accept Connections, and fail at the L7 level with -// resource not found specified. 
+// After it has received the resource not found error (due to short watch +// expiry), the server should move to serving, successfully Accept Connections, +// and fail at the L7 level with resource not found specified. func (s) TestServer_RouteConfiguration_ResourceNotFound(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() + routeConfigNamesCh := make(chan []string, 1) managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error { @@ -91,7 +90,6 @@ func (s) TestServer_RouteConfiguration_ResourceNotFound(t *testing.T) { if err != nil { t.Fatalf("Failed to retrieve host and port of server: %v", err) } - const routeConfigResourceName = "routeName" listener := e2e.DefaultServerListenerWithRouteConfigName(host, port, e2e.SecurityLevelNone, routeConfigResourceName) resources := e2e.UpdateOptions{ @@ -99,17 +97,29 @@ func (s) TestServer_RouteConfiguration_ResourceNotFound(t *testing.T) { Listeners: []*v3listenerpb.Listener{listener}, SkipValidation: true, } - if err := managementServer.Update(ctx, resources); err != nil { t.Fatal(err) } + modeChangeHandler := newServingModeChangeHandler(t) modeChangeOpt := xds.ServingModeCallback(modeChangeHandler.modeChangeCallback) + config, err := bootstrap.NewConfigFromContents(bootstrapContents) if err != nil { t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) } + // Create a specific xDS client instance within that pool for the server, + // configuring it with a short WatchExpiryTimeout. 
pool := xdsclient.NewPool(config) + _, serverXDSClientClose, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{ + Name: xdsclient.NameForServer, + WatchExpiryTimeout: 500 * time.Millisecond, + }) + if err != nil { + t.Fatalf("Failed to create xDS client for server: %v", err) + } + defer serverXDSClientClose() + // Start an xDS-enabled gRPC server using the above client from the pool. createStubServer(t, lis, modeChangeOpt, xds.ClientPoolForTesting(pool)) // Wait for the route configuration resource to be requested from the @@ -123,29 +133,20 @@ func (s) TestServer_RouteConfiguration_ResourceNotFound(t *testing.T) { t.Fatal("Timeout waiting for route config resource to be requested") } + // Do NOT send the RDS resource. The xDS client's watch expiry timer will + // fire. After the RDS resource is deemed "not found" (due to the short + // watch expiry), the server will transition to SERVING mode. + cc, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { t.Fatalf("failed to dial local test server: %v", err) } defer cc.Close() + // Before the watch expiry, the server is NOT_SERVING, RPCs should fail with UNAVAILABLE. waitForFailedRPCWithStatus(ctx, t, cc, codes.Unavailable, "", "") - // Lookup the xDS client in use based on the dedicated well-known key, as - // defined in A71, used by the xDS enabled gRPC server. - xdsC, close, err := pool.GetClientForTesting(xdsclient.NameForServer) - if err != nil { - t.Fatalf("Failed to find xDS client for configuration: %v", string(bootstrapContents)) - } - defer close() - - // Invoke resource not found error for the route configuration resource. - // This should cause the server to go SERVING, but fail RPCs with the - // appropriate error code. 
- triggerResourceNotFound := internal.TriggerXDSResourceNotFoundForTesting.(func(xdsclient.XDSClient, xdsresource.Type, string) error) - routeConfigResourceType := xdsinternal.ResourceTypeMapForTesting[version.V3RouteConfigURL].(xdsresource.Type) - if err := triggerResourceNotFound(xdsC, routeConfigResourceType, routeConfigResourceName); err != nil { - t.Fatalf("Failed to trigger resource name not found for testing: %v", err) - } + // Wait for the xDS-enabled gRPC server to go SERVING. This should happen + // after the RDS watch expiry timer fires. select { case <-ctx.Done(): t.Fatal("Timeout waiting for the xDS-enabled gRPC server to go SERVING") @@ -154,6 +155,8 @@ func (s) TestServer_RouteConfiguration_ResourceNotFound(t *testing.T) { t.Fatalf("Mode changed to %v, want %v", gotMode, connectivity.ServingModeServing) } } + // After watch expiry, the server should be SERVING, but RPCs should fail + // at the L7 level with resource not found. waitForFailedRPCWithStatus(ctx, t, cc, codes.Unavailable, "error from xDS configuration for matched route configuration", nodeID) }