Skip to content

Commit 0300770

Browse files
authored
xds: support cluster fallback in cluster_resolver (#4594)
1 parent 65cabd7 commit 0300770

17 files changed

+587
-722
lines changed

xds/internal/balancer/cdsbalancer/cdsbalancer.go

Lines changed: 70 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -35,27 +35,27 @@ import (
3535
"google.golang.org/grpc/resolver"
3636
"google.golang.org/grpc/serviceconfig"
3737
"google.golang.org/grpc/xds/internal/balancer/clusterresolver"
38+
"google.golang.org/grpc/xds/internal/balancer/clusterresolver/balancerconfig"
3839
"google.golang.org/grpc/xds/internal/xdsclient"
3940
)
4041

4142
const (
4243
cdsName = "cds_experimental"
43-
edsName = "eds_experimental"
4444
)
4545

4646
var (
4747
errBalancerClosed = errors.New("cdsBalancer is closed")
4848

49-
// newEDSBalancer is a helper function to build a new edsBalancer and will be
50-
// overridden in unittests.
51-
newEDSBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) {
52-
builder := balancer.Get(edsName)
49+
// newChildBalancer is a helper function to build a new cluster_resolver
50+
// balancer and will be overridden in unittests.
51+
newChildBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) {
52+
builder := balancer.Get(clusterresolver.Name)
5353
if builder == nil {
54-
return nil, fmt.Errorf("xds: no balancer builder with name %v", edsName)
54+
return nil, fmt.Errorf("xds: no balancer builder with name %v", clusterresolver.Name)
5555
}
56-
// We directly pass the parent clientConn to the
57-
// underlying edsBalancer because the cdsBalancer does
58-
// not deal with subConns.
56+
// We directly pass the parent clientConn to the underlying
57+
// cluster_resolver balancer because the cdsBalancer does not deal with
58+
// subConns.
5959
return builder.Build(cc, opts), nil
6060
}
6161
buildProvider = buildProviderFunc
@@ -126,31 +126,32 @@ func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, err
126126
// ccUpdate wraps a clientConn update received from gRPC (pushed from the
127127
// xdsResolver). A valid clusterName causes the cdsBalancer to register a CDS
128128
// watcher with the xdsClient, while a non-nil error causes it to cancel the
129-
// existing watch and propagate the error to the underlying edsBalancer.
129+
// existing watch and propagate the error to the underlying cluster_resolver
130+
// balancer.
130131
type ccUpdate struct {
131132
clusterName string
132133
err error
133134
}
134135

135136
// scUpdate wraps a subConn update received from gRPC. This is directly passed
136-
// on to the edsBalancer.
137+
// on to the cluster_resolver balancer.
137138
type scUpdate struct {
138139
subConn balancer.SubConn
139140
state balancer.SubConnState
140141
}
141142

142-
// cdsBalancer implements a CDS based LB policy. It instantiates an EDS based
143-
// LB policy to further resolve the serviceName received from CDS, into
144-
// localities and endpoints. Implements the balancer.Balancer interface which
145-
// is exposed to gRPC and implements the balancer.ClientConn interface which is
146-
// exposed to the edsBalancer.
143+
// cdsBalancer implements a CDS based LB policy. It instantiates a
144+
// cluster_resolver balancer to further resolve the serviceName received from
145+
// CDS, into localities and endpoints. Implements the balancer.Balancer
146+
// interface which is exposed to gRPC and implements the balancer.ClientConn
147+
// interface which is exposed to the cluster_resolver balancer.
147148
type cdsBalancer struct {
148149
ccw *ccWrapper // ClientConn interface passed to child LB.
149150
bOpts balancer.BuildOptions // BuildOptions passed to child LB.
150151
updateCh *buffer.Unbounded // Channel for gRPC and xdsClient updates.
151152
xdsClient xdsclient.XDSClient // xDS client to watch Cluster resource.
152153
clusterHandler *clusterHandler // To watch the clusters.
153-
edsLB balancer.Balancer // EDS child policy.
154+
childLB balancer.Balancer
154155
logger *grpclog.PrefixLogger
155156
closed *grpcsync.Event
156157
done *grpcsync.Event
@@ -166,7 +167,7 @@ type cdsBalancer struct {
166167
// handleClientConnUpdate handles a ClientConnUpdate received from gRPC. Good
167168
// updates lead to registration of a CDS watch. Updates with error lead to
168169
// cancellation of existing watch and propagation of the same error to the
169-
// edsBalancer.
170+
// cluster_resolver balancer.
170171
func (b *cdsBalancer) handleClientConnUpdate(update *ccUpdate) {
171172
// We first handle errors, if any, and then proceed with handling the
172173
// update, only if the status quo has changed.
@@ -266,15 +267,15 @@ func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanc
266267
}
267268

268269
// handleWatchUpdate handles a watch update from the xDS Client. Good updates
269-
// lead to clientConn updates being invoked on the underlying edsBalancer.
270+
// lead to clientConn updates being invoked on the underlying cluster_resolver balancer.
270271
func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) {
271272
if err := update.err; err != nil {
272273
b.logger.Warningf("Watch error from xds-client %p: %v", b.xdsClient, err)
273274
b.handleErrorFromUpdate(err, false)
274275
return
275276
}
276277

277-
b.logger.Infof("Watch update from xds-client %p, content: %+v, security config: %v", b.xdsClient, pretty.ToJSON(update.chu), pretty.ToJSON(update.securityCfg))
278+
b.logger.Infof("Watch update from xds-client %p, content: %+v, security config: %v", b.xdsClient, pretty.ToJSON(update.updates), pretty.ToJSON(update.securityCfg))
278279

279280
// Process the security config from the received update before building the
280281
// child policy or forwarding the update to it. We do this because the child
@@ -291,47 +292,54 @@ func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) {
291292
}
292293

293294
// The first good update from the watch API leads to the instantiation of an
294-
// edsBalancer. Further updates/errors are propagated to the existing
295-
// edsBalancer.
296-
if b.edsLB == nil {
297-
edsLB, err := newEDSBalancer(b.ccw, b.bOpts)
295+
// cluster_resolver balancer. Further updates/errors are propagated to the existing
296+
// cluster_resolver balancer.
297+
if b.childLB == nil {
298+
childLB, err := newChildBalancer(b.ccw, b.bOpts)
298299
if err != nil {
299-
b.logger.Errorf("Failed to create child policy of type %s, %v", edsName, err)
300+
b.logger.Errorf("Failed to create child policy of type %s, %v", clusterresolver.Name, err)
300301
return
301302
}
302-
b.edsLB = edsLB
303-
b.logger.Infof("Created child policy %p of type %s", b.edsLB, edsName)
304-
}
303+
b.childLB = childLB
304+
b.logger.Infof("Created child policy %p of type %s", b.childLB, clusterresolver.Name)
305+
}
306+
307+
dms := make([]balancerconfig.DiscoveryMechanism, len(update.updates))
308+
for i, cu := range update.updates {
309+
switch cu.ClusterType {
310+
case xdsclient.ClusterTypeEDS:
311+
dms[i] = balancerconfig.DiscoveryMechanism{
312+
Type: balancerconfig.DiscoveryMechanismTypeEDS,
313+
Cluster: cu.ClusterName,
314+
EDSServiceName: cu.EDSServiceName,
315+
MaxConcurrentRequests: cu.MaxRequests,
316+
}
317+
if cu.EnableLRS {
318+
// An empty string here indicates that the cluster_resolver balancer should use the
319+
// same xDS server for load reporting as it does for EDS
320+
// requests/responses.
321+
dms[i].LoadReportingServerName = new(string)
305322

306-
if len(update.chu) == 0 {
307-
b.logger.Infof("got update with 0 cluster updates, should never happen. There should be at least one cluster")
323+
}
324+
case xdsclient.ClusterTypeLogicalDNS:
325+
dms[i] = balancerconfig.DiscoveryMechanism{
326+
Type: balancerconfig.DiscoveryMechanismTypeLogicalDNS,
327+
DNSHostname: cu.DNSHostName,
328+
}
329+
default:
330+
b.logger.Infof("unexpected cluster type %v when handling update from cluster handler", cu.ClusterType)
331+
}
308332
}
309-
// TODO: this function is currently only handling the cluster with higher
310-
// priority. This should work in most cases (e.g. if the cluster is not a
311-
// aggregated cluster, or if the higher priority cluster works fine so
312-
// there's no need to fallback). This quick fix is to unblock the testing
313-
// work before the full fallback support is complete. Once the EDS balancer
314-
// is updated to cluster_resolver, which has the fallback functionality, we
315-
// will fix this to handle all the clusters in list.
316-
cds := update.chu[0]
317-
lbCfg := &clusterresolver.EDSConfig{
318-
ClusterName: cds.ClusterName,
319-
EDSServiceName: cds.EDSServiceName,
320-
MaxConcurrentRequests: cds.MaxRequests,
333+
lbCfg := &clusterresolver.LBConfig{
334+
DiscoveryMechanisms: dms,
321335
}
322-
if cds.EnableLRS {
323-
// An empty string here indicates that the edsBalancer should use the
324-
// same xDS server for load reporting as it does for EDS
325-
// requests/responses.
326-
lbCfg.LrsLoadReportingServerName = new(string)
327336

328-
}
329337
ccState := balancer.ClientConnState{
330338
ResolverState: xdsclient.SetClient(resolver.State{}, b.xdsClient),
331339
BalancerConfig: lbCfg,
332340
}
333-
if err := b.edsLB.UpdateClientConnState(ccState); err != nil {
334-
b.logger.Errorf("xds: edsBalancer.UpdateClientConnState(%+v) returned error: %v", ccState, err)
341+
if err := b.childLB.UpdateClientConnState(ccState); err != nil {
342+
b.logger.Errorf("xds: cluster_resolver balancer.UpdateClientConnState(%+v) returned error: %v", ccState, err)
335343
}
336344
}
337345

@@ -348,20 +356,20 @@ func (b *cdsBalancer) run() {
348356
b.handleClientConnUpdate(update)
349357
case *scUpdate:
350358
// SubConn updates are passthrough and are simply handed over to
351-
// the underlying edsBalancer.
352-
if b.edsLB == nil {
353-
b.logger.Errorf("xds: received scUpdate {%+v} with no edsBalancer", update)
359+
// the underlying cluster_resolver balancer.
360+
if b.childLB == nil {
361+
b.logger.Errorf("xds: received scUpdate {%+v} with no cluster_resolver balancer", update)
354362
break
355363
}
356-
b.edsLB.UpdateSubConnState(update.subConn, update.state)
364+
b.childLB.UpdateSubConnState(update.subConn, update.state)
357365
}
358366
case u := <-b.clusterHandler.updateChannel:
359367
b.handleWatchUpdate(u)
360368
case <-b.closed.Done():
361369
b.clusterHandler.close()
362-
if b.edsLB != nil {
363-
b.edsLB.Close()
364-
b.edsLB = nil
370+
if b.childLB != nil {
371+
b.childLB.Close()
372+
b.childLB = nil
365373
}
366374
if b.cachedRoot != nil {
367375
b.cachedRoot.Close()
@@ -389,22 +397,22 @@ func (b *cdsBalancer) run() {
389397
// - If it's from xds client, it means CDS resource were removed. The CDS
390398
// watcher should keep watching.
391399
//
392-
// In both cases, the error will be forwarded to EDS balancer. And if error is
393-
// resource-not-found, the child EDS balancer will stop watching EDS.
400+
// In both cases, the error will be forwarded to the child balancer. And if
401+
// error is resource-not-found, the child balancer will stop watching EDS.
394402
func (b *cdsBalancer) handleErrorFromUpdate(err error, fromParent bool) {
395403
// This is not necessary today, because xds client never sends connection
396404
// errors.
397405
if fromParent && xdsclient.ErrType(err) == xdsclient.ErrorTypeResourceNotFound {
398406
b.clusterHandler.close()
399407
}
400-
if b.edsLB != nil {
408+
if b.childLB != nil {
401409
if xdsclient.ErrType(err) != xdsclient.ErrorTypeConnection {
402410
// Connection errors will be sent to the child balancers directly.
403411
// There's no need to forward them.
404-
b.edsLB.ResolverError(err)
412+
b.childLB.ResolverError(err)
405413
}
406414
} else {
407-
// If eds balancer was never created, fail the RPCs with
415+
// If child balancer was never created, fail the RPCs with
408416
// errors.
409417
b.ccw.UpdateState(balancer.State{
410418
ConnectivityState: connectivity.TransientFailure,

xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ func setupWithXDSCreds(t *testing.T) (*fakeclient.Client, *cdsBalancer, *testEDS
153153
// Override the creation of the EDS balancer to return a fake EDS balancer
154154
// implementation.
155155
edsB := newTestEDSBalancer()
156-
oldEDSBalancerBuilder := newEDSBalancer
157-
newEDSBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) {
156+
oldEDSBalancerBuilder := newChildBalancer
157+
newChildBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) {
158158
edsB.parentCC = cc
159159
return edsB, nil
160160
}
@@ -177,7 +177,7 @@ func setupWithXDSCreds(t *testing.T) (*fakeclient.Client, *cdsBalancer, *testEDS
177177
}
178178

179179
return xdsC, cdsB.(*cdsBalancer), edsB, tcc, func() {
180-
newEDSBalancer = oldEDSBalancerBuilder
180+
newChildBalancer = oldEDSBalancerBuilder
181181
xdsC.Close()
182182
}
183183
}
@@ -251,7 +251,7 @@ func (s) TestSecurityConfigWithoutXDSCreds(t *testing.T) {
251251
// will trigger the watch handler on the CDS balancer, which will attempt to
252252
// create a new EDS balancer. The fake EDS balancer created above will be
253253
// returned to the CDS balancer, because we have overridden the
254-
// newEDSBalancer function as part of test setup.
254+
// newChildBalancer function as part of test setup.
255255
cdsUpdate := xdsclient.ClusterUpdate{ClusterName: serviceName}
256256
wantCCS := edsCCS(serviceName, nil, false)
257257
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
@@ -306,7 +306,7 @@ func (s) TestNoSecurityConfigWithXDSCreds(t *testing.T) {
306306
// will trigger the watch handler on the CDS balancer, which will attempt to
307307
// create a new EDS balancer. The fake EDS balancer created above will be
308308
// returned to the CDS balancer, because we have overridden the
309-
// newEDSBalancer function as part of test setup. No security config is
309+
// newChildBalancer function as part of test setup. No security config is
310310
// passed to the CDS balancer as part of this update.
311311
cdsUpdate := xdsclient.ClusterUpdate{ClusterName: serviceName}
312312
wantCCS := edsCCS(serviceName, nil, false)
@@ -464,7 +464,7 @@ func (s) TestSecurityConfigUpdate_BadToGood(t *testing.T) {
464464
// will trigger the watch handler on the CDS balancer, which will attempt to
465465
// create a new EDS balancer. The fake EDS balancer created above will be
466466
// returned to the CDS balancer, because we have overridden the
467-
// newEDSBalancer function as part of test setup.
467+
// newChildBalancer function as part of test setup.
468468
wantCCS := edsCCS(serviceName, nil, false)
469469
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil {
470470
t.Fatal(err)
@@ -498,7 +498,7 @@ func (s) TestGoodSecurityConfig(t *testing.T) {
498498
// will trigger the watch handler on the CDS balancer, which will attempt to
499499
// create a new EDS balancer. The fake EDS balancer created above will be
500500
// returned to the CDS balancer, because we have overridden the
501-
// newEDSBalancer function as part of test setup.
501+
// newChildBalancer function as part of test setup.
502502
wantCCS := edsCCS(serviceName, nil, false)
503503
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
504504
defer ctxCancel()
@@ -551,7 +551,7 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) {
551551
// will trigger the watch handler on the CDS balancer, which will attempt to
552552
// create a new EDS balancer. The fake EDS balancer created above will be
553553
// returned to the CDS balancer, because we have overridden the
554-
// newEDSBalancer function as part of test setup.
554+
// newChildBalancer function as part of test setup.
555555
wantCCS := edsCCS(serviceName, nil, false)
556556
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
557557
defer ctxCancel()
@@ -601,7 +601,7 @@ func (s) TestSecurityConfigUpdate_GoodToBad(t *testing.T) {
601601
// will trigger the watch handler on the CDS balancer, which will attempt to
602602
// create a new EDS balancer. The fake EDS balancer created above will be
603603
// returned to the CDS balancer, because we have overridden the
604-
// newEDSBalancer function as part of test setup.
604+
// newChildBalancer function as part of test setup.
605605
wantCCS := edsCCS(serviceName, nil, false)
606606
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
607607
defer ctxCancel()
@@ -672,7 +672,7 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) {
672672
// will trigger the watch handler on the CDS balancer, which will attempt to
673673
// create a new EDS balancer. The fake EDS balancer created above will be
674674
// returned to the CDS balancer, because we have overridden the
675-
// newEDSBalancer function as part of test setup.
675+
// newChildBalancer function as part of test setup.
676676
cdsUpdate := xdsclient.ClusterUpdate{
677677
ClusterName: serviceName,
678678
SecurityCfg: &xdsclient.SecurityConfig{

0 commit comments

Comments
 (0)