Skip to content

Commit b7a0288

Browse files
abhinavdahiyasoltysh
authored andcommitted
UPSTREAM: 84466: gce: ensureInternalInstanceGroups: reuse instance-groups for internal load balancers
UPSTREAM: 84466: legacy-cloud-providers/gce/gce_fake.go: NewFakeGCECloud: make sure that the secondary zone is also part of managedZones Origin-commit: 79d66e294a3906efd0351f125cefb4b9cc1c9ab4 UPSTREAM: 84466: gce: ensureInternalInstanceGroups: reuse instance-groups for internal load balancers Origin-commit: cfb25370a7c8f9bed9688cb334b4bc1c3342da0d UPSTREAM: 84466: gce: add ExternalInstanceGroupsPrefix to filter instance groups that will be re-used for ILB backend Origin-commit: e29c0b6ce3c068e02419a7b3cbc381b919981f50 UPSTREAM: 84466: gce: skip ensureInstanceGroup for a zone that has no remaining nodes for k8s managed IG Origin-commit: 3915cef99ee4eedc9755d454abb7e4efa2a63bff
1 parent 10f3755 commit b7a0288

File tree

5 files changed

+174
-37
lines changed

5 files changed

+174
-37
lines changed

staging/src/k8s.io/legacy-cloud-providers/gce/gce.go

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ type Cloud struct {
142142
useMetadataServer bool
143143
operationPollRateLimiter flowcontrol.RateLimiter
144144
manager diskServiceManager
145+
146+
externalInstanceGroupsPrefix string // If non-"", finds prefixed instance groups for ILB.
147+
145148
// Lock for access to nodeZones
146149
nodeZonesLock sync.Mutex
147150
// nodeZones is a mapping from Zone to a sets.String of Node's names in the Zone
@@ -189,6 +192,9 @@ type ConfigGlobal struct {
189192
NodeInstancePrefix string `gcfg:"node-instance-prefix"`
190193
Regional bool `gcfg:"regional"`
191194
Multizone bool `gcfg:"multizone"`
195+
// ExternalInstanceGroupsPrefix is the prefix that will be used to filter instance groups
196+
// that be backend for ILB containing cluster nodes if not-empty.
197+
ExternalInstanceGroupsPrefix string `gcfg:"external-instance-groups-prefix"`
192198
// APIEndpoint is the GCE compute API endpoint to use. If this is blank,
193199
// then the default endpoint is used.
194200
APIEndpoint string `gcfg:"api-endpoint"`
@@ -229,12 +235,13 @@ type CloudConfig struct {
229235
SubnetworkName string
230236
SubnetworkURL string
231237
// DEPRECATED: Do not rely on this value as it may be incorrect.
232-
SecondaryRangeName string
233-
NodeTags []string
234-
NodeInstancePrefix string
235-
TokenSource oauth2.TokenSource
236-
UseMetadataServer bool
237-
AlphaFeatureGate *AlphaFeatureGate
238+
SecondaryRangeName string
239+
NodeTags []string
240+
NodeInstancePrefix string
241+
ExternalInstanceGroupsPrefix string
242+
TokenSource oauth2.TokenSource
243+
UseMetadataServer bool
244+
AlphaFeatureGate *AlphaFeatureGate
238245
}
239246

240247
func init() {
@@ -324,6 +331,7 @@ func generateCloudConfig(configFile *ConfigFile) (cloudConfig *CloudConfig, err
324331

325332
cloudConfig.NodeTags = configFile.Global.NodeTags
326333
cloudConfig.NodeInstancePrefix = configFile.Global.NodeInstancePrefix
334+
cloudConfig.ExternalInstanceGroupsPrefix = configFile.Global.ExternalInstanceGroupsPrefix
327335
cloudConfig.AlphaFeatureGate = NewAlphaFeatureGate(configFile.Global.AlphaFeatures)
328336
}
329337

@@ -501,29 +509,30 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
501509
operationPollRateLimiter := flowcontrol.NewTokenBucketRateLimiter(5, 5) // 5 qps, 5 burst.
502510

503511
gce := &Cloud{
504-
service: service,
505-
serviceAlpha: serviceAlpha,
506-
serviceBeta: serviceBeta,
507-
containerService: containerService,
508-
tpuService: tpuService,
509-
projectID: projID,
510-
networkProjectID: netProjID,
511-
onXPN: onXPN,
512-
region: config.Region,
513-
regional: config.Regional,
514-
localZone: config.Zone,
515-
managedZones: config.ManagedZones,
516-
networkURL: networkURL,
517-
unsafeIsLegacyNetwork: isLegacyNetwork,
518-
unsafeSubnetworkURL: subnetURL,
519-
secondaryRangeName: config.SecondaryRangeName,
520-
nodeTags: config.NodeTags,
521-
nodeInstancePrefix: config.NodeInstancePrefix,
522-
useMetadataServer: config.UseMetadataServer,
523-
operationPollRateLimiter: operationPollRateLimiter,
524-
AlphaFeatureGate: config.AlphaFeatureGate,
525-
nodeZones: map[string]sets.String{},
526-
metricsCollector: newLoadBalancerMetrics(),
512+
service: service,
513+
serviceAlpha: serviceAlpha,
514+
serviceBeta: serviceBeta,
515+
containerService: containerService,
516+
tpuService: tpuService,
517+
projectID: projID,
518+
networkProjectID: netProjID,
519+
onXPN: onXPN,
520+
region: config.Region,
521+
regional: config.Regional,
522+
localZone: config.Zone,
523+
managedZones: config.ManagedZones,
524+
networkURL: networkURL,
525+
unsafeIsLegacyNetwork: isLegacyNetwork,
526+
unsafeSubnetworkURL: subnetURL,
527+
secondaryRangeName: config.SecondaryRangeName,
528+
nodeTags: config.NodeTags,
529+
nodeInstancePrefix: config.NodeInstancePrefix,
530+
externalInstanceGroupsPrefix: config.ExternalInstanceGroupsPrefix,
531+
useMetadataServer: config.UseMetadataServer,
532+
operationPollRateLimiter: operationPollRateLimiter,
533+
AlphaFeatureGate: config.AlphaFeatureGate,
534+
nodeZones: map[string]sets.String{},
535+
metricsCollector: newLoadBalancerMetrics(),
527536
}
528537

529538
gce.manager = &gceServiceManager{gce}

staging/src/k8s.io/legacy-cloud-providers/gce/gce_fake.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ func NewFakeGCECloud(vals TestClusterValues) *Cloud {
7171
gce := &Cloud{
7272
region: vals.Region,
7373
service: service,
74-
managedZones: []string{vals.ZoneName},
74+
managedZones: []string{vals.ZoneName, vals.SecondaryZoneName},
7575
projectID: vals.ProjectID,
7676
networkProjectID: vals.ProjectID,
7777
ClusterID: fakeClusterID(vals.ClusterID),

staging/src/k8s.io/legacy-cloud-providers/gce/gce_instancegroup.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ limitations under the License.
1919
package gce
2020

2121
import (
22+
"fmt"
23+
2224
compute "google.golang.org/api/compute/v1"
2325

2426
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
@@ -60,6 +62,21 @@ func (g *Cloud) ListInstanceGroups(zone string) ([]*compute.InstanceGroup, error
6062
return v, mc.Observe(err)
6163
}
6264

65+
// ListInstanceGroupsWithPrefix lists all InstanceGroups in the project and
66+
// zone with given prefix.
67+
func (g *Cloud) ListInstanceGroupsWithPrefix(zone string, prefix string) ([]*compute.InstanceGroup, error) {
68+
ctx, cancel := cloud.ContextWithCallTimeout()
69+
defer cancel()
70+
71+
mc := newInstanceGroupMetricContext("list", zone)
72+
f := filter.None
73+
if prefix != "" {
74+
f = filter.Regexp("name", fmt.Sprintf("%s.*", prefix))
75+
}
76+
v, err := g.c.InstanceGroups().List(ctx, zone, f)
77+
return v, mc.Observe(err)
78+
}
79+
6380
// ListInstancesInInstanceGroup lists all the instances in a given
6481
// instance group and state.
6582
func (g *Cloud) ListInstancesInInstanceGroup(name string, zone string, state string) ([]*compute.InstanceWithNamedPorts, error) {

staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -542,17 +542,14 @@ func (g *Cloud) ensureInternalHealthCheck(name string, svcName types.NamespacedN
542542
return hc, nil
543543
}
544544

545-
func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) (string, error) {
545+
func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []string) (string, error) {
546546
klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): checking group that it contains %v nodes", name, zone, len(nodes))
547547
ig, err := g.GetInstanceGroup(name, zone)
548548
if err != nil && !isNotFound(err) {
549549
return "", err
550550
}
551551

552-
kubeNodes := sets.NewString()
553-
for _, n := range nodes {
554-
kubeNodes.Insert(n.Name)
555-
}
552+
kubeNodes := sets.NewString(nodes...)
556553

557554
// Individual InstanceGroup has a limit for 1000 instances in it.
558555
// As a result, it's not possible to add more to it.
@@ -618,8 +615,46 @@ func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]s
618615
zonedNodes := splitNodesByZone(nodes)
619616
klog.V(2).Infof("ensureInternalInstanceGroups(%v): %d nodes over %d zones in region %v", name, len(nodes), len(zonedNodes), g.region)
620617
var igLinks []string
621-
for zone, nodes := range zonedNodes {
622-
igLink, err := g.ensureInternalInstanceGroup(name, zone, nodes)
618+
gceZonedNodes := map[string][]string{}
619+
for zone, zNodes := range zonedNodes {
620+
hosts, err := g.getFoundInstanceByNames(nodeNames(zNodes))
621+
if err != nil {
622+
return nil, err
623+
}
624+
names := sets.NewString()
625+
for _, h := range hosts {
626+
names.Insert(h.Name)
627+
}
628+
skip := sets.NewString()
629+
630+
igs, err := g.candidateExternalInstanceGroups(zone)
631+
if err != nil {
632+
return nil, err
633+
}
634+
for _, ig := range igs {
635+
if strings.EqualFold(ig.Name, name) {
636+
continue
637+
}
638+
instances, err := g.ListInstancesInInstanceGroup(ig.Name, zone, allInstances)
639+
if err != nil {
640+
return nil, err
641+
}
642+
groupInstances := sets.NewString()
643+
for _, ins := range instances {
644+
parts := strings.Split(ins.Instance, "/")
645+
groupInstances.Insert(parts[len(parts)-1])
646+
}
647+
if names.HasAll(groupInstances.UnsortedList()...) {
648+
igLinks = append(igLinks, ig.SelfLink)
649+
skip.Insert(groupInstances.UnsortedList()...)
650+
}
651+
}
652+
if remaining := names.Difference(skip).UnsortedList(); len(remaining) > 0 {
653+
gceZonedNodes[zone] = remaining
654+
}
655+
}
656+
for zone, gceNodes := range gceZonedNodes {
657+
igLink, err := g.ensureInternalInstanceGroup(name, zone, gceNodes)
623658
if err != nil {
624659
return []string{}, err
625660
}
@@ -629,6 +664,13 @@ func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]s
629664
return igLinks, nil
630665
}
631666

667+
func (g *Cloud) candidateExternalInstanceGroups(zone string) ([]*compute.InstanceGroup, error) {
668+
if g.externalInstanceGroupsPrefix == "" {
669+
return nil, nil
670+
}
671+
return g.ListInstanceGroupsWithPrefix(zone, g.externalInstanceGroupsPrefix)
672+
}
673+
632674
func (g *Cloud) ensureInternalInstanceGroupsDeleted(name string) error {
633675
// List of nodes isn't available here - fetch all zones in region and try deleting this cluster's ig
634676
zones, err := g.ListZonesInRegion(g.region)

staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,75 @@ func TestEnsureLoadBalancerDeletedSucceedsOnXPN(t *testing.T) {
807807
checkEvent(t, recorder, FilewallChangeMsg, true)
808808
}
809809

810+
func TestEnsureInternalInstanceGroupsReuseGroups(t *testing.T) {
811+
vals := DefaultTestClusterValues()
812+
gce, err := fakeGCECloud(vals)
813+
require.NoError(t, err)
814+
gce.externalInstanceGroupsPrefix = "pre-existing"
815+
816+
igName := makeInstanceGroupName(vals.ClusterID)
817+
nodesA, err := createAndInsertNodes(gce, []string{"test-node-1", "test-node-2"}, vals.ZoneName)
818+
require.NoError(t, err)
819+
nodesB, err := createAndInsertNodes(gce, []string{"test-node-3"}, vals.SecondaryZoneName)
820+
require.NoError(t, err)
821+
822+
preIGName := "pre-existing-ig"
823+
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: preIGName}, vals.ZoneName)
824+
require.NoError(t, err)
825+
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: preIGName}, vals.SecondaryZoneName)
826+
require.NoError(t, err)
827+
err = gce.AddInstancesToInstanceGroup(preIGName, vals.ZoneName, gce.ToInstanceReferences(vals.ZoneName, []string{"test-node-1"}))
828+
require.NoError(t, err)
829+
err = gce.AddInstancesToInstanceGroup(preIGName, vals.SecondaryZoneName, gce.ToInstanceReferences(vals.SecondaryZoneName, []string{"test-node-3"}))
830+
require.NoError(t, err)
831+
832+
anotherPreIGName := "another-existing-ig"
833+
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: anotherPreIGName}, vals.ZoneName)
834+
require.NoError(t, err)
835+
err = gce.AddInstancesToInstanceGroup(anotherPreIGName, vals.ZoneName, gce.ToInstanceReferences(vals.ZoneName, []string{"test-node-2"}))
836+
require.NoError(t, err)
837+
838+
svc := fakeLoadbalancerService(string(LBTypeInternal))
839+
svc, err = gce.client.CoreV1().Services(svc.Namespace).Create(context.TODO(), svc, metav1.CreateOptions{})
840+
assert.NoError(t, err)
841+
_, err = gce.ensureInternalLoadBalancer(
842+
vals.ClusterName, vals.ClusterID,
843+
svc,
844+
nil,
845+
append(nodesA, nodesB...),
846+
)
847+
assert.NoError(t, err)
848+
849+
backendServiceName := makeBackendServiceName(gce.GetLoadBalancerName(context.TODO(), "", svc), vals.ClusterID, shareBackendService(svc), cloud.SchemeInternal, "TCP", svc.Spec.SessionAffinity)
850+
bs, err := gce.GetRegionBackendService(backendServiceName, gce.region)
851+
require.NoError(t, err)
852+
assert.Equal(t, 3, len(bs.Backends), "Want three backends referencing three instances groups")
853+
854+
igRef := func(zone, name string) string {
855+
return fmt.Sprintf("zones/%s/instanceGroups/%s", zone, name)
856+
}
857+
for _, name := range []string{igRef(vals.ZoneName, preIGName), igRef(vals.SecondaryZoneName, preIGName), igRef(vals.ZoneName, igName)} {
858+
var found bool
859+
for _, be := range bs.Backends {
860+
if strings.Contains(be.Group, name) {
861+
found = true
862+
break
863+
}
864+
}
865+
assert.True(t, found, "Expected list of backends to have group %q", name)
866+
}
867+
868+
// Expect initial zone to have test-node-2
869+
instances, err := gce.ListInstancesInInstanceGroup(igName, vals.ZoneName, "ALL")
870+
require.NoError(t, err)
871+
assert.Equal(t, 1, len(instances))
872+
assert.Contains(
873+
t,
874+
instances[0].Instance,
875+
fmt.Sprintf("projects/%s/zones/%s/instances/%s", vals.ProjectID, vals.ZoneName, "test-node-2"),
876+
)
877+
}
878+
810879
func TestEnsureInternalInstanceGroupsDeleted(t *testing.T) {
811880
vals := DefaultTestClusterValues()
812881
gce, err := fakeGCECloud(vals)

0 commit comments

Comments
 (0)