Skip to content

Commit 2caad48

Browse files
committed
vsphere: Cache REST API sessions to prevent excessive vCenter logouts
This commit fixes a critical issue where the machine-api-operator was creating and destroying vCenter REST API sessions on every machine reconciliation, causing excessive login/logout cycles that pollute vCenter audit logs and create unnecessary session churn. Root Cause: The WithRestClient() and WithCachingTagsManager() wrapper functions were creating new REST sessions, performing operations, and immediately logging out on every invocation. With hundreds of machines reconciling periodically, this created a constant stream of login/logout events. Solution (inspired by cluster-api-provider-vsphere): - Add TagManager field to Session struct to cache REST client - Initialize and cache REST client during session creation (GetOrCreate) - Validate both SOAP and REST session health before reusing cached sessions - Add GetCachingTagsManager() helper for direct access to cached tag manager - Update reconcileRegionAndZoneLabels() to use cached tag manager - Update reconcileTags() to use cached tag manager - Deprecate WithRestClient() and WithCachingTagsManager() for backward compatibility Key Changes: 1. pkg/controller/vsphere/session/session.go: - Added TagManager *tags.Manager field to Session struct - Modified GetOrCreate() to create and cache REST client once - Added dual session validation (SOAP + REST) before reusing sessions - Added GetCachingTagsManager() method for direct access - Deprecated old wrapper functions with migration guidance 2. pkg/controller/vsphere/reconciler.go: - Updated reconcileRegionAndZoneLabels() to use GetCachingTagsManager() - Updated reconcileTags() to use GetCachingTagsManager() - Eliminated callback pattern in favor of direct access Impact: - Eliminates excessive vCenter login/logout cycles - Reduces vCenter session churn from O(reconciliations) to O(1) per MAPI instance - Improves performance by removing authentication overhead on every tag operation - REST session now lives as long as SOAP session (until invalidation) Reference Implementation: https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/pkg/session/session.go Backward Compatibility: The deprecated wrapper functions are maintained with warning logs to support existing test code. All production code paths now use the new pattern. Fixes: Excessive vCenter logout events reported by customers Signed-off-by: Claude Code Assistant <[email protected]>
1 parent c8772d3 commit 2caad48

File tree

2 files changed

+91
-33
lines changed

2 files changed

+91
-33
lines changed

pkg/controller/vsphere/reconciler.go

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -594,15 +594,10 @@ func (r *Reconciler) reconcileRegionAndZoneLabels(vm *virtualMachine) error {
594594
regionLabel := r.vSphereConfig.Labels.Region
595595
zoneLabel := r.vSphereConfig.Labels.Zone
596596

597-
var res map[string]string
598-
599-
err := r.session.WithCachingTagsManager(vm.Context, func(c *session.CachingTagsManager) error {
600-
var err error
601-
res, err = vm.getRegionAndZone(c, regionLabel, zoneLabel)
602-
603-
return err
604-
})
605-
597+
// Use cached tag manager to avoid creating new REST sessions.
598+
// This eliminates excessive vCenter login/logout cycles.
599+
tagManager := r.session.GetCachingTagsManager()
600+
res, err := vm.getRegionAndZone(tagManager, regionLabel, zoneLabel)
606601
if err != nil {
607602
return err
608603
}
@@ -1599,32 +1594,29 @@ func (vm *virtualMachine) getPowerState() (types.VirtualMachinePowerState, error
15991594
// reconcileTags ensures that the required tags are present on the virtual machine, eg the Cluster ID
16001595
// that is used by the installer on cluster deletion to ensure ther are no leaked resources.
16011596
func (vm *virtualMachine) reconcileTags(ctx context.Context, sessionInstance *session.Session, machine *machinev1.Machine, providerSpec *machinev1.VSphereMachineProviderSpec) error {
1602-
if err := sessionInstance.WithCachingTagsManager(vm.Context, func(c *session.CachingTagsManager) error {
1603-
klog.Infof("%v: Reconciling attached tags", machine.GetName())
1604-
1605-
clusterID := machine.Labels[machinev1.MachineClusterIDLabel]
1606-
tagIDs := []string{clusterID}
1607-
tagIDs = append(tagIDs, providerSpec.TagIDs...)
1608-
klog.Infof("%v: Reconciling %s tags to vm", machine.GetName(), tagIDs)
1609-
for _, tagID := range tagIDs {
1610-
attached, err := vm.checkAttachedTag(ctx, tagID, c)
1611-
if err != nil {
1612-
return err
1613-
}
1597+
// Use cached tag manager to avoid creating new REST sessions.
1598+
// This eliminates excessive vCenter login/logout cycles.
1599+
tagManager := sessionInstance.GetCachingTagsManager()
1600+
klog.Infof("%v: Reconciling attached tags", machine.GetName())
1601+
1602+
clusterID := machine.Labels[machinev1.MachineClusterIDLabel]
1603+
tagIDs := []string{clusterID}
1604+
tagIDs = append(tagIDs, providerSpec.TagIDs...)
1605+
klog.Infof("%v: Reconciling %s tags to vm", machine.GetName(), tagIDs)
1606+
for _, tagID := range tagIDs {
1607+
attached, err := vm.checkAttachedTag(ctx, tagID, tagManager)
1608+
if err != nil {
1609+
return err
1610+
}
16141611

1615-
if !attached {
1616-
klog.Infof("%v: Attaching %s tag to vm", machine.GetName(), tagID)
1617-
// the tag should already be created by installer or the administrator
1618-
if err := c.AttachTag(ctx, tagID, vm.Ref); err != nil {
1619-
return err
1620-
}
1612+
if !attached {
1613+
klog.Infof("%v: Attaching %s tag to vm", machine.GetName(), tagID)
1614+
// the tag should already be created by installer or the administrator
1615+
if err := tagManager.AttachTag(ctx, tagID, vm.Ref); err != nil {
1616+
return err
16211617
}
16221618
}
1623-
return nil
1624-
}); err != nil {
1625-
return err
16261619
}
1627-
16281620
return nil
16291621
}
16301622

pkg/controller/vsphere/session/session.go

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,14 @@ const (
4646
)
4747

4848
// Session is a vSphere session with a configured Finder.
49+
// This implementation is inspired by cluster-api-provider-vsphere's session caching pattern
50+
// to avoid excessive vCenter login/logout cycles for REST API operations.
51+
// Reference: https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/pkg/session/session.go
4952
type Session struct {
5053
*govmomi.Client
5154
Finder *find.Finder
5255
Datacenter *object.Datacenter
56+
TagManager *tags.Manager
5357

5458
username string
5559
password string
@@ -80,13 +84,40 @@ func GetOrCreate(
8084

8185
sessionKey := server + username + datacenter
8286
if session, ok := sessionCache[sessionKey]; ok {
87+
// Check both SOAP and REST session validity before reusing cached session.
88+
// This prevents reusing sessions where one connection type has expired.
89+
// Pattern adapted from cluster-api-provider-vsphere:
90+
// https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/pkg/session/session.go#L132-L149
8391
sessionActive, err := session.SessionManager.SessionIsActive(ctx)
8492
if err != nil {
85-
klog.Errorf("Error performing session check request to vSphere: %v", err)
93+
klog.Errorf("Error performing SOAP session check request to vSphere: %v", err)
8694
}
87-
if sessionActive {
95+
96+
var restSessionActive bool
97+
if session.TagManager != nil {
98+
restSession, err := session.TagManager.Session(ctx)
99+
if err != nil {
100+
klog.Errorf("Error performing REST session check request to vSphere: %v", err)
101+
}
102+
restSessionActive = restSession != nil
103+
}
104+
105+
if sessionActive && restSessionActive {
106+
klog.V(3).Infof("Found active cached vSphere session with valid SOAP and REST connections")
88107
return &session, nil
89108
}
109+
110+
// If either session is invalid, logout both to clean up
111+
if session.TagManager != nil {
112+
klog.Infof("Logging out inactive REST session")
113+
if err := session.TagManager.Logout(ctx); err != nil {
114+
klog.Errorf("Failed to logout REST session: %v", err)
115+
}
116+
}
117+
klog.Infof("Logging out inactive SOAP session")
118+
if err := session.Client.Logout(ctx); err != nil {
119+
klog.Errorf("Failed to logout SOAP session: %v", err)
120+
}
90121
}
91122
klog.Infof("No existing vCenter session found, creating new session")
92123

@@ -127,6 +158,20 @@ func GetOrCreate(
127158
session.Datacenter = dc
128159
session.Finder.SetDatacenter(dc)
129160

161+
// Create and cache REST client for tag operations.
162+
// This prevents creating a new REST session on every tag operation.
163+
// Pattern adapted from cluster-api-provider-vsphere:
164+
// https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/pkg/session/session.go#L196-L205
165+
restClient := rest.NewClient(session.Client.Client)
166+
if err := restClient.Login(ctx, url.UserPassword(username, password)); err != nil {
167+
// Cleanup SOAP session on REST login failure
168+
if logoutErr := client.Logout(ctx); logoutErr != nil {
169+
klog.Errorf("Failed to logout SOAP session after REST login failure: %v", logoutErr)
170+
}
171+
return nil, fmt.Errorf("unable to login REST client to vCenter: %w", err)
172+
}
173+
session.TagManager = tags.NewManager(restClient)
174+
130175
// Cache the session.
131176
sessionCache[sessionKey] = session
132177

@@ -206,7 +251,21 @@ func (s *Session) GetTask(ctx context.Context, taskRef string) (*mo.Task, error)
206251
return &obj, nil
207252
}
208253

254+
// GetCachingTagsManager returns a CachingTagsManager that wraps the cached TagManager.
255+
// This replaces the previous WithCachingTagsManager pattern which created new sessions
256+
// on every call. The returned manager uses the session's cached REST client.
257+
func (s *Session) GetCachingTagsManager() *CachingTagsManager {
258+
return newTagsCachingClient(s.TagManager, s.sessionKey)
259+
}
260+
261+
// WithRestClient is deprecated. Use s.TagManager directly instead.
262+
// This function is maintained for backward compatibility but creates excessive
263+
// vCenter login/logout cycles. Migration path: replace callback pattern with
264+
// direct access to s.TagManager.
265+
//
266+
// Deprecated: Use s.TagManager for direct REST client access.
209267
func (s *Session) WithRestClient(ctx context.Context, f func(c *rest.Client) error) error {
268+
klog.Warning("WithRestClient is deprecated and causes excessive vCenter logouts. Use s.TagManager directly instead.")
210269
c := rest.NewClient(s.Client.Client)
211270

212271
user := url.UserPassword(s.username, s.password)
@@ -223,7 +282,14 @@ func (s *Session) WithRestClient(ctx context.Context, f func(c *rest.Client) err
223282
return f(c)
224283
}
225284

285+
// WithCachingTagsManager is deprecated. Use s.GetCachingTagsManager() instead.
286+
// This function is maintained for backward compatibility but creates excessive
287+
// vCenter login/logout cycles. Migration path: replace callback pattern with
288+
// direct call to s.GetCachingTagsManager().
289+
//
290+
// Deprecated: Use s.GetCachingTagsManager() for cached tag manager access.
226291
func (s *Session) WithCachingTagsManager(ctx context.Context, f func(m *CachingTagsManager) error) error {
292+
klog.Warning("WithCachingTagsManager is deprecated and causes excessive vCenter logouts. Use s.GetCachingTagsManager() instead.")
227293
c := rest.NewClient(s.Client.Client)
228294

229295
user := url.UserPassword(s.username, s.password)

0 commit comments

Comments
 (0)