Skip to content

Commit 9af7be5

Browse files
deads2k authored and soltysh committed
UPSTREAM: <carry>: delay queuing deletion for PV to allow nodes some time to unmount
UPSTREAM: <carry>: Fix sync of PV deletion in PV controller

Always queue PV deletion events immediately, without any wait. It does not affect dynamic de-provisioning / deletion of volumes; that is done on PVC deletion. This de-flakes unit tests, which expect that PV deletion is processed without waiting too much.

This updates carry patch b24f93e. It still waits for 21 seconds after *PVC* deletion!

UPSTREAM: <carry>: delay queuing deletion for PV to allow nodes some time to unmount

openshift-rebase(v1.24): source=c5fd3449734
1 parent e4d66c1 commit 9af7be5

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

pkg/controller/volume/persistentvolume/pv_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ type PersistentVolumeController struct {
204204
// however overall speed of multi-worker controller would be lower than if
205205
// it runs single thread only.
206206
claimQueue *workqueue.Type
207-
volumeQueue *workqueue.Type
207+
volumeQueue workqueue.RateLimitingInterface
208208

209209
// Map of scheduled/running operations.
210210
runningOperations goroutinemap.GoRoutineMap

pkg/controller/volume/persistentvolume/pv_controller_base.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ func NewController(p ControllerParameters) (*PersistentVolumeController, error)
100100
createProvisionedPVRetryCount: createProvisionedPVRetryCount,
101101
createProvisionedPVInterval: createProvisionedPVInterval,
102102
claimQueue: workqueue.NewNamed("claims"),
103-
volumeQueue: workqueue.NewNamed("volumes"),
103+
volumeQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "volumes"),
104104
resyncPeriod: p.SyncPeriod,
105105
operationTimestamps: metrics.NewOperationStartTimeCache(),
106106
}
@@ -197,6 +197,20 @@ func (ctrl *PersistentVolumeController) enqueueWork(queue workqueue.Interface, o
197197
queue.Add(objName)
198198
}
199199

200+
func (ctrl *PersistentVolumeController) enqueueWorkAfter(queue workqueue.DelayingInterface, obj interface{}, delay time.Duration) {
201+
// Beware of "xxx deleted" events
202+
if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil {
203+
obj = unknown.Obj
204+
}
205+
objName, err := controller.KeyFunc(obj)
206+
if err != nil {
207+
klog.Errorf("failed to get key from object: %v", err)
208+
return
209+
}
210+
klog.V(5).Infof("enqueued %q for sync", objName)
211+
queue.AddAfter(objName, delay)
212+
}
213+
200214
func (ctrl *PersistentVolumeController) storeVolumeUpdate(volume interface{}) (bool, error) {
201215
return storeObjectUpdate(ctrl.volumes.store, volume, "volume")
202216
}
@@ -298,8 +312,11 @@ func (ctrl *PersistentVolumeController) deleteClaim(claim *v1.PersistentVolumeCl
298312
// sync the volume when its claim is deleted. Explicitly sync'ing the
299313
// volume here in response to claim deletion prevents the volume from
300314
// waiting until the next sync period for its Release.
315+
// delay queuing the volume to allow some time for nodes to detach the volume from the node. The time chosen here
316+
// is to hopefully be short enough that e2e tests still pass and long enough that most PVs stop hitting the failure
317+
// errors.
301318
klog.V(5).Infof("deleteClaim[%q]: scheduling sync of volume %s", claimKey, volumeName)
302-
ctrl.volumeQueue.Add(volumeName)
319+
ctrl.volumeQueue.AddAfter(volumeName, 21*time.Second)
303320
}
304321

305322
// Run starts all of this controller's control loops

0 commit comments

Comments (0)