Skip to content

Commit

Permalink
umountDuplicate larger than the threshold (#3429)
Browse files Browse the repository at this point in the history
* umountDuplicate larger than the threshold

Signed-off-by: wangshulin <[email protected]>

* update warning log

Signed-off-by: wangshulin <[email protected]>

* add dataset event record

Signed-off-by: wangshulin <[email protected]>

* update eventReason and message

Signed-off-by: wangshulin <[email protected]>

---------

Signed-off-by: wangshulin <[email protected]>
  • Loading branch information
wangshli authored Sep 2, 2023
1 parent 6aa2d02 commit b402b33
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 2 additions & 0 deletions pkg/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ const (

FuseRecoverSucceed = "FuseRecoverSucceed"

FuseUmountDuplicate = "UnmountDuplicateMountpoint"

RuntimeDeprecated = "RuntimeDeprecated"

RuntimeWithSecretNotSupported = "RuntimeWithSecretNotSupported"
Expand Down
16 changes: 9 additions & 7 deletions pkg/csi/recover/recover.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,14 @@ func (r FuseRecover) recover() {

for _, point := range brokenMounts {
glog.V(4).Infof("Get broken mount point: %v", point)
// do not umountDuplicate because if app container restart, umount duplicate mount may lead to recover successed but can not access data
// if the app container restarts, unmounting duplicate mounts may cause recovery to succeed while the data remains inaccessible,
// so we only umountDuplicate when the mount point has been mounted more times than the recoverWarningThreshold
// please refer to https://github.com/fluid-cloudnative/fluid/issues/3399 for more information
// r.umountDuplicate(point)
if point.Count > r.recoverWarningThreshold {
glog.Warningf("Mountpoint %s has been mounted %v times, exceeding the recoveryWarningThreshold %v, unmount duplicate mountpoint to avoid large /proc/self/mountinfo file, this may potential make data access connection broken", point.MountPath, point.Count, r.recoverWarningThreshold)
r.eventRecord(point, corev1.EventTypeWarning, common.FuseUmountDuplicate)
r.umountDuplicate(point)
}
if err := r.recoverBrokenMount(point); err != nil {
r.eventRecord(point, corev1.EventTypeWarning, common.FuseRecoverFailed)
continue
Expand Down Expand Up @@ -222,10 +227,7 @@ func (r *FuseRecover) eventRecord(point mountinfo.MountPoint, eventType, eventRe
r.Recorder.Eventf(dataset, eventType, eventReason, "Fuse recover %s succeed", point.MountPath)
case common.FuseRecoverFailed:
r.Recorder.Eventf(dataset, eventType, eventReason, "Fuse recover %s failed", point.MountPath)
}
// add warning event if point.count is larger than the threshold
if point.Count > r.recoverWarningThreshold {
r.Recorder.Eventf(dataset, corev1.EventTypeWarning, "TooManyRecovery", "Mountpoint %s has been mounted %v times", point.MountPath, point.Count)
glog.Warningf("TooManyRecovery: Mountpoint %s has been mounted %v times", point.MountPath, point.Count)
case common.FuseUmountDuplicate:
r.Recorder.Eventf(dataset, eventType, eventReason, "Mountpoint %s has been mounted %v times, unmount duplicate mountpoint to avoid large /proc/self/mountinfo file, this may potential make data access connection broken", point.MountPath, point.Count)
}
}

0 comments on commit b402b33

Please sign in to comment.