Skip to content

Commit 2b96558

Browse files
committed
rbd: delete group only if its primary
This is a work of Nikhil which need to be applied on top of this PR to test the feature. Signed-off-by: Madhu Rajanna <[email protected]>
1 parent 22966e8 commit 2b96558

File tree

4 files changed

+99
-28
lines changed

4 files changed

+99
-28
lines changed

Diff for: internal/csi-addons/rbd/volumegroup.go

+47-15
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/ceph/ceph-csi/internal/rbd/types"
2828
"github.com/ceph/ceph-csi/internal/util/log"
2929

30+
"github.com/csi-addons/spec/lib/go/replication"
3031
"github.com/csi-addons/spec/lib/go/volumegroup"
3132
"google.golang.org/grpc"
3233
"google.golang.org/grpc/codes"
@@ -206,27 +207,41 @@ func (vs *VolumeGroupServer) DeleteVolumeGroup(
206207

207208
log.DebugLog(ctx, "VolumeGroup %q has been found", req.GetVolumeGroupId())
208209

209-
// verify that the volume group is empty
210-
volumes, err := vg.ListVolumes(ctx)
210+
volumes, mirror, err := mgr.GetMirrorSource(ctx, req.GetVolumeGroupId(), &replication.ReplicationSource{
211+
Type: &replication.ReplicationSource_Volumegroup{
212+
Volumegroup: &replication.ReplicationSource_VolumeGroupSource{
213+
VolumeGroupId: req.GetVolumeGroupId(),
214+
},
215+
},
216+
})
211217
if err != nil {
212-
return nil, status.Errorf(
213-
codes.NotFound,
214-
"could not list volumes for voluem group %q: %s",
215-
req.GetVolumeGroupId(),
216-
err.Error())
218+
return nil, getGRPCError(err)
217219
}
220+
defer destoryVolumes(ctx, volumes)
218221

219-
log.DebugLog(ctx, "VolumeGroup %q contains %d volumes", req.GetVolumeGroupId(), len(volumes))
222+
vgrMirrorInfo, err := mirror.GetMirroringInfo(ctx)
220223

221-
if len(volumes) != 0 {
222-
return nil, status.Errorf(
223-
codes.FailedPrecondition,
224-
"rejecting to delete non-empty volume group %q",
225-
req.GetVolumeGroupId())
224+
// verify that the volume group is empty, if the group is primary
225+
if vgrMirrorInfo.IsPrimary() {
226+
volumes, err := vg.ListVolumes(ctx)
227+
if err != nil {
228+
return nil, status.Errorf(
229+
codes.NotFound,
230+
"could not list volumes for volume group %q: %s",
231+
req.GetVolumeGroupId(),
232+
err.Error())
233+
}
234+
log.DebugLog(ctx, "VolumeGroup %q contains %d volumes", req.GetVolumeGroupId(), len(volumes))
235+
if len(volumes) != 0 {
236+
return nil, status.Errorf(
237+
codes.FailedPrecondition,
238+
"rejecting to delete non-empty volume group %q",
239+
req.GetVolumeGroupId())
240+
}
226241
}
227242

228243
// delete the volume group
229-
err = vg.Delete(ctx)
244+
err = vg.Delete(ctx, vgrMirrorInfo, mirror)
230245
if err != nil {
231246
return nil, status.Errorf(codes.Internal,
232247
"failed to delete volume group %q: %s",
@@ -336,10 +351,27 @@ func (vs *VolumeGroupServer) ModifyVolumeGroupMembership(
336351
}
337352
}
338353

354+
vol, mirror, err := mgr.GetMirrorSource(ctx, req.GetVolumeGroupId(), &replication.ReplicationSource{
355+
Type: &replication.ReplicationSource_Volumegroup{
356+
Volumegroup: &replication.ReplicationSource_VolumeGroupSource{
357+
VolumeGroupId: req.GetVolumeGroupId(),
358+
},
359+
},
360+
})
361+
if err != nil {
362+
return nil, getGRPCError(err)
363+
}
364+
defer destoryVolumes(ctx, vol)
365+
366+
vgrMirrorInfo, err := mirror.GetMirroringInfo(ctx)
367+
368+
// Skip removing images from group if the group is secondary
369+
removeImageFromGroup := vgrMirrorInfo.IsPrimary()
370+
339371
// remove the volume that should not be part of the group
340372
for _, id := range toRemove {
341373
vol := beforeIDs[id]
342-
err = vg.RemoveVolume(ctx, vol)
374+
err = vg.RemoveVolume(ctx, vol, removeImageFromGroup)
343375
if err != nil {
344376
return nil, status.Errorf(
345377
codes.Internal,

Diff for: internal/rbd/group/volume_group.go

+48-9
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
librbd "github.com/ceph/go-ceph/rbd"
2727
"github.com/container-storage-interface/spec/lib/go/csi"
2828
"github.com/csi-addons/spec/lib/go/volumegroup"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2931

3032
"github.com/ceph/ceph-csi/internal/rbd/types"
3133
"github.com/ceph/ceph-csi/internal/util"
@@ -184,7 +186,7 @@ func (vg *volumeGroup) Create(ctx context.Context) error {
184186
return nil
185187
}
186188

187-
func (vg *volumeGroup) Delete(ctx context.Context) error {
189+
func (vg *volumeGroup) Delete(ctx context.Context, vgrMirrorInfo types.MirrorInfo, mirror types.Mirror) error {
188190
name, err := vg.GetName(ctx)
189191
if err != nil {
190192
return err
@@ -195,6 +197,41 @@ func (vg *volumeGroup) Delete(ctx context.Context) error {
195197
return err
196198
}
197199

200+
// Cleanup only omap data if the following condition is met
201+
// Mirroring is enabled on the group
202+
// Local group is secondary
203+
// Local group is in up+replaying state
204+
log.DebugLog(ctx, "volume group %v is in %v state and is primary %v", vg, vgrMirrorInfo.GetState, vgrMirrorInfo.IsPrimary())
205+
if vgrMirrorInfo != nil && vgrMirrorInfo.GetState() == librbd.MirrorGroupEnabled.String() && !vgrMirrorInfo.IsPrimary() {
206+
// If the group is in a secondary state and its up+replaying means its
207+
// an healthy secondary and the group is primary somewhere in the
208+
// remote cluster and the local group is getting replayed. Delete the
209+
// OMAP data generated as we cannot delete the secondary group. When
210+
// the group on the primary cluster gets deleted/mirroring disabled,
211+
// the group on all the remote (secondary) clusters will get
212+
// auto-deleted. This helps in garbage collecting the OMAP, VR, VGR,
213+
// VGRC, PVC and PV objects after failback operation.
214+
if mirror != nil {
215+
sts, rErr := mirror.GetGlobalMirroringStatus(ctx)
216+
if rErr != nil {
217+
return status.Error(codes.Internal, rErr.Error())
218+
}
219+
localStatus, rErr := sts.GetLocalSiteStatus()
220+
if rErr != nil {
221+
log.ErrorLog(ctx, "failed to get local status for volume group%s: %w", name, rErr)
222+
return status.Error(codes.Internal, rErr.Error())
223+
}
224+
log.DebugLog(ctx, "local status is %v and local state is %v", localStatus.IsUP(), localStatus.GetState())
225+
if localStatus.IsUP() && localStatus.GetState() == librbd.MirrorGroupStatusStateReplaying.String() {
226+
return vg.commonVolumeGroup.Delete(ctx)
227+
}
228+
log.ErrorLog(ctx,
229+
"secondary group status is up=%t and state=%s",
230+
localStatus.IsUP(),
231+
localStatus.GetState())
232+
}
233+
}
234+
198235
err = librbd.GroupRemove(ioctx, name)
199236
if err != nil && !errors.Is(err, rados.ErrNotFound) {
200237
return fmt.Errorf("failed to remove volume group %q: %w", vg, err)
@@ -252,21 +289,23 @@ func (vg *volumeGroup) AddVolume(ctx context.Context, vol types.Volume) error {
252289
return nil
253290
}
254291

255-
func (vg *volumeGroup) RemoveVolume(ctx context.Context, vol types.Volume) error {
292+
func (vg *volumeGroup) RemoveVolume(ctx context.Context, vol types.Volume, removeImageFromGroup bool) error {
256293
// volume was already removed from the group
257294
if len(vg.volumes) == 0 {
258295
return nil
259296
}
260297

261-
err := vol.RemoveFromGroup(ctx, vg)
262-
if err != nil {
263-
if errors.Is(err, librbd.ErrNotExist) {
264-
return nil
265-
}
298+
if removeImageFromGroup {
299+
log.DebugLog(ctx, "removing image %v from group %v", vol, vg)
300+
err := vol.RemoveFromGroup(ctx, vg)
301+
if err != nil {
302+
if errors.Is(err, librbd.ErrNotExist) {
303+
return nil
304+
}
266305

267-
return fmt.Errorf("failed to remove volume %q from volume group %q: %w", vol, vg, err)
306+
return fmt.Errorf("failed to remove volume %q from volume group %q: %w", vol, vg, err)
307+
}
268308
}
269-
270309
// toRemove contain the ID of the volume that is removed from the group
271310
toRemove, err := vol.GetID(ctx)
272311
if err != nil {

Diff for: internal/rbd/group_controllerserver.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ func (cs *ControllerServer) CreateVolumeGroupSnapshot(
7272
for _, volume := range volumes {
7373
if vg != nil {
7474
// 'normal' cleanup, remove all images from the group
75-
vgErr := vg.RemoveVolume(ctx, volume)
75+
vgErr := vg.RemoveVolume(ctx, volume, true)
7676
if vgErr != nil {
7777
log.ErrorLog(
7878
ctx,
@@ -91,7 +91,7 @@ func (cs *ControllerServer) CreateVolumeGroupSnapshot(
9191
// the VG should always be deleted, volumes can only belong to a single VG
9292
log.DebugLog(ctx, "removing temporary volume group %q", vg)
9393

94-
vgErr := vg.Delete(ctx)
94+
vgErr := vg.Delete(ctx, nil, nil)
9595
if vgErr != nil {
9696
log.ErrorLog(ctx, "failed to remove temporary volume group %q: %v", vg, vgErr)
9797
}

Diff for: internal/rbd/types/group.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,13 @@ type VolumeGroup interface {
5858
Create(ctx context.Context) error
5959

6060
// Delete removes the VolumeGroup from the backend storage.
61-
Delete(ctx context.Context) error
61+
Delete(ctx context.Context, vgMirrorInfo MirrorInfo, mirror Mirror) error
6262

6363
// AddVolume adds the Volume to the VolumeGroup.
6464
AddVolume(ctx context.Context, volume Volume) error
6565

6666
// RemoveVolume removes the Volume from the VolumeGroup.
67-
RemoveVolume(ctx context.Context, volume Volume) error
67+
RemoveVolume(ctx context.Context, volume Volume, removeImageFromGroup bool) error
6868

6969
// ListVolumes returns a slice with all Volumes in the VolumeGroup.
7070
ListVolumes(ctx context.Context) ([]Volume, error)

0 commit comments

Comments
 (0)