From a4d878ef7ad0bd8697fddffa689b8159ca22df1f Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 25 Mar 2025 12:17:31 +0000 Subject: [PATCH 01/18] HDDS-12596. OM fs snapshot max limit is not enforced --- .../apache/hadoop/ozone/om/OzoneManager.java | 13 +++++++ .../snapshot/OMSnapshotCreateRequest.java | 8 ++++ .../snapshot/TestOMSnapshotCreateRequest.java | 37 ++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 520084848594..193be51d2a50 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -479,6 +479,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private final BucketUtilizationMetrics bucketUtilizationMetrics; private boolean fsSnapshotEnabled; + private int fsSnapshotMaxLimit; private String omHostName; @@ -988,6 +989,9 @@ private void instantiateServices(boolean withNewSnapshot) throws IOException { fsSnapshotEnabled = configuration.getBoolean( OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_KEY, OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_DEFAULT); + fsSnapshotMaxLimit = configuration.getInt( + OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, + OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); omSnapshotManager = new OmSnapshotManager(this); // Snapshot metrics @@ -4365,6 +4369,15 @@ public boolean isFilesystemSnapshotEnabled() { return fsSnapshotEnabled; } + /** + * Get the maximum number of Ozone filesystem snapshots allowed. + * + * @return the maximum number of Ozone filesystem snapshots allowed. + */ + public int getFsSnapshotMaxLimit() { + return fsSnapshotMaxLimit; + } + /** * Get DB updates since a specific sequence number. * diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 900419ad4223..b37f49d2a824 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -165,6 +165,14 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut throw new OMException("Snapshot already exists", FILE_ALREADY_EXISTS); } + // Check snapshot limit + int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); + if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { + throw new OMException( + String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), + OMException.ResultCodes.INVALID_SNAPSHOT_ERROR); + } + // Note down RDB latest transaction sequence number, which is used // as snapshot generation in the Differ. final long dbLatestSequenceNumber = diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index 7842bc1cab25..afa177c6a409 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT;; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; @@ -28,7 +29,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.any; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; import java.io.IOException; @@ -69,6 +70,7 @@ public class TestOMSnapshotCreateRequest extends TestSnapshotRequestAndResponse public void setup() throws Exception { snapshotName1 = UUID.randomUUID().toString(); snapshotName2 = UUID.randomUUID().toString(); + when(getOzoneManager().getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); } @ValueSource(strings = { @@ -264,6 +266,39 @@ public void testEntryExists() throws Exception { assertEquals(2, getOmMetrics().getNumSnapshotCreates()); } + @Test + public void testSnapshotLimit() throws Exception { + when(getOzoneManager().isAdmin(any())).thenReturn(true); + + when(getOzoneManager().getFsSnapshotMaxLimit()).thenReturn(1); + + String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); + + OMRequest omRequest = + createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); + OMSnapshotCreateRequest omSnapshotCreateRequest = doPreExecute(omRequest); + + assertNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); + omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 1); + + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); + + // Should fail as snapshot limit is 1 + omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + omSnapshotCreateRequest = doPreExecute(omRequest); + OMClientResponse omClientResponse = + omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 2); + + OMResponse omResponse = omClientResponse.getOMResponse(); + assertNotNull(omResponse.getCreateSnapshotResponse()); + assertEquals(OzoneManagerProtocolProtos.Status.INVALID_SNAPSHOT_ERROR, + omResponse.getStatus()); + + assertEquals(1, getOmMetrics().getNumSnapshotCreateFails()); + assertEquals(1, getOmMetrics().getNumSnapshotActive()); + assertEquals(2, getOmMetrics().getNumSnapshotCreates()); + } + private void renameKey(String fromKey, String toKey, long offset) throws IOException { OmKeyInfo toKeyInfo = addKey(toKey, offset + 1L); From f14a095d3f55479a2e0da343dd4ecc729a66e24d Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 25 Mar 2025 12:39:33 +0000 Subject: [PATCH 02/18] fix compile error --- .../ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index afa177c6a409..5003e949f2ad 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT;; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; From fe3780c06bbe9569391af6e9f2eb8caa962fde49 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 25 Mar 2025 12:55:26 +0000 Subject: [PATCH 03/18] Should set default snapshot limit for TestOMSnapshotRequestResponse base class --- .../ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java | 2 -- .../ozone/om/snapshot/TestSnapshotRequestAndResponse.java | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index 5003e949f2ad..ccbc3365e38e 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; @@ -70,7 +69,6 @@ public class TestOMSnapshotCreateRequest extends TestSnapshotRequestAndResponse public void setup() throws Exception { snapshotName1 = UUID.randomUUID().toString(); snapshotName2 = UUID.randomUUID().toString(); - when(getOzoneManager().getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); } @ValueSource(strings = { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java index 6ee1f7249bfe..7896b02fc735 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.om.snapshot; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createOmKeyInfo; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -167,6 +168,7 @@ public void baseSetup() throws Exception { omMetadataManager); omSnapshotManager = new OmSnapshotManager(ozoneManager); when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); + when(ozoneManager.getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); } @AfterEach From 39d87bea7a51c9398a65a414c890428cca88a8f9 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 25 Mar 2025 13:10:17 +0000 Subject: [PATCH 04/18] Set default filesystem snapshot max limit in TestOMKeyRequest setup --- .../hadoop/ozone/om/request/key/TestOMKeyRequest.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index 4a2e98272473..2da46c2bff33 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -17,16 +17,17 @@ package org.apache.hadoop.ozone.om.request.key; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.setupReplicationConfigValidation; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.anyInt; -import static org.mockito.Mockito.anyLong; -import static org.mockito.Mockito.anyString; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.framework; import static org.mockito.Mockito.mock; @@ -153,6 +154,7 @@ public void setup() throws Exception { when(ozoneManager.getDeletionMetrics()).thenReturn(delMetrics); when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager); when(ozoneManager.getConfiguration()).thenReturn(ozoneConfiguration); + when(ozoneManager.getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); OMLayoutVersionManager lvm = mock(OMLayoutVersionManager.class); when(lvm.isAllowed(anyString())).thenReturn(true); when(ozoneManager.getVersionManager()).thenReturn(lvm); From 8338f2934d50f029d366455c2097cb5508420d58 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Thu, 27 Mar 2025 11:15:57 +0000 Subject: [PATCH 05/18] Addressed comment --- .../snapshot/OMSnapshotCreateRequest.java | 16 +++++++------- .../snapshot/TestOMSnapshotCreateRequest.java | 22 +++++++++++++++---- .../snapshot/TestOMSnapshotDeleteRequest.java | 8 ++++++- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index b37f49d2a824..c677e03a9ae3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -147,6 +147,14 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut OzoneManagerProtocolProtos.UserInfo userInfo = getOmRequest().getUserInfo(); String key = snapshotInfo.getTableKey(); try { + // Check snapshot limit + int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); + if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { + throw new OMException( + String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), + OMException.ResultCodes.INVALID_SNAPSHOT_ERROR); + } + // Lock bucket so it doesn't // get deleted while creating snapshot mergeOmLockDetails( @@ -165,14 +173,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut throw new OMException("Snapshot already exists", FILE_ALREADY_EXISTS); } - // Check snapshot limit - int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); - if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { - throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), - OMException.ResultCodes.INVALID_SNAPSHOT_ERROR); - } - // Note down RDB latest transaction sequence number, which is used // as snapshot generation in the Differ. final long dbLatestSequenceNumber = diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index ccbc3365e38e..450338ad6964 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; +import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.deleteSnapshotRequest; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status.OK; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.CreateSnapshot; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -267,7 +268,6 @@ public void testEntryExists() throws Exception { @Test public void testSnapshotLimit() throws Exception { when(getOzoneManager().isAdmin(any())).thenReturn(true); - when(getOzoneManager().getFsSnapshotMaxLimit()).thenReturn(1); String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); @@ -292,9 +292,23 @@ public void testSnapshotLimit() throws Exception { assertEquals(OzoneManagerProtocolProtos.Status.INVALID_SNAPSHOT_ERROR, omResponse.getStatus()); - assertEquals(1, getOmMetrics().getNumSnapshotCreateFails()); - assertEquals(1, getOmMetrics().getNumSnapshotActive()); - assertEquals(2, getOmMetrics().getNumSnapshotCreates()); + // delete snapshot + omRequest = deleteSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); + OMSnapshotDeleteRequest omSnapshotDeleteRequest = TestOMSnapshotDeleteRequest.doPreExecute(omRequest, + getOzoneManager()); + omClientResponse = + omSnapshotDeleteRequest.validateAndUpdateCache(getOzoneManager(), 3); + + // create snapshot again, should be successful + omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + omSnapshotCreateRequest = doPreExecute(omRequest); + omClientResponse = + omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 4); + + omResponse = omClientResponse.getOMResponse(); + assertNotNull(omResponse.getCreateSnapshotResponse()); + assertEquals(OzoneManagerProtocolProtos.Status.OK, + omResponse.getStatus()); } private void renameKey(String fromKey, String toKey, long offset) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java index 65ec8af82c98..617da70a6718 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java @@ -36,6 +36,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; +import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -272,11 +273,16 @@ public void testEntryExist() throws Exception { private OMSnapshotDeleteRequest doPreExecute( OMRequest originalRequest) throws Exception { + return doPreExecute(originalRequest, getOzoneManager()); + } + + public static OMSnapshotDeleteRequest doPreExecute( + OMRequest originalRequest, OzoneManager ozoneManager) throws Exception { OMSnapshotDeleteRequest omSnapshotDeleteRequest = new OMSnapshotDeleteRequest(originalRequest); OMRequest modifiedRequest = - omSnapshotDeleteRequest.preExecute(getOzoneManager()); + omSnapshotDeleteRequest.preExecute(ozoneManager); return new OMSnapshotDeleteRequest(modifiedRequest); } From 0834515693f5927856722625994b6bfb5f874a2b Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 28 Mar 2025 05:47:32 +0000 Subject: [PATCH 06/18] Move snapshot validation to preExecute and add new TOO_MANY_SNAPSHOTS err code --- .../ozone/om/exceptions/OMException.java | 4 +++- .../src/main/proto/OmClientProtocol.proto | 1 + .../snapshot/OMSnapshotCreateRequest.java | 20 +++++++++++-------- .../snapshot/TestOMSnapshotCreateRequest.java | 19 ++++++------------ 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java index cc540c15e3b8..2c91e8cc65cd 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java @@ -271,6 +271,8 @@ public enum ResultCodes { TOO_MANY_BUCKETS, KEY_UNDER_LEASE_RECOVERY, KEY_ALREADY_CLOSED, - KEY_UNDER_LEASE_SOFT_LIMIT_PERIOD + KEY_UNDER_LEASE_SOFT_LIMIT_PERIOD, + + TOO_MANY_SNAPSHOTS, } } diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index df97028a0f31..0804b01a2f65 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -564,6 +564,7 @@ enum Status { KEY_ALREADY_CLOSED = 96; KEY_UNDER_LEASE_SOFT_LIMIT_PERIOD = 97; + TOO_MANY_SNAPSHOTS = 98; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index c677e03a9ae3..89b6f978ac6a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -99,11 +99,23 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { snapshotPath = snapshotInfo.getSnapshotPath(); } + private void validateSnapshotLimit(OzoneManager ozoneManager) throws IOException { + int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); + OMMetrics omMetrics = ozoneManager.getMetrics(); + if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { + throw new OMException( + String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), + OMException.ResultCodes.TOO_MANY_SNAPSHOTS); + } + } + @Override @DisallowedUntilLayoutVersion(FILESYSTEM_SNAPSHOT) @RequireSnapshotFeatureState(true) public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); + // verify snapshot limit + validateSnapshotLimit(ozoneManager); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a @@ -147,14 +159,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut OzoneManagerProtocolProtos.UserInfo userInfo = getOmRequest().getUserInfo(); String key = snapshotInfo.getTableKey(); try { - // Check snapshot limit - int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); - if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { - throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), - OMException.ResultCodes.INVALID_SNAPSHOT_ERROR); - } - // Lock bucket so it doesn't // get deleted while creating snapshot mergeOmLockDetails( diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index 450338ad6964..b6d5a33dbdb7 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -282,30 +282,23 @@ public void testSnapshotLimit() throws Exception { assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); // Should fail as snapshot limit is 1 - omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); - omSnapshotCreateRequest = doPreExecute(omRequest); - OMClientResponse omClientResponse = - omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 2); - - OMResponse omResponse = omClientResponse.getOMResponse(); - assertNotNull(omResponse.getCreateSnapshotResponse()); - assertEquals(OzoneManagerProtocolProtos.Status.INVALID_SNAPSHOT_ERROR, - omResponse.getStatus()); + OMRequest snapshotRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest)); + assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); // delete snapshot omRequest = deleteSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); OMSnapshotDeleteRequest omSnapshotDeleteRequest = TestOMSnapshotDeleteRequest.doPreExecute(omRequest, getOzoneManager()); - omClientResponse = - omSnapshotDeleteRequest.validateAndUpdateCache(getOzoneManager(), 3); + omSnapshotDeleteRequest.validateAndUpdateCache(getOzoneManager(), 3); // create snapshot again, should be successful omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); omSnapshotCreateRequest = doPreExecute(omRequest); - omClientResponse = + OMClientResponse omClientResponse = omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 4); - omResponse = omClientResponse.getOMResponse(); + OMResponse omResponse = omClientResponse.getOMResponse(); assertNotNull(omResponse.getCreateSnapshotResponse()); assertEquals(OzoneManagerProtocolProtos.Status.OK, omResponse.getStatus()); From 3c2727d39a557cd10987f68c7c0da83cb5c96c06 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Mon, 31 Mar 2025 00:19:00 +0800 Subject: [PATCH 07/18] Use snapshotChain size to validete snapshot limit --- .../hadoop/ozone/om/OmSnapshotManager.java | 26 ++++++++ .../apache/hadoop/ozone/om/OzoneManager.java | 13 ---- .../snapshot/OMSnapshotCreateRequest.java | 12 +--- .../ozone/om/TestOmSnapshotManager.java | 61 +++++++++++++++++-- .../om/request/key/TestOMKeyRequest.java | 10 ++- .../snapshot/TestOMSnapshotCreateRequest.java | 22 +------ .../TestSnapshotRequestAndResponse.java | 2 - 7 files changed, 90 insertions(+), 56 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 11bcd51c9923..8044e1915499 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -24,6 +24,8 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_DIFF_DB_NAME; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_INDICATOR; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE; @@ -172,6 +174,8 @@ public final class OmSnapshotManager implements AutoCloseable { // Soft limit of the snapshot cache size. private final int softCacheSize; + private int fsSnapshotMaxLimit; + public OmSnapshotManager(OzoneManager ozoneManager) { boolean isFilesystemSnapshotEnabled = @@ -246,6 +250,9 @@ public OmSnapshotManager(OzoneManager ozoneManager) { this.softCacheSize = ozoneManager.getConfiguration().getInt( OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE, OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE_DEFAULT); + + fsSnapshotMaxLimit = ozoneManager.getConfiguration().getInt(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, + OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); CacheLoader loader = createCacheLoader(); @@ -849,6 +856,25 @@ private void validateSnapshotsExistAndActive(final String volumeName, checkSnapshotActive(toSnapInfo, false); } + public void validateSnapshotLimit() throws IOException { + OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) + ozoneManager.getMetadataManager(); + SnapshotChainManager snapshotChainManager = + omMetadataManager.getSnapshotChainManager(); + int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); + if (currentSnapshotNum >= fsSnapshotMaxLimit) { + throw new OMException( + String.format("Snapshot limit of %d reached. Cannot create more snapshots.", + fsSnapshotMaxLimit) + " If you already deleted some snapshots, " + + "please wait for the background service to complete the cleanup.", + OMException.ResultCodes.TOO_MANY_SNAPSHOTS); + } + } + + public void setFsSnapshotMaxLimit(int fsSnapshotMaxLimit) { + this.fsSnapshotMaxLimit = fsSnapshotMaxLimit; + } + private int getIndexFromToken(final String token) throws IOException { if (isBlank(token)) { return 0; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 193be51d2a50..520084848594 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -479,7 +479,6 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private final BucketUtilizationMetrics bucketUtilizationMetrics; private boolean fsSnapshotEnabled; - private int fsSnapshotMaxLimit; private String omHostName; @@ -989,9 +988,6 @@ private void instantiateServices(boolean withNewSnapshot) throws IOException { fsSnapshotEnabled = configuration.getBoolean( OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_KEY, OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_DEFAULT); - fsSnapshotMaxLimit = configuration.getInt( - OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, - OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); omSnapshotManager = new OmSnapshotManager(this); // Snapshot metrics @@ -4369,15 +4365,6 @@ public boolean isFilesystemSnapshotEnabled() { return fsSnapshotEnabled; } - /** - * Get the maximum number of Ozone filesystem snapshots allowed. - * - * @return the maximum number of Ozone filesystem snapshots allowed. - */ - public int getFsSnapshotMaxLimit() { - return fsSnapshotMaxLimit; - } - /** * Get DB updates since a specific sequence number. * diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 89b6f978ac6a..389f9a2f7c91 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -99,23 +99,13 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { snapshotPath = snapshotInfo.getSnapshotPath(); } - private void validateSnapshotLimit(OzoneManager ozoneManager) throws IOException { - int maxSnapshots = ozoneManager.getFsSnapshotMaxLimit(); - OMMetrics omMetrics = ozoneManager.getMetrics(); - if (omMetrics.getNumSnapshotActive() >= maxSnapshots) { - throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots.", maxSnapshots), - OMException.ResultCodes.TOO_MANY_SNAPSHOTS); - } - } - @Override @DisallowedUntilLayoutVersion(FILESYSTEM_SNAPSHOT) @RequireSnapshotFeatureState(true) public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); // verify snapshot limit - validateSnapshotLimit(ozoneManager); + ozoneManager.getOmSnapshotManager().validateSnapshotLimit(); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 925facb48c8f..cabd22274338 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -32,6 +32,7 @@ import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.getINode; import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.truncateFileName; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -185,8 +186,10 @@ public void testCloseOnEviction() throws IOException, when(snapshotInfoTable.get(first.getTableKey())).thenReturn(first); when(snapshotInfoTable.get(second.getTableKey())).thenReturn(second); - ((OmMetadataManagerImpl) om.getMetadataManager()).getSnapshotChainManager().addSnapshot(first); - ((OmMetadataManagerImpl) om.getMetadataManager()).getSnapshotChainManager().addSnapshot(second); + SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) + .getSnapshotChainManager(); + snapshotChainManager.addSnapshot(first); + snapshotChainManager.addSnapshot(second); RDBBatchOperation rdbBatchOperation = new RDBBatchOperation(); // create the first snapshot checkpoint OmSnapshotManager.createOmSnapshotCheckpoint(om.getMetadataManager(), @@ -228,6 +231,52 @@ public void testCloseOnEviction() throws IOException, GenericTestUtils.waitFor(() -> { return logCapture.getOutput().contains(msg); }, 100, 30_000); + + // clean up all snapshot + snapshotChainManager.deleteSnapshot(second); + snapshotChainManager.deleteSnapshot(first); + assertEquals(snapshotChainManager.getGlobalSnapshotChain().size(), 0); + } + + @Test + public void testValidateSnapshotLimit() throws IOException { + Table volumeTable = mock(Table.class); + Table bucketTable = mock(Table.class); + Table snapshotInfoTable = mock(Table.class); + HddsWhiteboxTestUtils.setInternalState( + om.getMetadataManager(), VOLUME_TABLE, volumeTable); + HddsWhiteboxTestUtils.setInternalState( + om.getMetadataManager(), BUCKET_TABLE, bucketTable); + HddsWhiteboxTestUtils.setInternalState( + om.getMetadataManager(), SNAPSHOT_INFO_TABLE, snapshotInfoTable); + + om.getOmSnapshotManager().setFsSnapshotMaxLimit(2); + + SnapshotInfo first = createSnapshotInfo("vol1", "buck1"); + SnapshotInfo second = createSnapshotInfo("vol1", "buck1"); + + first.setGlobalPreviousSnapshotId(null); + first.setPathPreviousSnapshotId(null); + second.setGlobalPreviousSnapshotId(first.getSnapshotId()); + second.setPathPreviousSnapshotId(first.getSnapshotId()); + + when(snapshotInfoTable.get(first.getTableKey())).thenReturn(first); + when(snapshotInfoTable.get(second.getTableKey())).thenReturn(second); + + SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) + .getSnapshotChainManager(); + snapshotChainManager.addSnapshot(first); + snapshotChainManager.addSnapshot(second); + + OMException exception = assertThrows(OMException.class, () -> om.getOmSnapshotManager().validateSnapshotLimit()); + assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, exception.getResult()); + + snapshotChainManager.deleteSnapshot(second); + + assertDoesNotThrow(() -> om.getOmSnapshotManager().validateSnapshotLimit()); + + // clean up + snapshotChainManager.deleteSnapshot(first); } @BeforeEach @@ -342,8 +391,9 @@ public void testGetSnapshotInfo() throws IOException { .getLatestPathSnapshotId(String.join("/", "vol", "buck")); s1.setPathPreviousSnapshotId(latestPathSnapId); s1.setGlobalPreviousSnapshotId(latestGlobalSnapId); - ((OmMetadataManagerImpl) om.getMetadataManager()).getSnapshotChainManager() - .addSnapshot(s1); + SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) + .getSnapshotChainManager(); + snapshotChainManager.addSnapshot(s1); OMException ome = assertThrows(OMException.class, () -> om.getOmSnapshotManager().getSnapshot(s1.getSnapshotId())); assertEquals(OMException.ResultCodes.FILE_NOT_FOUND, ome.getResult()); @@ -352,6 +402,9 @@ public void testGetSnapshotInfo() throws IOException { ome = assertThrows(OMException.class, () -> om.getOmSnapshotManager().getSnapshot(s2.getSnapshotId())); assertEquals(OMException.ResultCodes.FILE_NOT_FOUND, ome.getResult()); + + // clean up + snapshotChainManager.deleteSnapshot(s1); } /* * Test that exclude list is generated correctly. diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index 2da46c2bff33..4a2e98272473 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -17,17 +17,16 @@ package org.apache.hadoop.ozone.om.request.key; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.setupReplicationConfigValidation; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.framework; import static org.mockito.Mockito.mock; @@ -154,7 +153,6 @@ public void setup() throws Exception { when(ozoneManager.getDeletionMetrics()).thenReturn(delMetrics); when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager); when(ozoneManager.getConfiguration()).thenReturn(ozoneConfiguration); - when(ozoneManager.getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); OMLayoutVersionManager lvm = mock(OMLayoutVersionManager.class); when(lvm.isAllowed(anyString())).thenReturn(true); when(ozoneManager.getVersionManager()).thenReturn(lvm); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index b6d5a33dbdb7..cc0157d0b96b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -21,7 +21,6 @@ import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; -import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.deleteSnapshotRequest; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status.OK; import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.CreateSnapshot; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -29,7 +28,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.when; import java.io.IOException; @@ -268,7 +267,7 @@ public void testEntryExists() throws Exception { @Test public void testSnapshotLimit() throws Exception { when(getOzoneManager().isAdmin(any())).thenReturn(true); - when(getOzoneManager().getFsSnapshotMaxLimit()).thenReturn(1); + getOmSnapshotManager().setFsSnapshotMaxLimit(1); String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); @@ -285,23 +284,6 @@ public void testSnapshotLimit() throws Exception { OMRequest snapshotRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest)); assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); - - // delete snapshot - omRequest = deleteSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); - OMSnapshotDeleteRequest omSnapshotDeleteRequest = TestOMSnapshotDeleteRequest.doPreExecute(omRequest, - getOzoneManager()); - omSnapshotDeleteRequest.validateAndUpdateCache(getOzoneManager(), 3); - - // create snapshot again, should be successful - omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); - omSnapshotCreateRequest = doPreExecute(omRequest); - OMClientResponse omClientResponse = - omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 4); - - OMResponse omResponse = omClientResponse.getOMResponse(); - assertNotNull(omResponse.getCreateSnapshotResponse()); - assertEquals(OzoneManagerProtocolProtos.Status.OK, - omResponse.getStatus()); } private void renameKey(String fromKey, String toKey, long offset) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java index 7896b02fc735..6ee1f7249bfe 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.om.snapshot; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createOmKeyInfo; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -168,7 +167,6 @@ public void baseSetup() throws Exception { omMetadataManager); omSnapshotManager = new OmSnapshotManager(ozoneManager); when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); - when(ozoneManager.getFsSnapshotMaxLimit()).thenReturn(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT); } @AfterEach From 7b52f1a1887784700dc9b32c11b77f080bfcee5a Mon Sep 17 00:00:00 2001 From: peterxcli Date: Thu, 3 Apr 2025 13:46:51 +0800 Subject: [PATCH 08/18] Cleanup test --- .../ozone/om/TestOmSnapshotManager.java | 77 ++++++++++--------- .../snapshot/TestOMSnapshotDeleteRequest.java | 8 +- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index cabd22274338..3f5a8703dbea 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -55,6 +55,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -76,6 +77,7 @@ import org.apache.hadoop.util.Time; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -90,6 +92,9 @@ class TestOmSnapshotManager { private OzoneManager om; + private SnapshotChainManager snapshotChainManager; + private OmMetadataManagerImpl omMetadataManager; + private OmSnapshotManager omSnapshotManager; private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + SNAPSHOT_CANDIDATE_DIR; private File leaderDir; @@ -114,9 +119,15 @@ void init(@TempDir File tempDir) throws Exception { OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE, 1); configuration.setBoolean( OMConfigKeys.OZONE_OM_SNAPSHOT_ROCKSDB_METRICS_ENABLED, false); + // Allow 2 fs snapshots + configuration.setInt( + OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, 2); OmTestManagers omTestManagers = new OmTestManagers(configuration); om = omTestManagers.getOzoneManager(); + omMetadataManager = (OmMetadataManagerImpl) om.getMetadataManager(); + omSnapshotManager = om.getOmSnapshotManager(); + snapshotChainManager = omMetadataManager.getSnapshotChainManager(); } @AfterAll @@ -124,6 +135,21 @@ void stop() { om.stop(); } + @AfterEach + void cleanup() throws IOException { + Table snapshotInfoTable = omMetadataManager.getSnapshotInfoTable(); + + Iterator iter = snapshotChainManager.iterator(true); + while (iter.hasNext()) { + UUID snapshotId = iter.next(); + String snapshotInfoKey = snapshotChainManager.getTableKey(snapshotId); + SnapshotInfo snapshotInfo = snapshotInfoTable.get(snapshotInfoKey); + snapshotChainManager.deleteSnapshot(snapshotInfo); + snapshotInfoTable.delete(snapshotInfoKey); + } + omSnapshotManager.invalidateCache(); + } + @Test public void testSnapshotFeatureFlagSafetyCheck() throws IOException { // Verify that the snapshot feature config safety check method @@ -152,11 +178,11 @@ public void testCloseOnEviction() throws IOException, Table bucketTable = mock(Table.class); Table snapshotInfoTable = mock(Table.class); HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), VOLUME_TABLE, volumeTable); + omMetadataManager, VOLUME_TABLE, volumeTable); HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), BUCKET_TABLE, bucketTable); + omMetadataManager, BUCKET_TABLE, bucketTable); HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), SNAPSHOT_INFO_TABLE, snapshotInfoTable); + omMetadataManager, SNAPSHOT_INFO_TABLE, snapshotInfoTable); final String volumeName = UUID.randomUUID().toString(); final String dbVolumeKey = om.getMetadataManager().getVolumeKey(volumeName); @@ -186,8 +212,6 @@ public void testCloseOnEviction() throws IOException, when(snapshotInfoTable.get(first.getTableKey())).thenReturn(first); when(snapshotInfoTable.get(second.getTableKey())).thenReturn(second); - SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) - .getSnapshotChainManager(); snapshotChainManager.addSnapshot(first); snapshotChainManager.addSnapshot(second); RDBBatchOperation rdbBatchOperation = new RDBBatchOperation(); @@ -197,7 +221,6 @@ public void testCloseOnEviction() throws IOException, om.getMetadataManager().getStore().commitBatchOperation(rdbBatchOperation); // retrieve it and setup store mock - OmSnapshotManager omSnapshotManager = om.getOmSnapshotManager(); OmSnapshot firstSnapshot = omSnapshotManager .getActiveSnapshot(first.getVolumeName(), first.getBucketName(), first.getName()) .get(); @@ -231,26 +254,13 @@ public void testCloseOnEviction() throws IOException, GenericTestUtils.waitFor(() -> { return logCapture.getOutput().contains(msg); }, 100, 30_000); - - // clean up all snapshot - snapshotChainManager.deleteSnapshot(second); - snapshotChainManager.deleteSnapshot(first); - assertEquals(snapshotChainManager.getGlobalSnapshotChain().size(), 0); } @Test public void testValidateSnapshotLimit() throws IOException { - Table volumeTable = mock(Table.class); - Table bucketTable = mock(Table.class); Table snapshotInfoTable = mock(Table.class); HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), VOLUME_TABLE, volumeTable); - HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), BUCKET_TABLE, bucketTable); - HddsWhiteboxTestUtils.setInternalState( - om.getMetadataManager(), SNAPSHOT_INFO_TABLE, snapshotInfoTable); - - om.getOmSnapshotManager().setFsSnapshotMaxLimit(2); + omMetadataManager, SNAPSHOT_INFO_TABLE, snapshotInfoTable); SnapshotInfo first = createSnapshotInfo("vol1", "buck1"); SnapshotInfo second = createSnapshotInfo("vol1", "buck1"); @@ -263,20 +273,17 @@ public void testValidateSnapshotLimit() throws IOException { when(snapshotInfoTable.get(first.getTableKey())).thenReturn(first); when(snapshotInfoTable.get(second.getTableKey())).thenReturn(second); - SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) - .getSnapshotChainManager(); snapshotChainManager.addSnapshot(first); + assertDoesNotThrow(() -> omSnapshotManager.snapshotLimitCheck()); + snapshotChainManager.addSnapshot(second); - OMException exception = assertThrows(OMException.class, () -> om.getOmSnapshotManager().validateSnapshotLimit()); + OMException exception = assertThrows(OMException.class, () -> omSnapshotManager.snapshotLimitCheck()); assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, exception.getResult()); snapshotChainManager.deleteSnapshot(second); - assertDoesNotThrow(() -> om.getOmSnapshotManager().validateSnapshotLimit()); - - // clean up - snapshotChainManager.deleteSnapshot(first); + assertDoesNotThrow(() -> omSnapshotManager.snapshotLimitCheck()); } @BeforeEach @@ -383,16 +390,11 @@ public void testHardLinkCreation() throws IOException { @Test public void testGetSnapshotInfo() throws IOException { SnapshotInfo s1 = createSnapshotInfo("vol", "buck"); - UUID latestGlobalSnapId = - ((OmMetadataManagerImpl) om.getMetadataManager()).getSnapshotChainManager() - .getLatestGlobalSnapshotId(); + UUID latestGlobalSnapId = snapshotChainManager.getLatestGlobalSnapshotId(); UUID latestPathSnapId = - ((OmMetadataManagerImpl) om.getMetadataManager()).getSnapshotChainManager() - .getLatestPathSnapshotId(String.join("/", "vol", "buck")); + snapshotChainManager.getLatestPathSnapshotId(String.join("/", "vol", "buck")); s1.setPathPreviousSnapshotId(latestPathSnapId); s1.setGlobalPreviousSnapshotId(latestGlobalSnapId); - SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl) om.getMetadataManager()) - .getSnapshotChainManager(); snapshotChainManager.addSnapshot(s1); OMException ome = assertThrows(OMException.class, () -> om.getOmSnapshotManager().getSnapshot(s1.getSnapshotId())); @@ -403,8 +405,11 @@ public void testGetSnapshotInfo() throws IOException { () -> om.getOmSnapshotManager().getSnapshot(s2.getSnapshotId())); assertEquals(OMException.ResultCodes.FILE_NOT_FOUND, ome.getResult()); - // clean up - snapshotChainManager.deleteSnapshot(s1); + // add to make cleanup work + Table snapshotInfoTable = mock(Table.class); + HddsWhiteboxTestUtils.setInternalState( + omMetadataManager, SNAPSHOT_INFO_TABLE, snapshotInfoTable); + when(snapshotInfoTable.get(s1.getTableKey())).thenReturn(s1); } /* * Test that exclude list is generated correctly. diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java index 617da70a6718..65ec8af82c98 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotDeleteRequest.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; -import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -273,16 +272,11 @@ public void testEntryExist() throws Exception { private OMSnapshotDeleteRequest doPreExecute( OMRequest originalRequest) throws Exception { - return doPreExecute(originalRequest, getOzoneManager()); - } - - public static OMSnapshotDeleteRequest doPreExecute( - OMRequest originalRequest, OzoneManager ozoneManager) throws Exception { OMSnapshotDeleteRequest omSnapshotDeleteRequest = new OMSnapshotDeleteRequest(originalRequest); OMRequest modifiedRequest = - omSnapshotDeleteRequest.preExecute(ozoneManager); + omSnapshotDeleteRequest.preExecute(getOzoneManager()); return new OMSnapshotDeleteRequest(modifiedRequest); } From 2607c3e0a90f4a616ed920e3f08eaa248b6faff8 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Thu, 3 Apr 2025 13:47:44 +0800 Subject: [PATCH 09/18] Count inflight snapshot, too --- .../hadoop/ozone/om/OmSnapshotManager.java | 24 ++++++--- .../snapshot/OMSnapshotCreateRequest.java | 8 ++- .../snapshot/TestOMSnapshotCreateRequest.java | 50 +++++++++++++++---- 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 8044e1915499..ebacefc46c94 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -65,6 +65,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.StringUtils; @@ -174,7 +175,8 @@ public final class OmSnapshotManager implements AutoCloseable { // Soft limit of the snapshot cache size. private final int softCacheSize; - private int fsSnapshotMaxLimit; + private final int fsSnapshotMaxLimit; + private final AtomicInteger inFlightSnapshotCount = new AtomicInteger(0); public OmSnapshotManager(OzoneManager ozoneManager) { @@ -856,23 +858,31 @@ private void validateSnapshotsExistAndActive(final String volumeName, checkSnapshotActive(toSnapInfo, false); } - public void validateSnapshotLimit() throws IOException { + /* + * Check snapshot limit + * Note: This method increments the snapshot in-flight counter + */ + public synchronized void snapshotLimitCheck() throws IOException { OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager(); int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); - if (currentSnapshotNum >= fsSnapshotMaxLimit) { + int inFlightCount = inFlightSnapshotCount.get(); + if (currentSnapshotNum + inFlightCount >= fsSnapshotMaxLimit) { throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots.", - fsSnapshotMaxLimit) + " If you already deleted some snapshots, " + + String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + + "Current snapshots: %d, In-flight creations: %d", + fsSnapshotMaxLimit, currentSnapshotNum, inFlightCount) + + " If you already deleted some snapshots, " + "please wait for the background service to complete the cleanup.", OMException.ResultCodes.TOO_MANY_SNAPSHOTS); } + inFlightSnapshotCount.incrementAndGet(); } - public void setFsSnapshotMaxLimit(int fsSnapshotMaxLimit) { - this.fsSnapshotMaxLimit = fsSnapshotMaxLimit; + public void decrementInFlightSnapshotCount() { + inFlightSnapshotCount.decrementAndGet(); } private int getIndexFromToken(final String token) throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 389f9a2f7c91..1e9dc22f16f4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -105,7 +105,7 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); // verify snapshot limit - ozoneManager.getOmSnapshotManager().validateSnapshotLimit(); + ozoneManager.getOmSnapshotManager().snapshotLimitCheck(); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a @@ -190,7 +190,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut // pre-replicated key size counter in OmBucketInfo. snapshotInfo.setReferencedSize(estimateBucketDataSize(omBucketInfo)); - addSnapshotInfoToSnapshotChainAndCache(omMetadataManager, context.getIndex()); + addSnapshotInfoToSnapshotChainAndCache(ozoneManager, omMetadataManager, context.getIndex()); omResponse.setCreateSnapshotResponse( CreateSnapshotResponse.newBuilder() @@ -252,6 +252,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut * it was removed at T-5. */ private void addSnapshotInfoToSnapshotChainAndCache( + OzoneManager ozoneManager, OmMetadataManagerImpl omMetadataManager, long transactionLogIndex ) throws IOException { @@ -290,6 +291,9 @@ private void addSnapshotInfoToSnapshotChainAndCache( removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager, snapshotInfo); throw new IOException(exception.getMessage(), exception); + } finally { + // whatever the snapshot add to chain succeeds or fails, we decrement the in-flight counter + ozoneManager.getOmSnapshotManager().decrementInFlightSnapshotCount(); } } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index cc0157d0b96b..3d1affb57b81 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; @@ -37,6 +38,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -64,11 +66,17 @@ public class TestOMSnapshotCreateRequest extends TestSnapshotRequestAndResponse { private String snapshotName1; private String snapshotName2; + private String snapshotName3; + private String snapshotName4; + private String snapshotName5; @BeforeEach public void setup() throws Exception { snapshotName1 = UUID.randomUUID().toString(); snapshotName2 = UUID.randomUUID().toString(); + snapshotName3 = UUID.randomUUID().toString(); + snapshotName4 = UUID.randomUUID().toString(); + snapshotName5 = UUID.randomUUID().toString(); } @ValueSource(strings = { @@ -267,22 +275,46 @@ public void testEntryExists() throws Exception { @Test public void testSnapshotLimit() throws Exception { when(getOzoneManager().isAdmin(any())).thenReturn(true); - getOmSnapshotManager().setFsSnapshotMaxLimit(1); + getOzoneManager().getOmSnapshotManager().close(); + getOzoneManager().getConfiguration().setInt(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, 3); + OmSnapshotManager omSnapshotManager = new OmSnapshotManager(getOzoneManager()); + when(getOzoneManager().getOmSnapshotManager()).thenReturn(omSnapshotManager); + // Test Case 1: No snapshots in chain, no in-flight String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); - - OMRequest omRequest = - createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); + OMRequest omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); OMSnapshotCreateRequest omSnapshotCreateRequest = doPreExecute(omRequest); - assertNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 1); - assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); - // Should fail as snapshot limit is 1 - OMRequest snapshotRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); - OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest)); + // Test Case 2: One snapshot in chain, no in-flight + String key2 = getTableKey(getVolumeName(), getBucketName(), snapshotName2); + OMRequest snapshotRequest2 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + OMSnapshotCreateRequest omSnapshotCreateRequest2 = doPreExecute(snapshotRequest2); + omSnapshotCreateRequest2.validateAndUpdateCache(getOzoneManager(), 2); + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key2)); + + // Test Case 3: Two snapshots in chain, one in-flight + // First create an in-flight snapshot + String key3 = getTableKey(getVolumeName(), getBucketName(), snapshotName3); + OMRequest snapshotRequest3 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName3); + OMSnapshotCreateRequest omSnapshotCreateRequest3 = doPreExecute(snapshotRequest3); + // Don't call validateAndUpdateCache to keep it in-flight + + // Try to create another snapshot - should fail as total would be 4 (2 in chain + 1 in-flight + 1 new) + OMRequest snapshotRequest4 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName4); + OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest4)); + assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); + + // Complete the in-flight snapshot + omSnapshotCreateRequest3.validateAndUpdateCache(getOzoneManager(), 3); + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key3)); + + // Test Case 4: Three snapshots in chain, no in-flight + // Try to create another snapshot - should fail as we've reached the limit + OMRequest snapshotRequest5 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName5); + omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest5)); assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); } From 78bc4c4db9157143a3aede2359312a1c60e82675 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Thu, 3 Apr 2025 14:33:29 +0800 Subject: [PATCH 10/18] fix test --- .../java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 3f5a8703dbea..9d5959103b7f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -275,6 +275,7 @@ public void testValidateSnapshotLimit() throws IOException { snapshotChainManager.addSnapshot(first); assertDoesNotThrow(() -> omSnapshotManager.snapshotLimitCheck()); + omSnapshotManager.decrementInFlightSnapshotCount(); snapshotChainManager.addSnapshot(second); From 902b507301ce2273136c784dde6104c8a403646d Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 8 Apr 2025 11:22:40 +0800 Subject: [PATCH 11/18] Revert "Count inflight snapshot, too" This reverts commit 2607c3e0a90f4a616ed920e3f08eaa248b6faff8. --- .../hadoop/ozone/om/OmSnapshotManager.java | 24 +++------ .../snapshot/OMSnapshotCreateRequest.java | 8 +-- .../snapshot/TestOMSnapshotCreateRequest.java | 50 ++++--------------- 3 files changed, 18 insertions(+), 64 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index ebacefc46c94..8044e1915499 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -65,7 +65,6 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.StringUtils; @@ -175,8 +174,7 @@ public final class OmSnapshotManager implements AutoCloseable { // Soft limit of the snapshot cache size. private final int softCacheSize; - private final int fsSnapshotMaxLimit; - private final AtomicInteger inFlightSnapshotCount = new AtomicInteger(0); + private int fsSnapshotMaxLimit; public OmSnapshotManager(OzoneManager ozoneManager) { @@ -858,31 +856,23 @@ private void validateSnapshotsExistAndActive(final String volumeName, checkSnapshotActive(toSnapInfo, false); } - /* - * Check snapshot limit - * Note: This method increments the snapshot in-flight counter - */ - public synchronized void snapshotLimitCheck() throws IOException { + public void validateSnapshotLimit() throws IOException { OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager(); int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); - int inFlightCount = inFlightSnapshotCount.get(); - if (currentSnapshotNum + inFlightCount >= fsSnapshotMaxLimit) { + if (currentSnapshotNum >= fsSnapshotMaxLimit) { throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + - "Current snapshots: %d, In-flight creations: %d", - fsSnapshotMaxLimit, currentSnapshotNum, inFlightCount) + - " If you already deleted some snapshots, " + + String.format("Snapshot limit of %d reached. Cannot create more snapshots.", + fsSnapshotMaxLimit) + " If you already deleted some snapshots, " + "please wait for the background service to complete the cleanup.", OMException.ResultCodes.TOO_MANY_SNAPSHOTS); } - inFlightSnapshotCount.incrementAndGet(); } - public void decrementInFlightSnapshotCount() { - inFlightSnapshotCount.decrementAndGet(); + public void setFsSnapshotMaxLimit(int fsSnapshotMaxLimit) { + this.fsSnapshotMaxLimit = fsSnapshotMaxLimit; } private int getIndexFromToken(final String token) throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 1e9dc22f16f4..389f9a2f7c91 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -105,7 +105,7 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); // verify snapshot limit - ozoneManager.getOmSnapshotManager().snapshotLimitCheck(); + ozoneManager.getOmSnapshotManager().validateSnapshotLimit(); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a @@ -190,7 +190,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut // pre-replicated key size counter in OmBucketInfo. snapshotInfo.setReferencedSize(estimateBucketDataSize(omBucketInfo)); - addSnapshotInfoToSnapshotChainAndCache(ozoneManager, omMetadataManager, context.getIndex()); + addSnapshotInfoToSnapshotChainAndCache(omMetadataManager, context.getIndex()); omResponse.setCreateSnapshotResponse( CreateSnapshotResponse.newBuilder() @@ -252,7 +252,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut * it was removed at T-5. */ private void addSnapshotInfoToSnapshotChainAndCache( - OzoneManager ozoneManager, OmMetadataManagerImpl omMetadataManager, long transactionLogIndex ) throws IOException { @@ -291,9 +290,6 @@ private void addSnapshotInfoToSnapshotChainAndCache( removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager, snapshotInfo); throw new IOException(exception.getMessage(), exception); - } finally { - // whatever the snapshot add to chain succeeds or fails, we decrement the in-flight counter - ozoneManager.getOmSnapshotManager().decrementInFlightSnapshotCount(); } } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index 3d1affb57b81..cc0157d0b96b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; @@ -38,7 +37,6 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -66,17 +64,11 @@ public class TestOMSnapshotCreateRequest extends TestSnapshotRequestAndResponse { private String snapshotName1; private String snapshotName2; - private String snapshotName3; - private String snapshotName4; - private String snapshotName5; @BeforeEach public void setup() throws Exception { snapshotName1 = UUID.randomUUID().toString(); snapshotName2 = UUID.randomUUID().toString(); - snapshotName3 = UUID.randomUUID().toString(); - snapshotName4 = UUID.randomUUID().toString(); - snapshotName5 = UUID.randomUUID().toString(); } @ValueSource(strings = { @@ -275,46 +267,22 @@ public void testEntryExists() throws Exception { @Test public void testSnapshotLimit() throws Exception { when(getOzoneManager().isAdmin(any())).thenReturn(true); - getOzoneManager().getOmSnapshotManager().close(); - getOzoneManager().getConfiguration().setInt(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, 3); - OmSnapshotManager omSnapshotManager = new OmSnapshotManager(getOzoneManager()); - when(getOzoneManager().getOmSnapshotManager()).thenReturn(omSnapshotManager); + getOmSnapshotManager().setFsSnapshotMaxLimit(1); - // Test Case 1: No snapshots in chain, no in-flight String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); - OMRequest omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); + + OMRequest omRequest = + createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); OMSnapshotCreateRequest omSnapshotCreateRequest = doPreExecute(omRequest); + assertNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 1); - assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); - - // Test Case 2: One snapshot in chain, no in-flight - String key2 = getTableKey(getVolumeName(), getBucketName(), snapshotName2); - OMRequest snapshotRequest2 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); - OMSnapshotCreateRequest omSnapshotCreateRequest2 = doPreExecute(snapshotRequest2); - omSnapshotCreateRequest2.validateAndUpdateCache(getOzoneManager(), 2); - assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key2)); - - // Test Case 3: Two snapshots in chain, one in-flight - // First create an in-flight snapshot - String key3 = getTableKey(getVolumeName(), getBucketName(), snapshotName3); - OMRequest snapshotRequest3 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName3); - OMSnapshotCreateRequest omSnapshotCreateRequest3 = doPreExecute(snapshotRequest3); - // Don't call validateAndUpdateCache to keep it in-flight - - // Try to create another snapshot - should fail as total would be 4 (2 in chain + 1 in-flight + 1 new) - OMRequest snapshotRequest4 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName4); - OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest4)); - assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); - // Complete the in-flight snapshot - omSnapshotCreateRequest3.validateAndUpdateCache(getOzoneManager(), 3); - assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key3)); + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); - // Test Case 4: Three snapshots in chain, no in-flight - // Try to create another snapshot - should fail as we've reached the limit - OMRequest snapshotRequest5 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName5); - omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest5)); + // Should fail as snapshot limit is 1 + OMRequest snapshotRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest)); assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); } From 65922fb69f38d9365ee849b311e6827ed550252f Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 8 Apr 2025 11:23:20 +0800 Subject: [PATCH 12/18] Revert "fix test" This reverts commit 78bc4c4db9157143a3aede2359312a1c60e82675. --- .../java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 9d5959103b7f..3f5a8703dbea 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -275,7 +275,6 @@ public void testValidateSnapshotLimit() throws IOException { snapshotChainManager.addSnapshot(first); assertDoesNotThrow(() -> omSnapshotManager.snapshotLimitCheck()); - omSnapshotManager.decrementInFlightSnapshotCount(); snapshotChainManager.addSnapshot(second); From 3cb97fecdd167a99eb86193c480220bc31f15a35 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 8 Apr 2025 11:27:41 +0800 Subject: [PATCH 13/18] Rename `validateSnapshotLimit` to `snapshotLimitCheck` in snapshotManager --- .../main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java | 2 +- .../ozone/om/request/snapshot/OMSnapshotCreateRequest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 8044e1915499..a892d5cfaf55 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -856,7 +856,7 @@ private void validateSnapshotsExistAndActive(final String volumeName, checkSnapshotActive(toSnapInfo, false); } - public void validateSnapshotLimit() throws IOException { + public void snapshotLimitCheck() throws IOException { OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); SnapshotChainManager snapshotChainManager = diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 389f9a2f7c91..8498072a3dfd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -105,7 +105,7 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); // verify snapshot limit - ozoneManager.getOmSnapshotManager().validateSnapshotLimit(); + ozoneManager.getOmSnapshotManager().snapshotLimitCheck(); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a From 8f0bab25e638ab68c2a6ae439a4516eb19a98ddd Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 15 Apr 2025 19:05:28 +0800 Subject: [PATCH 14/18] Add snaphot inflight count --- .../hadoop/ozone/om/OmSnapshotManager.java | 29 +++++++---- .../snapshot/OMSnapshotCreateRequest.java | 5 +- .../ozone/om/TestOmSnapshotManager.java | 1 + .../snapshot/TestOMSnapshotCreateRequest.java | 50 +++++++++++++++---- 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 553241810445..fe1f28364586 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -65,6 +65,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.StringUtils; @@ -175,6 +176,7 @@ public final class OmSnapshotManager implements AutoCloseable { private final int softCacheSize; private int fsSnapshotMaxLimit; + private final AtomicInteger inFlightSnapshotCount = new AtomicInteger(0); public OmSnapshotManager(OzoneManager ozoneManager) { @@ -856,23 +858,30 @@ private void validateSnapshotsExistAndActive(final String volumeName, checkSnapshotActive(toSnapInfo, false); } - public void snapshotLimitCheck() throws IOException { - OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) - ozoneManager.getMetadataManager(); - SnapshotChainManager snapshotChainManager = - omMetadataManager.getSnapshotChainManager(); + public void snapshotLimitCheck() throws IOException, OMException { + OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); + SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager(); int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); - if (currentSnapshotNum >= fsSnapshotMaxLimit) { + int oldCount = inFlightSnapshotCount.get(); + int newCount = inFlightSnapshotCount.updateAndGet(count -> { + if (currentSnapshotNum + count >= fsSnapshotMaxLimit) { + return count; + } + return count + 1; + }); + if (newCount == oldCount) { throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots.", - fsSnapshotMaxLimit) + " If you already deleted some snapshots, " + + String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + + "Current snapshots: %d, In-flight creations: %d", + fsSnapshotMaxLimit, currentSnapshotNum, newCount) + + " If you already deleted some snapshots, " + "please wait for the background service to complete the cleanup.", OMException.ResultCodes.TOO_MANY_SNAPSHOTS); } } - public void setFsSnapshotMaxLimit(int fsSnapshotMaxLimit) { - this.fsSnapshotMaxLimit = fsSnapshotMaxLimit; + public void decrementInFlightSnapshotCount() { + inFlightSnapshotCount.decrementAndGet(); } private int getIndexFromToken(final String token) throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 8498072a3dfd..411cc58f8e4c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -190,7 +190,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut // pre-replicated key size counter in OmBucketInfo. snapshotInfo.setReferencedSize(estimateBucketDataSize(omBucketInfo)); - addSnapshotInfoToSnapshotChainAndCache(omMetadataManager, context.getIndex()); + addSnapshotInfoToSnapshotChainAndCache(ozoneManager, omMetadataManager, context.getIndex()); omResponse.setCreateSnapshotResponse( CreateSnapshotResponse.newBuilder() @@ -252,6 +252,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut * it was removed at T-5. */ private void addSnapshotInfoToSnapshotChainAndCache( + OzoneManager ozoneManager, OmMetadataManagerImpl omMetadataManager, long transactionLogIndex ) throws IOException { @@ -290,6 +291,8 @@ private void addSnapshotInfoToSnapshotChainAndCache( removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager, snapshotInfo); throw new IOException(exception.getMessage(), exception); + } finally { + ozoneManager.getOmSnapshotManager().decrementInFlightSnapshotCount(); } } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index cada42858ccb..e789ea4228be 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -275,6 +275,7 @@ public void testValidateSnapshotLimit() throws IOException { snapshotChainManager.addSnapshot(first); assertDoesNotThrow(() -> omSnapshotManager.snapshotLimitCheck()); + omSnapshotManager.decrementInFlightSnapshotCount(); snapshotChainManager.addSnapshot(second); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java index cc0157d0b96b..a45991816b1c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/snapshot/TestOMSnapshotCreateRequest.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.request.snapshot; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_FS_SNAPSHOT_MAX_LIMIT; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getFromProtobuf; import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.getTableKey; import static org.apache.hadoop.ozone.om.request.OMRequestTestUtils.createSnapshotRequest; @@ -37,6 +38,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ResolvedBucket; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -64,11 +66,17 @@ public class TestOMSnapshotCreateRequest extends TestSnapshotRequestAndResponse { private String snapshotName1; private String snapshotName2; + private String snapshotName3; + private String snapshotName4; + private String snapshotName5; @BeforeEach public void setup() throws Exception { snapshotName1 = UUID.randomUUID().toString(); snapshotName2 = UUID.randomUUID().toString(); + snapshotName3 = UUID.randomUUID().toString(); + snapshotName4 = UUID.randomUUID().toString(); + snapshotName5 = UUID.randomUUID().toString(); } @ValueSource(strings = { @@ -267,22 +275,46 @@ public void testEntryExists() throws Exception { @Test public void testSnapshotLimit() throws Exception { when(getOzoneManager().isAdmin(any())).thenReturn(true); - getOmSnapshotManager().setFsSnapshotMaxLimit(1); + getOzoneManager().getOmSnapshotManager().close(); + getOzoneManager().getConfiguration().setInt(OZONE_OM_FS_SNAPSHOT_MAX_LIMIT, 3); + OmSnapshotManager omSnapshotManager = new OmSnapshotManager(getOzoneManager()); + when(getOzoneManager().getOmSnapshotManager()).thenReturn(omSnapshotManager); + // Test Case 1: No snapshots in chain, no in-flight String key1 = getTableKey(getVolumeName(), getBucketName(), snapshotName1); - - OMRequest omRequest = - createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); + OMRequest omRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName1); OMSnapshotCreateRequest omSnapshotCreateRequest = doPreExecute(omRequest); - assertNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); omSnapshotCreateRequest.validateAndUpdateCache(getOzoneManager(), 1); - assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key1)); - // Should fail as snapshot limit is 1 - OMRequest snapshotRequest = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); - OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest)); + // Test Case 2: One snapshot in chain, no in-flight + String key2 = getTableKey(getVolumeName(), getBucketName(), snapshotName2); + OMRequest snapshotRequest2 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName2); + OMSnapshotCreateRequest omSnapshotCreateRequest2 = doPreExecute(snapshotRequest2); + omSnapshotCreateRequest2.validateAndUpdateCache(getOzoneManager(), 2); + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key2)); + + // Test Case 3: Two snapshots in chain, one in-flight + // First create an in-flight snapshot + String key3 = getTableKey(getVolumeName(), getBucketName(), snapshotName3); + OMRequest snapshotRequest3 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName3); + OMSnapshotCreateRequest omSnapshotCreateRequest3 = doPreExecute(snapshotRequest3); + // Don't call validateAndUpdateCache to keep it in-flight + + // Try to create another snapshot - should fail as total would be 4 (2 in chain + 1 in-flight + 1 new) + OMRequest snapshotRequest4 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName4); + OMException omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest4)); + assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); + + // Complete the in-flight snapshot + omSnapshotCreateRequest3.validateAndUpdateCache(getOzoneManager(), 3); + assertNotNull(getOmMetadataManager().getSnapshotInfoTable().get(key3)); + + // Test Case 4: Three snapshots in chain, no in-flight + // Try to create another snapshot - should fail as we've reached the limit + OMRequest snapshotRequest5 = createSnapshotRequest(getVolumeName(), getBucketName(), snapshotName5); + omException = assertThrows(OMException.class, () -> doPreExecute(snapshotRequest5)); assertEquals(OMException.ResultCodes.TOO_MANY_SNAPSHOTS, omException.getResult()); } From 6465e9f1835e906107fcdfc84ac5dccd9d5d212e Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 15 Apr 2025 19:29:00 +0800 Subject: [PATCH 15/18] Reset snapshot inflight count when om become leader --- .../snapshot/TestOzoneManagerHASnapshot.java | 23 +++++++++++++++++++ .../hadoop/ozone/om/OmSnapshotManager.java | 8 +++++++ .../om/ratis/OzoneManagerStateMachine.java | 5 ++++ 3 files changed, 36 insertions(+) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java index 92222739235f..073cc4594213 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java @@ -345,6 +345,29 @@ public void testKeyAndSnapshotDeletionService() throws IOException, InterruptedE checkSnapshotIsPurgedFromDB(omFollower, tableKey); } + @Test + public void testSnapshotInFlightCount() throws Exception { + // snapshot inflight count should be reset to 0 when leader changes + + // first do some snapshot creations + String snapshotName1 = UUID.randomUUID().toString(); + store.createSnapshot(volumeName, bucketName, snapshotName1); + + // then shutdown the leader + OzoneManager omLeader = cluster.getOMLeader(); + cluster.shutdownOzoneManager(omLeader); + + // wait for the new leader to be elected + cluster.waitForLeaderOM(); + + // check the inflight count on the new leader is 0 + OzoneManager newLeader = cluster.getOMLeader(); + assertEquals(0, newLeader.getOmSnapshotManager().getInFlightSnapshotCount()); + + // restart the previous shutdowned node + cluster.restartOzoneManager(omLeader, true); + } + private void createSnapshot(String volName, String buckName, String snapName) throws IOException { store.createSnapshot(volName, buckName, snapName); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index fe1f28364586..b9fa7d648906 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -884,6 +884,14 @@ public void decrementInFlightSnapshotCount() { inFlightSnapshotCount.decrementAndGet(); } + public void resetInFlightSnapshotCount() { + inFlightSnapshotCount.set(0); + } + + public int getInFlightSnapshotCount() { + return inFlightSnapshotCount.get(); + } + private int getIndexFromToken(final String token) throws IOException { if (isBlank(token)) { return 0; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index d41cefa0fec6..c42a0b0058f0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -156,6 +156,11 @@ public SnapshotInfo getLatestSnapshot() { return snapshotInfo; } + @Override + public void notifyLeaderReady() { + ozoneManager.getOmSnapshotManager().resetInFlightSnapshotCount(); + } + @Override public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, RaftPeerId newLeaderId) { From 385eeb1a1f204991cfdc260bd91c9e1007eaa14f Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 25 Apr 2025 06:19:27 +0000 Subject: [PATCH 16/18] add sync block on inFlightSnapshotCount --- .../hadoop/ozone/om/OmSnapshotManager.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index b9fa7d648906..ddf77c3afe5a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -862,21 +862,19 @@ public void snapshotLimitCheck() throws IOException, OMException { OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager(); int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); - int oldCount = inFlightSnapshotCount.get(); - int newCount = inFlightSnapshotCount.updateAndGet(count -> { - if (currentSnapshotNum + count >= fsSnapshotMaxLimit) { - return count; + + synchronized (inFlightSnapshotCount) { + int currentCount = inFlightSnapshotCount.get(); + if (currentSnapshotNum + currentCount >= fsSnapshotMaxLimit) { + throw new OMException( + String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + + "Current snapshots: %d, In-flight creations: %d", + fsSnapshotMaxLimit, currentSnapshotNum, currentCount) + + " If you already deleted some snapshots, " + + "please wait for the background service to complete the cleanup.", + OMException.ResultCodes.TOO_MANY_SNAPSHOTS); } - return count + 1; - }); - if (newCount == oldCount) { - throw new OMException( - String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + - "Current snapshots: %d, In-flight creations: %d", - fsSnapshotMaxLimit, currentSnapshotNum, newCount) + - " If you already deleted some snapshots, " + - "please wait for the background service to complete the cleanup.", - OMException.ResultCodes.TOO_MANY_SNAPSHOTS); + inFlightSnapshotCount.set(currentCount + 1); } } From 55cb41807daaa54f8a1e56ce8727b1060a8e8776 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 25 Apr 2025 11:37:11 +0000 Subject: [PATCH 17/18] use AtomicReference for exception handling --- .../hadoop/ozone/om/OmSnapshotManager.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 1fb79005ae50..2f89ef92b19b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -66,6 +66,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.StringUtils; @@ -873,19 +874,23 @@ public void snapshotLimitCheck() throws IOException, OMException { OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager(); int currentSnapshotNum = snapshotChainManager.getGlobalSnapshotChain().size(); - - synchronized (inFlightSnapshotCount) { - int currentCount = inFlightSnapshotCount.get(); - if (currentSnapshotNum + currentCount >= fsSnapshotMaxLimit) { - throw new OMException( + + AtomicReference exceptionRef = new AtomicReference<>(null); + inFlightSnapshotCount.updateAndGet(count -> { + if (currentSnapshotNum + count >= fsSnapshotMaxLimit) { + exceptionRef.set(new OMException( String.format("Snapshot limit of %d reached. Cannot create more snapshots. " + "Current snapshots: %d, In-flight creations: %d", - fsSnapshotMaxLimit, currentSnapshotNum, currentCount) + + fsSnapshotMaxLimit, currentSnapshotNum, count) + " If you already deleted some snapshots, " + "please wait for the background service to complete the cleanup.", - OMException.ResultCodes.TOO_MANY_SNAPSHOTS); + OMException.ResultCodes.TOO_MANY_SNAPSHOTS)); + return count; } - inFlightSnapshotCount.set(currentCount + 1); + return count + 1; + }); + if (exceptionRef.get() != null) { + throw exceptionRef.get(); } } From 1106481f17e6a14827ab6cb1f424d3ed2c48f139 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Sat, 26 Apr 2025 08:47:54 +0000 Subject: [PATCH 18/18] move snapshotLimitCheck to the place that wont have further error --- .../ozone/om/request/snapshot/OMSnapshotCreateRequest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java index 411cc58f8e4c..8efa517a750b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java @@ -104,8 +104,6 @@ public OMSnapshotCreateRequest(OMRequest omRequest) { @RequireSnapshotFeatureState(true) public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { final OMRequest omRequest = super.preExecute(ozoneManager); - // verify snapshot limit - ozoneManager.getOmSnapshotManager().snapshotLimitCheck(); // Verify name OmUtils.validateSnapshotName(snapshotName); // Updating the volumeName & bucketName in case the bucket is a linked bucket. We need to do this before a @@ -122,6 +120,8 @@ public OMRequest preExecute(OzoneManager ozoneManager) throws IOException { "Only bucket owners and Ozone admins can create snapshots", OMException.ResultCodes.PERMISSION_DENIED); } + // verify snapshot limit + ozoneManager.getOmSnapshotManager().snapshotLimitCheck(); CreateSnapshotRequest.Builder createSnapshotRequest = omRequest.getCreateSnapshotRequest().toBuilder() .setSnapshotId(toProtobuf(UUID.randomUUID())) .setVolumeName(volumeName)