From ae218781fb59d6c97c384a4cbf6457be7d15f2b2 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sat, 31 May 2025 12:50:23 -0400 Subject: [PATCH 01/35] HDDS-13159. Refactor KeyManagerImpl for getting deleted subdirectories and deleted subFiles Change-Id: Ic1fc709b3963cde14c2a7fb64b687322a29e642a --- .../hadoop/ozone/om/KeyManagerImpl.java | 102 +++++------------- .../ozone/om/request/file/OMFileRequest.java | 16 +++ 2 files changed, 43 insertions(+), 75 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index b399d6bb9ceb..138e640dcda4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -160,6 +160,7 @@ import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils; import org.apache.hadoop.ozone.om.helpers.OzoneFileStatus; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.WithParentObjectId; import org.apache.hadoop.ozone.om.request.OMClientRequest; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.om.request.util.OMMultipartUploadUtils; @@ -2196,101 +2197,52 @@ private void slimLocationVersion(OmKeyInfo... 
keyInfos) { @Override public DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) throws IOException { - String seekDirInDB = metadataManager.getOzonePathKey(volumeId, bucketId, - parentInfo.getObjectID(), ""); - long countEntries = 0; - - Table dirTable = metadataManager.getDirectoryTable(); - try (TableIterator> - iterator = dirTable.iterator(seekDirInDB)) { - return gatherSubDirsWithIterator(parentInfo, countEntries, iterator, remainingBufLimit); - } - - } - - private DeleteKeysResult gatherSubDirsWithIterator(OmKeyInfo parentInfo, - long countEntries, - TableIterator> iterator, long remainingBufLimit) - throws IOException { - List directories = new ArrayList<>(); - long consumedSize = 0; - boolean processedSubDirs = false; - - while (iterator.hasNext() && remainingBufLimit > 0) { - Table.KeyValue entry = iterator.next(); - OmDirectoryInfo dirInfo = entry.getValue(); - long objectSerializedSize = entry.getRawSize(); - if (!OMFileRequest.isImmediateChild(dirInfo.getParentObjectID(), - parentInfo.getObjectID())) { - processedSubDirs = true; - break; - } - if (!metadataManager.getDirectoryTable().isExist(entry.getKey())) { - continue; - } - if (remainingBufLimit - objectSerializedSize < 0) { - break; - } - String dirName = OMFileRequest.getAbsolutePath(parentInfo.getKeyName(), - dirInfo.getName()); - OmKeyInfo omKeyInfo = OMFileRequest.getOmKeyInfo( - parentInfo.getVolumeName(), parentInfo.getBucketName(), dirInfo, - dirName); - directories.add(omKeyInfo); - countEntries++; - remainingBufLimit -= objectSerializedSize; - consumedSize += objectSerializedSize; - } - - processedSubDirs = processedSubDirs || (!iterator.hasNext()); - - return new DeleteKeysResult(directories, consumedSize, processedSubDirs); + return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getDirectoryTable(), + omDirectoryInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, omDirectoryInfo), 
remainingBufLimit); } - @Override - public DeleteKeysResult getPendingDeletionSubFiles(long volumeId, - long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) - throws IOException { - List files = new ArrayList<>(); + private DeleteKeysResult gatherSubPathsWithIterator( + long volumeId, long bucketId, OmKeyInfo parentInfo, + Table table, Function deleteKeyTransformer, + long remainingBufLimit) throws IOException { + List keyInfos = new ArrayList<>(); String seekFileInDB = metadataManager.getOzonePathKey(volumeId, bucketId, parentInfo.getObjectID(), ""); long consumedSize = 0; - boolean processedSubFiles = false; - - Table fileTable = metadataManager.getFileTable(); - try (TableIterator> - iterator = fileTable.iterator(seekFileInDB)) { - + boolean processedSubPaths = false; + try (TableIterator> iterator = table.iterator(seekFileInDB)) { while (iterator.hasNext() && remainingBufLimit > 0) { - Table.KeyValue entry = iterator.next(); - OmKeyInfo fileInfo = entry.getValue(); + Table.KeyValue entry = iterator.next(); + T withParentObjectId = entry.getValue(); long objectSerializedSize = entry.getRawSize(); - if (!OMFileRequest.isImmediateChild(fileInfo.getParentObjectID(), + if (!OMFileRequest.isImmediateChild(withParentObjectId.getParentObjectID(), parentInfo.getObjectID())) { - processedSubFiles = true; + processedSubPaths = true; break; } - if (!metadataManager.getFileTable().isExist(entry.getKey())) { + if (!table.isExist(entry.getKey())) { continue; } if (remainingBufLimit - objectSerializedSize < 0) { break; } - fileInfo.setFileName(fileInfo.getKeyName()); - String fullKeyPath = OMFileRequest.getAbsolutePath( - parentInfo.getKeyName(), fileInfo.getKeyName()); - fileInfo.setKeyName(fullKeyPath); - - files.add(fileInfo); + OmKeyInfo keyInfo = deleteKeyTransformer.apply(withParentObjectId); + keyInfos.add(keyInfo); remainingBufLimit -= objectSerializedSize; consumedSize += objectSerializedSize; } - processedSubFiles = processedSubFiles || (!iterator.hasNext()); 
+ processedSubPaths = processedSubPaths || (!iterator.hasNext()); + return new DeleteKeysResult(keyInfos, consumedSize, processedSubPaths); } + } - return new DeleteKeysResult(files, consumedSize, processedSubFiles); + @Override + public DeleteKeysResult getPendingDeletionSubFiles(long volumeId, + long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) + throws IOException { + return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getFileTable(), + keyInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, keyInfo), + remainingBufLimit); } public boolean isBucketFSOptimized(String volName, String buckName) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileRequest.java index f8058bd7a897..75ec1d5b7277 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/file/OMFileRequest.java @@ -720,6 +720,22 @@ public static OzoneFileStatus getOMKeyInfoIfExists( return null; } + public static OmKeyInfo getKeyInfoWithFullPath(OmKeyInfo parentInfo, OmDirectoryInfo directoryInfo) { + String dirName = OMFileRequest.getAbsolutePath(parentInfo.getKeyName(), + directoryInfo.getName()); + return OMFileRequest.getOmKeyInfo( + parentInfo.getVolumeName(), parentInfo.getBucketName(), directoryInfo, + dirName); + } + + public static OmKeyInfo getKeyInfoWithFullPath(OmKeyInfo parentInfo, OmKeyInfo omKeyInfo) { + omKeyInfo.setFileName(omKeyInfo.getKeyName()); + String fullKeyPath = OMFileRequest.getAbsolutePath( + parentInfo.getKeyName(), omKeyInfo.getKeyName()); + omKeyInfo.setKeyName(fullKeyPath); + return omKeyInfo; + } + /** * Prepare OmKeyInfo from OmDirectoryInfo. 
* From cd0157b3b782ef30e6397bc84125fcd426ea0e07 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sat, 31 May 2025 12:56:03 -0400 Subject: [PATCH 02/35] HDDS-13159. Static Import KeyValue Change-Id: I47b24dfc3b5afa3cefbdc85ac7b3e4a9b8c94869 --- .../hadoop/ozone/om/KeyManagerImpl.java | 51 ++++++++++--------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index 138e640dcda4..da080be68cac 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -131,6 +131,7 @@ import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.hdds.utils.db.StringCodec; import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; @@ -735,7 +736,7 @@ public ListKeysResult listKeys(String volumeName, String bucketName, @Override public PendingKeysDeletion getPendingDeletionKeys( - final CheckedFunction, Boolean, IOException> filter, final int count) + final CheckedFunction, Boolean, IOException> filter, final int count) throws IOException { return getPendingDeletionKeys(null, null, null, filter, count); } @@ -743,13 +744,13 @@ public PendingKeysDeletion getPendingDeletionKeys( @Override public PendingKeysDeletion getPendingDeletionKeys( String volume, String bucket, String startKey, - CheckedFunction, Boolean, IOException> filter, + CheckedFunction, Boolean, IOException> filter, int count) throws IOException { List keyBlocksList = Lists.newArrayList(); Map keysToModify = new HashMap<>(); // Bucket prefix would be empty if volume 
is empty i.e. either null or "". Optional bucketPrefix = getBucketPrefix(volume, bucket, false); - try (TableIterator> + try (TableIterator> delKeyIter = metadataManager.getDeletedTable().iterator(bucketPrefix.orElse(""))) { /* Seeking to the start key if it not null. The next key picked up would be ensured to start with the bucket @@ -761,7 +762,7 @@ public PendingKeysDeletion getPendingDeletionKeys( int currentCount = 0; while (delKeyIter.hasNext() && currentCount < count) { RepeatedOmKeyInfo notReclaimableKeyInfo = new RepeatedOmKeyInfo(); - Table.KeyValue kv = delKeyIter.next(); + KeyValue kv = delKeyIter.next(); if (kv != null) { List blockGroupList = Lists.newArrayList(); // Multiple keys with the same path can be queued in one DB entry @@ -796,12 +797,12 @@ public PendingKeysDeletion getPendingDeletionKeys( return new PendingKeysDeletion(keyBlocksList, keysToModify); } - private List> getTableEntries(String startKey, - TableIterator> tableIterator, + private List> getTableEntries(String startKey, + TableIterator> tableIterator, Function valueFunction, - CheckedFunction, Boolean, IOException> filter, + CheckedFunction, Boolean, IOException> filter, int size) throws IOException { - List> entries = new ArrayList<>(); + List> entries = new ArrayList<>(); /* Seek to the start key if it's not null. The next key in queue is ensured to start with the bucket prefix, {@link org.apache.hadoop.hdds.utils.db.Table#iterator(bucketPrefix)} would ensure this. 
*/ @@ -812,7 +813,7 @@ private List> getTableEntries(String startKey, } int currentCount = 0; while (tableIterator.hasNext() && currentCount < size) { - Table.KeyValue kv = tableIterator.next(); + KeyValue kv = tableIterator.next(); if (kv != null && filter.apply(kv)) { entries.add(Table.newKeyValue(kv.getKey(), valueFunction.apply(kv.getValue()))); currentCount++; @@ -834,11 +835,11 @@ private Optional getBucketPrefix(String volumeName, String bucketName, b } @Override - public List> getRenamesKeyEntries( + public List> getRenamesKeyEntries( String volume, String bucket, String startKey, - CheckedFunction, Boolean, IOException> filter, int size) throws IOException { + CheckedFunction, Boolean, IOException> filter, int size) throws IOException { Optional bucketPrefix = getBucketPrefix(volume, bucket, false); - try (TableIterator> + try (TableIterator> renamedKeyIter = metadataManager.getSnapshotRenamedTable().iterator(bucketPrefix.orElse(""))) { return getTableEntries(startKey, renamedKeyIter, Function.identity(), filter, size); } @@ -883,12 +884,12 @@ private CheckedFunction getPreviousSnapshotOzone } @Override - public List>> getDeletedKeyEntries( + public List>> getDeletedKeyEntries( String volume, String bucket, String startKey, - CheckedFunction, Boolean, IOException> filter, + CheckedFunction, Boolean, IOException> filter, int size) throws IOException { Optional bucketPrefix = getBucketPrefix(volume, bucket, false); - try (TableIterator> + try (TableIterator> delKeyIter = metadataManager.getDeletedTable().iterator(bucketPrefix.orElse(""))) { return getTableEntries(startKey, delKeyIter, RepeatedOmKeyInfo::cloneOmKeyInfoList, filter, size); } @@ -1538,10 +1539,10 @@ private OmKeyInfo createFakeDirIfShould(String volume, String bucket, } } - try (TableIterator> + try (TableIterator> keyTblItr = keyTable.iterator(targetKey)) { while (keyTblItr.hasNext()) { - Table.KeyValue keyValue = keyTblItr.next(); + KeyValue keyValue = keyTblItr.next(); if (keyValue != null) 
{ String key = keyValue.getKey(); // HDDS-7871: RocksIterator#seek() may position at the key @@ -1852,7 +1853,7 @@ public List listStatus(OmKeyArgs args, boolean recursive, String keyArgs = OzoneFSUtils.addTrailingSlashIfNeeded( metadataManager.getOzoneKey(volumeName, bucketName, keyName)); - TableIterator> iterator; + TableIterator> iterator; Table keyTable; metadataManager.getLock().acquireReadLock(BUCKET_LOCK, volumeName, bucketName); @@ -1909,12 +1910,12 @@ public List listStatus(OmKeyArgs args, boolean recursive, return fileStatusList; } - private TableIterator> + private TableIterator> getIteratorForKeyInTableCache( boolean recursive, String startKey, String volumeName, String bucketName, TreeMap cacheKeyMap, String keyArgs, Table keyTable) throws IOException { - TableIterator> iterator; + TableIterator> iterator; Iterator, CacheValue>> cacheIter = keyTable.cacheIterator(); String startCacheKey = metadataManager.getOzoneKey(volumeName, bucketName, startKey); @@ -1932,12 +1933,12 @@ private void findKeyInDbWithIterator(boolean recursive, String startKey, TreeMap cacheKeyMap, String keyArgs, Table keyTable, TableIterator> iterator) + ? extends KeyValue> iterator) throws IOException { // Then, find key in DB String seekKeyInDb = metadataManager.getOzoneKey(volumeName, bucketName, startKey); - Table.KeyValue entry = iterator.seek(seekKeyInDb); + KeyValue entry = iterator.seek(seekKeyInDb); int countEntries = 0; if (iterator.hasNext()) { if (entry.getKey().equals(keyArgs)) { @@ -2188,7 +2189,7 @@ private void slimLocationVersion(OmKeyInfo... 
keyInfos) { } @Override - public TableIterator> getDeletedDirEntries( + public TableIterator> getDeletedDirEntries( String volume, String bucket) throws IOException { Optional bucketPrefix = getBucketPrefix(volume, bucket, true); return metadataManager.getDeletedDirTable().iterator(bucketPrefix.orElse("")); @@ -2210,9 +2211,9 @@ private DeleteKeysResult gatherSubPathsWithIterat parentInfo.getObjectID(), ""); long consumedSize = 0; boolean processedSubPaths = false; - try (TableIterator> iterator = table.iterator(seekFileInDB)) { + try (TableIterator> iterator = table.iterator(seekFileInDB)) { while (iterator.hasNext() && remainingBufLimit > 0) { - Table.KeyValue entry = iterator.next(); + KeyValue entry = iterator.next(); T withParentObjectId = entry.getValue(); long objectSerializedSize = entry.getRawSize(); if (!OMFileRequest.isImmediateChild(withParentObjectId.getParentObjectID(), From 4c73e3a0853d1affdcb736599a44e4bff275a469 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 2 Jun 2025 23:36:22 -0400 Subject: [PATCH 03/35] HDDS-13034. 
Refactor Directory Deleting Service to use ReclaimableDirectoryFilter & ReclaimableKeyFilter Change-Id: Iffdda403cba914ddef2b75808cfbef1a72b9a2d3 --- .../apache/hadoop/ozone/om/OMConfigKeys.java | 8 - .../TestDirectoryDeletingServiceWithFSO.java | 8 +- ...napshotDeletingServiceIntegrationTest.java | 73 +++- .../TestSnapshotDirectoryCleaningService.java | 8 +- .../apache/hadoop/ozone/om/KeyManager.java | 22 +- .../hadoop/ozone/om/KeyManagerImpl.java | 68 ++- .../ozone/om/OMDBCheckpointServlet.java | 2 +- .../service/AbstractKeyDeletingService.java | 83 ++-- .../om/service/DirectoryDeletingService.java | 395 ++++++++++-------- .../ozone/om/service/KeyDeletingService.java | 8 +- .../om/service/TestKeyDeletingService.java | 18 +- 11 files changed, 398 insertions(+), 295 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 12a809043761..242ae03f0ccb 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -387,14 +387,6 @@ public final class OMConfigKeys { */ public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = false; - public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = - "ozone.snapshot.directory.service.interval"; - public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT - = "24h"; - public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT = - "ozone.snapshot.directory.service.timeout"; - public static final String - OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT = "300s"; public static final String OZONE_THREAD_NUMBER_DIR_DELETION = "ozone.thread.number.dir.deletion"; diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index 4c6a21f1cbb5..a39aaf565ff7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -79,6 +79,8 @@ import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.hadoop.ozone.om.service.KeyDeletingService; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; @@ -592,8 +594,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() OmSnapshotManager omSnapshotManager = Mockito.spy(ozoneManager.getOmSnapshotManager()); when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> omSnapshotManager); DirectoryDeletingService service = Mockito.spy(new DirectoryDeletingService(1000, TimeUnit.MILLISECONDS, 1000, - ozoneManager, - cluster.getConf(), 1)); + ozoneManager, cluster.getConf(), 1, false)); service.shutdown(); final int initialSnapshotCount = (int) cluster.getOzoneManager().getMetadataManager().countRowsInTable(snapshotInfoTable); @@ -627,7 +628,8 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() } return null; }).when(service).optimizeDirDeletesAndSubmitRequest(anyLong(), anyLong(), - anyLong(), anyList(), anyList(), eq(null), anyLong(), anyLong(), Mockito.any(), any(), + anyLong(), anyList(), anyList(), eq(null), anyLong(), anyLong(), any(), + any(ReclaimableDirFilter.class), 
any(ReclaimableKeyFilter.class), any(), anyLong()); Mockito.doAnswer(i -> { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 3c7b35dd23ed..73fe9b007ac6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -127,8 +127,7 @@ public void setup() throws Exception { 500, TimeUnit.MILLISECONDS); conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT, - 10000, TimeUnit.MILLISECONDS); - conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, 500); + 10, TimeUnit.MILLISECONDS); conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 500); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 500, TimeUnit.MILLISECONDS); @@ -251,14 +250,18 @@ public void testSnapshotWithFSO() throws Exception { om.getMetadataManager().getDeletedDirTable(); Table renamedTable = om.getMetadataManager().getSnapshotRenamedTable(); - BucketArgs bucketArgs = new BucketArgs.Builder() .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) .build(); - OzoneBucket bucket2 = TestDataUtil.createBucket( client, VOLUME_NAME, bucketArgs, BUCKET_NAME_FSO); + assertTableRowCount(snapshotInfoTable, 0); + assertTableRowCount(deletedDirTable, 0); + assertTableRowCount(deletedTable, 0); + + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); // Create 10 keys for (int i = 1; i <= 10; i++) { TestDataUtil.createKey(bucket2, "key" + i, CONTENT.array()); @@ -382,8 +385,35 @@ public void testSnapshotWithFSO() throws Exception { 
SnapshotInfo deletedSnap = om.getMetadataManager() .getSnapshotInfoTable().get("/vol1/bucketfso/snap2"); + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + for (int i = 1; i <= 3; i++) { + String snapshotName = "snap" + i; + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snap = om.getMetadataManager().getSnapshotInfo(VOLUME_NAME, BUCKET_NAME_FSO, snapshotName); + LOG.info("SnapshotInfo for {} is {}", snapshotName, snap.getSnapshotId()); + return snap.isDeepCleaned() && snap.isDeepCleanedDeletedDir(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 2000, 100000000); + } + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); + + OmSnapshot snap2 = om.getOmSnapshotManager() + .getSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap2").get(); + //Child directories should have moved to deleted Directory table to deleted directory table of snap2 + assertTableRowCount(dirTable, 0); + assertTableRowCount(keyTable, 11); + assertTableRowCount(snap2.getMetadataManager().getDeletedDirTable(), 12); + assertTableRowCount(snap2.getMetadataManager().getDeletedTable(), 11); + client.getObjectStore().deleteSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap2"); + + assertTableRowCount(snapshotInfoTable, 2); // Delete 2 overwritten keys @@ -407,7 +437,28 @@ public void testSnapshotWithFSO() throws Exception { snap3.getMetadataManager().getDeletedTable(); assertTableRowCount(snapRenamedTable, 4); - assertTableRowCount(snapDeletedDirTable, 3); + assertTableRowCount(snapDeletedDirTable, 12); + // All the keys deleted before snapshot2 is moved to snap3 + assertTableRowCount(snapDeletedTable, 18); + + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + for (int snapshotIndex : new int[] {1, 3}) { + String snapshotName = "snap" + snapshotIndex; + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snap = 
om.getMetadataManager().getSnapshotInfo(VOLUME_NAME, BUCKET_NAME_FSO, snapshotName); + return snap.isDeepCleaned() && snap.isDeepCleanedDeletedDir(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 2000, 100000); + } + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); + + assertTableRowCount(snapRenamedTable, 4); + assertTableRowCount(snapDeletedDirTable, 12); // All the keys deleted before snapshot2 is moved to snap3 assertTableRowCount(snapDeletedTable, 15); @@ -418,11 +469,13 @@ public void testSnapshotWithFSO() throws Exception { // Delete Snapshot3 and check entries moved to active DB client.getObjectStore().deleteSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap3"); - + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); // Check entries moved to active DB assertTableRowCount(snapshotInfoTable, 1); assertTableRowCount(renamedTable, 4); - assertTableRowCount(deletedDirTable, 3); + assertTableRowCount(deletedDirTable, 12); + assertTableRowCount(deletedTable, 15); UncheckedAutoCloseableSupplier rcSnap1 = om.getOmSnapshotManager().getSnapshot( @@ -469,10 +522,12 @@ private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean throws InterruptedException, TimeoutException, IOException { OzoneManager ozoneManager = Mockito.spy(om); om.getKeyManager().getDirDeletingService().shutdown(); + KeyManager keyManager = Mockito.spy(om.getKeyManager()); + when(ozoneManager.getKeyManager()).thenReturn(keyManager); GenericTestUtils.waitFor(() -> om.getKeyManager().getDirDeletingService().getThreadCount() == 0, 1000, 100000); DirectoryDeletingService directoryDeletingService = Mockito.spy(new DirectoryDeletingService(10000, - TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1)); + TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1, false)); directoryDeletingService.shutdown(); GenericTestUtils.waitFor(() 
-> directoryDeletingService.getThreadCount() == 0, 1000, 100000); @@ -481,7 +536,7 @@ private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean GenericTestUtils.waitFor(dirDeletionWaitStarted::get, 1000, 100000); dirDeletionStarted.set(true); return i.callRealMethod(); - }).when(directoryDeletingService).getPendingDeletedDirInfo(); + }).when(keyManager).getDeletedDirEntries(); return directoryDeletingService; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index 8591c6d1e88b..f854448b1679 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -49,7 +49,7 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; +import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.api.AfterAll; @@ -76,7 +76,7 @@ public class TestSnapshotDirectoryCleaningService { @BeforeAll public static void init() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, 2500); + conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2500); conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 2500, TimeUnit.MILLISECONDS); @@ -140,8 +140,8 @@ public void 
testExclusiveSizeWithDirectoryDeepClean() throws Exception { cluster.getOzoneManager().getMetadataManager().getDeletedTable(); Table snapshotInfoTable = cluster.getOzoneManager().getMetadataManager().getSnapshotInfoTable(); - SnapshotDirectoryCleaningService snapshotDirectoryCleaningService = - cluster.getOzoneManager().getKeyManager().getSnapshotDirectoryService(); + DirectoryDeletingService snapshotDirectoryCleaningService = + cluster.getOzoneManager().getKeyManager().getDirDeletingService(); /* DirTable /v/b/snapDir diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java index 0af075035704..7e76885c49bd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java @@ -41,7 +41,6 @@ import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket; import org.apache.ratis.util.function.CheckedFunction; @@ -274,7 +273,14 @@ OmMultipartUploadListParts listParts(String volumeName, String bucketName, void refresh(OmKeyInfo key) throws IOException; /** - * Returns an iterator for pending deleted directories. + * Returns an iterator for pending deleted directories all buckets. + */ + default TableIterator> getDeletedDirEntries() throws IOException { + return getDeletedDirEntries(null, null); + } + + /** + * Returns an iterator for pending deleted directories for volume and bucket. 
* @throws IOException */ TableIterator> getDeletedDirEntries( @@ -301,7 +307,8 @@ default List> getDeletedDirEntries(String volu * @throws IOException */ DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, - OmKeyInfo parentInfo, long remainingBufLimit) throws IOException; + OmKeyInfo parentInfo, CheckedFunction, Boolean, IOException> filter, + long remainingBufLimit) throws IOException; /** * Returns all sub files under the given parent directory. @@ -311,7 +318,8 @@ DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, * @throws IOException */ DeleteKeysResult getPendingDeletionSubFiles(long volumeId, - long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) + long bucketId, OmKeyInfo parentInfo, + CheckedFunction, Boolean, IOException> filter, long remainingBufLimit) throws IOException; /** @@ -344,12 +352,6 @@ DeleteKeysResult getPendingDeletionSubFiles(long volumeId, */ SnapshotDeletingService getSnapshotDeletingService(); - /** - * Returns the instance of Snapshot Directory service. - * @return Background service. - */ - SnapshotDirectoryCleaningService getSnapshotDirectoryService(); - /** * Returns the instance of CompactionService. 
* @return BackgroundService diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index da080be68cac..578afc630a1c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -58,10 +58,6 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_THREAD_NUMBER_DIR_DELETION; @@ -171,7 +167,6 @@ import org.apache.hadoop.ozone.om.service.MultipartUploadCleanupService; import org.apache.hadoop.ozone.om.service.OpenKeyCleanupService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PartKeyInfo; import 
org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; @@ -214,7 +209,6 @@ public class KeyManagerImpl implements KeyManager { private BackgroundService openKeyCleanupService; private BackgroundService multipartUploadCleanupService; - private SnapshotDirectoryCleaningService snapshotDirectoryCleaningService; private DNSToSwitchMapping dnsToSwitchMapping; private CompactionService compactionService; @@ -292,7 +286,7 @@ public void start(OzoneConfiguration configuration) { dirDeletingService = new DirectoryDeletingService(dirDeleteInterval, TimeUnit.MILLISECONDS, serviceTimeout, ozoneManager, configuration, - dirDeletingServiceCorePoolSize); + dirDeletingServiceCorePoolSize, isSnapshotDeepCleaningEnabled); dirDeletingService.start(); } @@ -350,22 +344,6 @@ public void start(OzoneConfiguration configuration) { } } - if (isSnapshotDeepCleaningEnabled && snapshotDirectoryCleaningService == null && - ozoneManager.isFilesystemSnapshotEnabled()) { - long dirDeleteInterval = configuration.getTimeDuration( - OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, - OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT, - TimeUnit.MILLISECONDS); - long serviceTimeout = configuration.getTimeDuration( - OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT, - OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT, - TimeUnit.MILLISECONDS); - snapshotDirectoryCleaningService = new SnapshotDirectoryCleaningService( - dirDeleteInterval, TimeUnit.MILLISECONDS, serviceTimeout, - ozoneManager, scmClient.getBlockClient()); - snapshotDirectoryCleaningService.start(); - } - if (multipartUploadCleanupService == null) { long serviceInterval = configuration.getTimeDuration( OZONE_OM_MPU_CLEANUP_SERVICE_INTERVAL, @@ -443,10 +421,6 @@ public void stop() throws IOException { multipartUploadCleanupService.shutdown(); multipartUploadCleanupService = null; } - if (snapshotDirectoryCleaningService != null) { - snapshotDirectoryCleaningService.shutdown(); - snapshotDirectoryCleaningService = null; - } if (compactionService != null) 
{ compactionService.shutdown(); compactionService = null; @@ -955,11 +929,6 @@ public SnapshotDeletingService getSnapshotDeletingService() { return snapshotDeletingService; } - @Override - public SnapshotDirectoryCleaningService getSnapshotDirectoryService() { - return snapshotDirectoryCleaningService; - } - @Override public CompactionService getCompactionService() { return compactionService; @@ -2197,14 +2166,19 @@ private void slimLocationVersion(OmKeyInfo... keyInfos) { @Override public DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, - OmKeyInfo parentInfo, long remainingBufLimit) throws IOException { + OmKeyInfo parentInfo, CheckedFunction, Boolean, IOException> filter, + long remainingBufLimit) throws IOException { return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getDirectoryTable(), - omDirectoryInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, omDirectoryInfo), remainingBufLimit); + kv -> Table.newKeyValue(metadataManager.getOzoneDeletePathKey(kv.getValue().getObjectID(), kv.getKey()), + OMFileRequest.getKeyInfoWithFullPath(parentInfo, kv.getValue())), + filter, remainingBufLimit); } private DeleteKeysResult gatherSubPathsWithIterator( long volumeId, long bucketId, OmKeyInfo parentInfo, - Table table, Function deleteKeyTransformer, + Table table, + CheckedFunction, KeyValue, IOException> deleteKeyTransformer, + CheckedFunction, Boolean, IOException> deleteKeyFilter, long remainingBufLimit) throws IOException { List keyInfos = new ArrayList<>(); String seekFileInDB = metadataManager.getOzonePathKey(volumeId, bucketId, @@ -2227,10 +2201,12 @@ private DeleteKeysResult gatherSubPathsWithIterat if (remainingBufLimit - objectSerializedSize < 0) { break; } - OmKeyInfo keyInfo = deleteKeyTransformer.apply(withParentObjectId); - keyInfos.add(keyInfo); - remainingBufLimit -= objectSerializedSize; - consumedSize += objectSerializedSize; + KeyValue keyInfo = deleteKeyTransformer.apply(entry); + if 
(deleteKeyFilter.apply(keyInfo)) { + keyInfos.add(keyInfo.getValue()); + remainingBufLimit -= objectSerializedSize; + consumedSize += objectSerializedSize; + } } processedSubPaths = processedSubPaths || (!iterator.hasNext()); return new DeleteKeysResult(keyInfos, consumedSize, processedSubPaths); @@ -2239,11 +2215,17 @@ private DeleteKeysResult gatherSubPathsWithIterat @Override public DeleteKeysResult getPendingDeletionSubFiles(long volumeId, - long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) + long bucketId, OmKeyInfo parentInfo, + CheckedFunction, Boolean, IOException> filter, long remainingBufLimit) throws IOException { - return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getFileTable(), - keyInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, keyInfo), - remainingBufLimit); + CheckedFunction, KeyValue, IOException> tranformer = kv -> { + OmKeyInfo keyInfo = OMFileRequest.getKeyInfoWithFullPath(parentInfo, kv.getValue()); + String deleteKey = metadataManager.getOzoneDeletePathKey(keyInfo.getObjectID(), + metadataManager.getOzoneKey(keyInfo.getVolumeName(), keyInfo.getBucketName(), keyInfo.getKeyName())); + return Table.newKeyValue(deleteKey, keyInfo); + }; + return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getFileTable(), tranformer, + filter, remainingBufLimit); } public boolean isBucketFSOptimized(String volName, String buckName) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 6117a7e373bf..bcff75fd0399 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -704,9 +704,9 @@ static class Lock extends BootstrapStateHandler.Lock { locks = Stream.of( 
om.getKeyManager().getDeletingService(), + om.getKeyManager().getDirDeletingService(), om.getKeyManager().getSnapshotSstFilteringService(), om.getKeyManager().getSnapshotDeletingService(), - om.getKeyManager().getSnapshotDirectoryService(), om.getMetadataManager().getStore().getRocksDBCheckpointDiffer() ) .filter(Objects::nonNull) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 536406111a96..ee699e16c31d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -28,11 +28,13 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -58,12 +60,14 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgeKeysRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.util.function.CheckedFunction; /** * Abstracts common code from KeyDeletingService and DirectoryDeletingService @@ -103,7 +107,7 @@ public AbstractKeyDeletingService(String serviceName, long interval, protected Pair processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, - String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException, InterruptedException { + String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { long startTime = Time.monotonicNow(); Pair purgeResult = Pair.of(0, false); @@ -143,7 +147,7 @@ protected Pair processKeyDeletes(List keyBlocksLis */ private Pair submitPurgeKeysRequest(List results, Map keysToModify, List renameEntriesToBeDeleted, - String snapTableKey, UUID expectedPreviousSnapshotId) throws InterruptedException { + String snapTableKey, UUID expectedPreviousSnapshotId) { List purgeKeys = new ArrayList<>(); // Put all keys to be purged in a list @@ -224,13 +228,13 @@ private Pair submitPurgeKeysRequest(List submitPurgeKeysRequest(List requests, - String snapTableKey, - UUID expectedPreviousSnapshotId) { + protected OMResponse submitPurgePaths(List requests, + String snapTableKey, UUID expectedPreviousSnapshotId) { OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); @@ -267,12 +270,13 @@ protected void submitPurgePaths(List requests, .setClientId(clientId.toString()) .build(); - // Submit Purge paths request to OM - try { - submitRequest(omRequest); - } catch (ServiceException e) { + // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. + try (BootstrapStateHandler.Lock lock = snapTableKey != null ? 
getBootstrapStateLock().lock() : null) { + return submitRequest(omRequest); + } catch (ServiceException | InterruptedException e) { LOG.error("PurgePaths request failed. Will retry at next run.", e); } + return null; } private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( @@ -305,10 +309,12 @@ private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( return purgePathsRequest.build(); } - protected PurgePathRequest prepareDeleteDirRequest( - OmKeyInfo pendingDeletedDirInfo, String delDirName, + protected Optional prepareDeleteDirRequest( + OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, List> subDirList, - KeyManager keyManager, long remainingBufLimit) throws IOException { + KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableFileFilter, + long remainingBufLimit) throws IOException { // step-0: Get one pending deleted directory if (LOG.isDebugEnabled()) { LOG.debug("Pending deleted dir name: {}", @@ -322,7 +328,7 @@ protected PurgePathRequest prepareDeleteDirRequest( // step-1: get all sub directories under the deletedDir DeleteKeysResult subDirDeleteResult = keyManager.getPendingDeletionSubDirs(volumeId, bucketId, - pendingDeletedDirInfo, remainingBufLimit); + pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); List subDirs = subDirDeleteResult.getKeysToDelete(); remainingBufLimit -= subDirDeleteResult.getConsumedSize(); @@ -337,9 +343,10 @@ protected PurgePathRequest prepareDeleteDirRequest( } // step-2: get all sub files under the deletedDir + // Only remove sub files if the parent directory is going to be deleted or can be reclaimed. 
DeleteKeysResult subFileDeleteResult = keyManager.getPendingDeletionSubFiles(volumeId, bucketId, - pendingDeletedDirInfo, remainingBufLimit); + pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); List subFiles = subFileDeleteResult.getKeysToDelete(); if (LOG.isDebugEnabled()) { @@ -350,10 +357,13 @@ protected PurgePathRequest prepareDeleteDirRequest( // step-3: If both sub-dirs and sub-files are exhausted under a parent // directory, only then delete the parent. - String purgeDeletedDir = subDirDeleteResult.isProcessedKeys() && + String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && subFileDeleteResult.isProcessedKeys() ? delDirName : null; - return wrapPurgeRequest(volumeId, bucketId, - purgeDeletedDir, subFiles, subDirs); + if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { + return Optional.empty(); + } + return Optional.of(wrapPurgeRequest(volumeId, bucketId, + purgeDeletedDir, subFiles, subDirs)); } @SuppressWarnings("checkstyle:ParameterNumber") @@ -363,6 +373,8 @@ public void optimizeDirDeletesAndSubmitRequest( List purgePathRequestList, String snapTableKey, long startTime, long remainingBufLimit, KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableDirChecker, + CheckedFunction, Boolean, IOException> reclaimableFileChecker, UUID expectedPreviousSnapshotId, long rnCnt) { // Optimization to handle delete sub-dir and keys to remove quickly @@ -372,30 +384,31 @@ public void optimizeDirDeletesAndSubmitRequest( int consumedSize = 0; while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { try { - Pair stringOmKeyInfoPair - = allSubDirList.get(subDirRecursiveCnt); - PurgePathRequest request = prepareDeleteDirRequest( - stringOmKeyInfoPair.getValue(), - stringOmKeyInfoPair.getKey(), allSubDirList, keyManager, - remainingBufLimit); - consumedSize += request.getSerializedSize(); + Pair stringOmKeyInfoPair = 
allSubDirList.get(subDirRecursiveCnt++); + Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), + stringOmKeyInfoPair.getValue())); + Optional request = prepareDeleteDirRequest( + stringOmKeyInfoPair.getValue(), stringOmKeyInfoPair.getKey(), subDirectoryReclaimable, allSubDirList, + keyManager, reclaimableFileChecker, remainingBufLimit); + if (!request.isPresent()) { + continue; + } + PurgePathRequest requestVal = request.get(); + consumedSize += requestVal.getSerializedSize(); remainingBufLimit -= consumedSize; - purgePathRequestList.add(request); + purgePathRequestList.add(requestVal); // Count up the purgeDeletedDir, subDirs and subFiles - if (request.getDeletedDir() != null - && !request.getDeletedDir().isEmpty()) { + if (requestVal.hasDeletedDir() && !StringUtils.isBlank(requestVal.getDeletedDir())) { subdirDelNum++; } - subDirNum += request.getMarkDeletedSubDirsCount(); - subFileNum += request.getDeletedSubFilesCount(); - subDirRecursiveCnt++; + subDirNum += requestVal.getMarkDeletedSubDirsCount(); + subFileNum += requestVal.getDeletedSubFilesCount(); } catch (IOException e) { LOG.error("Error while running delete directories and files " + "background task. 
Will retry at next run for subset.", e); break; } } - if (!purgePathRequestList.isEmpty()) { submitPurgePaths(purgePathRequestList, snapTableKey, expectedPreviousSnapshotId); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 7451032492ea..f0a1c1fed9e7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -18,15 +18,25 @@ package org.apache.hadoop.ozone.om.service; import com.google.common.annotations.VisibleForTesting; +import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; import java.util.List; -import java.util.Objects; +import java.util.Map; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; @@ -37,15 +47,20 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import 
org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.util.Time; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; @@ -76,31 +91,33 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // Using multi thread for DirDeletion. Multiple threads would read // from parent directory info from deleted directory table concurrently // and send deletion requests. 
- private final int dirDeletingCorePoolSize; private int ratisByteLimit; private final AtomicBoolean suspended; - private AtomicBoolean isRunningOnAOS; - - private final DeletedDirSupplier deletedDirSupplier; - - private AtomicInteger taskCount = new AtomicInteger(0); + private final AtomicBoolean isRunningOnAOS; + private final SnapshotChainManager snapshotChainManager; + private final boolean deepCleanSnapshots; + private final ExecutorService deletionThreadPool; + private final int numberOfParallelThreadsPerStore; public DirectoryDeletingService(long interval, TimeUnit unit, long serviceTimeout, OzoneManager ozoneManager, - OzoneConfiguration configuration, int dirDeletingServiceCorePoolSize) { + OzoneConfiguration configuration, int dirDeletingServiceCorePoolSize, boolean deepCleanSnapshots) { super(DirectoryDeletingService.class.getSimpleName(), interval, unit, dirDeletingServiceCorePoolSize, serviceTimeout, ozoneManager, null); int limit = (int) configuration.getStorageSize( OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT, StorageUnit.BYTES); + this.numberOfParallelThreadsPerStore = dirDeletingServiceCorePoolSize; + this.deletionThreadPool = new ThreadPoolExecutor(0, numberOfParallelThreadsPerStore, interval, unit, + new LinkedBlockingDeque<>(Integer.MAX_VALUE)); + // always go to 90% of max limit for request as other header will be added this.ratisByteLimit = (int) (limit * 0.9); this.suspended = new AtomicBoolean(false); this.isRunningOnAOS = new AtomicBoolean(false); - this.dirDeletingCorePoolSize = dirDeletingServiceCorePoolSize; - deletedDirSupplier = new DeletedDirSupplier(); - taskCount.set(0); + this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); + this.deepCleanSnapshots = deepCleanSnapshots; } private boolean shouldRun() { @@ -115,10 +132,6 @@ public boolean isRunningOnAOS() { return isRunningOnAOS.get(); } - 
public AtomicInteger getTaskCount() { - return taskCount; - } - /** * Suspend the service. */ @@ -142,20 +155,19 @@ public void setRatisByteLimit(int ratisByteLimit) { @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); - if (taskCount.get() > 0) { - LOG.info("{} Directory deleting task(s) already in progress.", - taskCount.get()); - return queue; - } - try { - deletedDirSupplier.reInitItr(); - } catch (IOException ex) { - LOG.error("Unable to get the iterator.", ex); - return queue; - } - taskCount.set(dirDeletingCorePoolSize); - for (int i = 0; i < dirDeletingCorePoolSize; i++) { - queue.add(new DirectoryDeletingService.DirDeletingTask(this)); + queue.add(new DirDeletingTask(this, null)); + if (deepCleanSnapshots) { + Iterator iterator = null; + try { + iterator = snapshotChainManager.iterator(true); + } catch (IOException e) { + LOG.error("Error while initializing snapshot chain iterator."); + return queue; + } + while (iterator.hasNext()) { + UUID snapshotId = iterator.next(); + queue.add(new DirDeletingTask(this, snapshotId)); + } } return queue; } @@ -163,39 +175,35 @@ public BackgroundTaskQueue getTasks() { @Override public void shutdown() { super.shutdown(); - deletedDirSupplier.closeItr(); } - private final class DeletedDirSupplier { + private final class DeletedDirSupplier implements Closeable { private TableIterator> deleteTableIterator; - private synchronized Table.KeyValue get() - throws IOException { + private DeletedDirSupplier(TableIterator> deleteTableIterator) { + this.deleteTableIterator = deleteTableIterator; + } + + private synchronized Table.KeyValue get() { if (deleteTableIterator.hasNext()) { return deleteTableIterator.next(); } return null; } - private synchronized void closeItr() { + public void close() { IOUtils.closeQuietly(deleteTableIterator); - deleteTableIterator = null; - } - - private synchronized void reInitItr() throws IOException { - closeItr(); - deleteTableIterator = - 
getOzoneManager().getMetadataManager().getDeletedDirTable() - .iterator(); } } private final class DirDeletingTask implements BackgroundTask { private final DirectoryDeletingService directoryDeletingService; + private final UUID snapshotId; - private DirDeletingTask(DirectoryDeletingService service) { + private DirDeletingTask(DirectoryDeletingService service, UUID snapshotId) { this.directoryDeletingService = service; + this.snapshotId = snapshotId; } @Override @@ -203,147 +211,192 @@ public int getPriority() { return 0; } - @Override - public BackgroundTaskResult call() { - try { - if (shouldRun()) { - isRunningOnAOS.set(true); - long rnCnt = getRunCount().incrementAndGet(); - if (LOG.isDebugEnabled()) { - LOG.debug("Running DirectoryDeletingService. {}", rnCnt); - } - long dirNum = 0L; - long subDirNum = 0L; - long subFileNum = 0L; - long remainingBufLimit = ratisByteLimit; - int consumedSize = 0; - List purgePathRequestList = new ArrayList<>(); - List> allSubDirList = - new ArrayList<>(); - - Table.KeyValue pendingDeletedDirInfo; - // This is to avoid race condition b/w purge request and snapshot chain updation. For AOS taking the global - // snapshotId since AOS could process multiple buckets in one iteration. - try { - UUID expectedPreviousSnapshotId = - ((OmMetadataManagerImpl) getOzoneManager().getMetadataManager()).getSnapshotChainManager() - .getLatestGlobalSnapshotId(); - - long startTime = Time.monotonicNow(); - while (remainingBufLimit > 0) { - pendingDeletedDirInfo = getPendingDeletedDirInfo(); - if (pendingDeletedDirInfo == null) { - break; - } - // Do not reclaim if the directory is still being referenced by - // the previous snapshot. 
- if (previousSnapshotHasDir(pendingDeletedDirInfo)) { - continue; - } + private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequestUpdatingExclusiveSize( + Map exclusiveSizeMap, Map exclusiveReplicatedSizeMap, UUID snapshotID) { + OzoneManagerProtocolProtos.SnapshotSize snapshotSize = OzoneManagerProtocolProtos.SnapshotSize.newBuilder() + .setExclusiveSize( + exclusiveSizeMap.getOrDefault(snapshotID, 0L)) + .setExclusiveReplicatedSize( + exclusiveReplicatedSizeMap.getOrDefault( + snapshotID, 0L)) + .build(); + + return OzoneManagerProtocolProtos.SetSnapshotPropertyRequest.newBuilder() + .setSnapshotKey(snapshotChainManager.getTableKey(snapshotID)) + .setSnapshotSizeDeltaFromDirDeepCleaning(snapshotSize) + .build(); + } - PurgePathRequest request = prepareDeleteDirRequest( - pendingDeletedDirInfo.getValue(), - pendingDeletedDirInfo.getKey(), allSubDirList, - getOzoneManager().getKeyManager(), remainingBufLimit); + /** + * + * @param currentSnapshotInfo if null, deleted directories in AOS should be processed. + * @param keyManager KeyManager of the underlying store. 
+ */ + private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, + long remainingBufLimit, long rnCnt) throws IOException, ExecutionException, InterruptedException { + String volume, bucket, snapshotTableKey; + if (currentSnapshotInfo != null) { + volume = currentSnapshotInfo.getVolumeName(); + bucket = currentSnapshotInfo.getBucketName(); + snapshotTableKey = currentSnapshotInfo.getTableKey(); + } else { + volume = null; bucket = null; snapshotTableKey = null; + } - consumedSize += request.getSerializedSize(); - remainingBufLimit -= consumedSize; - purgePathRequestList.add(request); - // Count up the purgeDeletedDir, subDirs and subFiles - if (request.getDeletedDir() != null && !request.getDeletedDir() - .isEmpty()) { - dirNum++; - } - subDirNum += request.getMarkDeletedSubDirsCount(); - subFileNum += request.getDeletedSubFilesCount(); + OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); + IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); + + try (DeletedDirSupplier dirSupplier = new DeletedDirSupplier(currentSnapshotInfo == null ? + keyManager.getDeletedDirEntries() : keyManager.getDeletedDirEntries(volume, bucket)); + ReclaimableDirFilter reclaimableDirFilter = new ReclaimableDirFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock); + ReclaimableKeyFilter reclaimableFileFilter = new ReclaimableKeyFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock)) { + // This is to avoid race condition b/w purge request and snapshot chain update. For AOS taking the global + // snapshotId since AOS could process multiple buckets in one iteration. While using path + // previous snapshotId for a snapshot since it would process only one bucket. + UUID expectedPreviousSnapshotId = currentSnapshotInfo == null ? 
+ snapshotChainManager.getLatestGlobalSnapshotId() : + SnapshotUtils.getPreviousSnapshotId(currentSnapshotInfo, snapshotChainManager); + CompletableFuture processedAllDeletedDirs = CompletableFuture.completedFuture(true); + for (int i = 0; i < numberOfParallelThreadsPerStore; i++) { + CompletableFuture future = new CompletableFuture<>(); + deletionThreadPool.submit(() -> { + try { + boolean processedAll = processDeletedDirectories(snapshotTableKey, dirSupplier, remainingBufLimit, + reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, rnCnt); + future.complete(processedAll); + } catch (Throwable e) { + future.complete(false); } - - optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, - subFileNum, allSubDirList, purgePathRequestList, null, - startTime, remainingBufLimit, - getOzoneManager().getKeyManager(), expectedPreviousSnapshotId, - rnCnt); - - } catch (IOException e) { - LOG.error( - "Error while running delete directories and files " + "background task. Will retry at next run.", - e); + }); + processedAllDeletedDirs = future.thenCombine(future, (a, b) -> a && b); + } + // If AOS or all directories have been processed for snapshot, update snapshot size delta and deep clean flag + if (currentSnapshotInfo == null || processedAllDeletedDirs.get()) { + List setSnapshotPropertyRequests = new ArrayList<>(); + Map exclusiveReplicatedSizeMap = reclaimableFileFilter.getExclusiveReplicatedSizeMap(); + Map exclusiveSizeMap = reclaimableFileFilter.getExclusiveSizeMap(); + List previousPathSnapshotsInChain = + Stream.of(exclusiveSizeMap.keySet(), exclusiveReplicatedSizeMap.keySet()) + .flatMap(Collection::stream).distinct().collect(Collectors.toList()); + for (UUID snapshot : previousPathSnapshotsInChain) { + setSnapshotPropertyRequests.add(getSetSnapshotRequestUpdatingExclusiveSize(exclusiveSizeMap, + exclusiveReplicatedSizeMap, snapshot)); } - isRunningOnAOS.set(false); - synchronized (directoryDeletingService) { - 
this.directoryDeletingService.notify(); + + // Updating directory deep clean flag of snapshot. + if (currentSnapshotInfo != null) { + setSnapshotPropertyRequests.add(OzoneManagerProtocolProtos.SetSnapshotPropertyRequest.newBuilder() + .setSnapshotKey(snapshotTableKey) + .setDeepCleanedDeletedDir(true) + .build()); } + submitSetSnapshotRequests(setSnapshotPropertyRequests); } - } finally { - taskCount.getAndDecrement(); } - // place holder by returning empty results of this call back. - return BackgroundTaskResult.EmptyTaskResult.newResult(); } - private boolean previousSnapshotHasDir( - KeyValue pendingDeletedDirInfo) throws IOException { - String key = pendingDeletedDirInfo.getKey(); - OmKeyInfo deletedDirInfo = pendingDeletedDirInfo.getValue(); - OmSnapshotManager omSnapshotManager = - getOzoneManager().getOmSnapshotManager(); - OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) - getOzoneManager().getMetadataManager(); - SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); - if (previousSnapshotInfo == null) { + private boolean processDeletedDirectories(String snapshotTableKey, + DeletedDirSupplier dirSupplier, long remainingBufLimit, ReclaimableDirFilter reclaimableDirFilter, + ReclaimableKeyFilter reclaimableFileFilter, UUID expectedPreviousSnapshotId, long runCount) { + try { + long startTime = Time.monotonicNow(); + long dirNum = 0L; + long subDirNum = 0L; + long subFileNum = 0L; + int consumedSize = 0; + List purgePathRequestList = new ArrayList<>(); + List> allSubDirList = new ArrayList<>(); + while (remainingBufLimit > 0) { + KeyValue pendingDeletedDirInfo = dirSupplier.get(); + if (pendingDeletedDirInfo == null) { + break; + } + boolean isDirReclaimable = reclaimableDirFilter.apply(pendingDeletedDirInfo); + Optional request = prepareDeleteDirRequest( + pendingDeletedDirInfo.getValue(), + 
pendingDeletedDirInfo.getKey(), isDirReclaimable, allSubDirList, + getOzoneManager().getKeyManager(), reclaimableFileFilter, remainingBufLimit); + if (!request.isPresent()) { + continue; + } + PurgePathRequest purgePathRequest = request.get(); + consumedSize += purgePathRequest.getSerializedSize(); + remainingBufLimit -= consumedSize; + purgePathRequestList.add(purgePathRequest); + // Count up the purgeDeletedDir, subDirs and subFiles + if (purgePathRequest.hasDeletedDir() && !StringUtils.isBlank(purgePathRequest.getDeletedDir())) { + dirNum++; + } + subDirNum += purgePathRequest.getMarkDeletedSubDirsCount(); + subFileNum += purgePathRequest.getDeletedSubFilesCount(); + } + + optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, + subFileNum, allSubDirList, purgePathRequestList, snapshotTableKey, + startTime, remainingBufLimit, getOzoneManager().getKeyManager(), + reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, + runCount); + + return purgePathRequestList.isEmpty(); + } catch (IOException e) { + LOG.error("Error while running delete directories for store : {} and files background task. " + + "Will retry at next run. ", snapshotTableKey, e); return false; } - // previous snapshot is not active or it has not been flushed to disk then don't process the key in this - // iteration. 
- if (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || - !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), - previousSnapshotInfo)) { - return true; - } - try (UncheckedAutoCloseableSupplier rcLatestSnapshot = - omSnapshotManager.getSnapshot( - deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), - previousSnapshotInfo.getName())) { + } - if (rcLatestSnapshot != null) { - String dbRenameKey = metadataManager - .getRenameKey(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), deletedDirInfo.getObjectID()); - Table prevDirTable = - rcLatestSnapshot.get().getMetadataManager().getDirectoryTable(); - Table prevDeletedDirTable = - rcLatestSnapshot.get().getMetadataManager().getDeletedDirTable(); - OmKeyInfo prevDeletedDirInfo = prevDeletedDirTable.get(key); - if (prevDeletedDirInfo != null) { - return true; + @Override + public BackgroundTaskResult call() { + // Check if this is the Leader OM. If not leader, no need to execute this + // task. + if (shouldRun()) { + final long run = getRunCount().incrementAndGet(); + if (snapshotId == null) { + LOG.debug("Running DirectoryDeletingService for active object store, {}", run); + isRunningOnAOS.set(true); + } else { + LOG.debug("Running DirectoryDeletingService for snapshot : {}, {}", snapshotId, run); + } + OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); + SnapshotInfo snapInfo = null; + try { + snapInfo = snapshotId == null ? null : + SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); + if (snapInfo != null) { + if (snapInfo.isDeepCleanedDeletedDir()) { + LOG.info("Snapshot {} has already been deep cleaned directory. 
Skipping the snapshot in this iteration.", + snapInfo.getSnapshotId()); + return BackgroundTaskResult.EmptyTaskResult.newResult(); + } + if (!OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), snapInfo)) { + LOG.info("Skipping snapshot processing since changes to snapshot {} have not been flushed to disk", + snapInfo); + return BackgroundTaskResult.EmptyTaskResult.newResult(); + } + } + try (UncheckedAutoCloseableSupplier omSnapshot = snapInfo == null ? null : + omSnapshotManager.getActiveSnapshot(snapInfo.getVolumeName(), snapInfo.getBucketName(), + snapInfo.getName())) { + KeyManager keyManager = snapInfo == null ? getOzoneManager().getKeyManager() + : omSnapshot.get().getKeyManager(); + processDeletedDirsForStore(snapInfo, keyManager, ratisByteLimit, run); + } + } catch (IOException | ExecutionException | InterruptedException e) { + LOG.error("Error while running delete files background task for store {}. Will retry at next run.", + snapInfo, e); + } finally { + if (snapshotId == null) { + isRunningOnAOS.set(false); + synchronized (directoryDeletingService) { + this.directoryDeletingService.notify(); + } } - String prevDirTableDBKey = metadataManager.getSnapshotRenamedTable() - .get(dbRenameKey); - // In OMKeyDeleteResponseWithFSO OzonePathKey is converted to - // OzoneDeletePathKey. Changing it back to check the previous DirTable - String prevDbKey = prevDirTableDBKey == null ? - metadataManager.getOzoneDeletePathDirKey(key) : prevDirTableDBKey; - OmDirectoryInfo prevDirInfo = prevDirTable.get(prevDbKey); - //Checking if the previous snapshot in the chain hasn't changed while checking if the deleted directory is - // present in the previous snapshot. If the chain has changed, the deleted directory could have been moved - // to the newly created snapshot. 
- SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); - return (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), - Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId))) || (prevDirInfo != null && - prevDirInfo.getObjectID() == deletedDirInfo.getObjectID()); } } - - return false; + // By design, no one cares about the results of this call back. + return BackgroundTaskResult.EmptyTaskResult.newResult(); } } - - public KeyValue getPendingDeletedDirInfo() - throws IOException { - return deletedDirSupplier.get(); - } - } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index d89726fd35ef..60b2ab55efd7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -204,7 +204,7 @@ private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequ * @param keyManager KeyManager of the underlying store. */ private void processDeletedKeysForStore(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, - int remainNum) throws IOException, InterruptedException { + int remainNum) throws IOException { String volume = null, bucket = null, snapshotTableKey = null; if (currentSnapshotInfo != null) { volume = currentSnapshotInfo.getVolumeName(); @@ -323,8 +323,8 @@ public BackgroundTaskResult call() { SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); if (snapInfo != null) { if (snapInfo.isDeepCleaned()) { - LOG.info("Snapshot {} has already been deep cleaned. 
Skipping the snapshot in this iteration.", - snapInfo.getSnapshotId()); + LOG.info("Snapshot {} has already been deep cleaned. Skipping the snapshot in this iteration. " + + "Snapshot name : {}", snapInfo.getSnapshotId(), snapInfo.getName()); return EmptyTaskResult.newResult(); } if (!OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), snapInfo)) { @@ -345,7 +345,7 @@ public BackgroundTaskResult call() { : omSnapshot.get().getKeyManager(); processDeletedKeysForStore(snapInfo, keyManager, remainNum); } - } catch (IOException | InterruptedException e) { + } catch (IOException e) { LOG.error("Error while running delete files background task for store {}. Will retry at next run.", snapInfo, e); } finally { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 68d9306584ae..42e76377e14d 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -21,8 +21,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static 
org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -140,7 +140,7 @@ class TestKeyDeletingService extends OzoneTestBase { private KeyManager keyManager; private OMMetadataManager metadataManager; private KeyDeletingService keyDeletingService; - private SnapshotDirectoryCleaningService snapshotDirectoryCleaningService; + private DirectoryDeletingService directoryDeletingService; private ScmBlockLocationTestingClient scmBlockTestingClient; @BeforeAll @@ -156,7 +156,7 @@ private void createConfig(File testDir) { 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); - conf.setTimeDuration(OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, + conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS); @@ -170,7 +170,7 @@ private void createSubject() throws Exception { OmTestManagers omTestManagers = new OmTestManagers(conf, scmBlockTestingClient, null); keyManager = omTestManagers.getKeyManager(); keyDeletingService = keyManager.getDeletingService(); - snapshotDirectoryCleaningService = keyManager.getSnapshotDirectoryService(); + directoryDeletingService = keyManager.getDirDeletingService(); writeClient = omTestManagers.getWriteClient(); om = omTestManagers.getOzoneManager(); metadataManager = omTestManagers.getMetadataManager(); @@ -524,6 +524,7 @@ void testSnapshotDeepClean() throws Exception { // Suspend KeyDeletingService keyDeletingService.suspend(); + directoryDeletingService.suspend(); final long initialSnapshotCount = metadataManager.countRowsInTable(snapshotInfoTable); final long initialKeyCount = metadataManager.countRowsInTable(keyTable); @@ -571,6 +572,7 @@ void testSnapshotDeepClean() throws Exception { checkSnapDeepCleanStatus(snapshotInfoTable, volumeName, false); keyDeletingService.resume(); + directoryDeletingService.resume(); try (UncheckedAutoCloseableSupplier rcOmSnapshot = 
om.getOmSnapshotManager().getSnapshot(volumeName, bucketName, snap3)) { @@ -640,6 +642,7 @@ void testSnapshotExclusiveSize() throws Exception { // Supspend KDS keyDeletingService.suspend(); + directoryDeletingService.suspend(); final long initialSnapshotCount = metadataManager.countRowsInTable(snapshotInfoTable); final long initialKeyCount = metadataManager.countRowsInTable(keyTable); @@ -711,10 +714,11 @@ void testSnapshotExclusiveSize() throws Exception { createAndCommitKey(testVolumeName, testBucketName, uniqueObjectName("key"), 3); long prevKdsRunCount = getRunCount(); - long prevSnapshotDirectorServiceCnt = snapshotDirectoryCleaningService.getRunCount().get(); + long prevSnapshotDirectorServiceCnt = directoryDeletingService.getRunCount().get(); + directoryDeletingService.resume(); // Let SnapshotDirectoryCleaningService to run for some iterations GenericTestUtils.waitFor( - () -> (snapshotDirectoryCleaningService.getRunCount().get() > prevSnapshotDirectorServiceCnt + 20), + () -> (directoryDeletingService.getRunCount().get() > prevSnapshotDirectorServiceCnt + 100), 100, 100000); keyDeletingService.resume(); @@ -779,7 +783,7 @@ void cleanup() { @Test @DisplayName("Should not update keys when purge request times out during key deletion") - public void testFailingModifiedKeyPurge() throws IOException, InterruptedException { + public void testFailingModifiedKeyPurge() throws IOException { try (MockedStatic mocked = mockStatic(OzoneManagerRatisUtils.class, CALLS_REAL_METHODS)) { From d14e83da7706aa9a2a45aeaee89c95f6612519c4 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 00:13:05 -0400 Subject: [PATCH 04/35] HDDS-13034. 
Fix pmd Change-Id: I11acc3782aadf8393f731adcaa2a436dd9b534ae --- .../src/main/resources/ozone-default.xml | 6 +++--- .../om/service/DirectoryDeletingService.java | 21 +++++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index f2539b589591..f257e7b1c154 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3786,7 +3786,7 @@ ozone.snapshot.directory.service.timeout 300s - OZONE, PERFORMANCE, OM + OZONE, PERFORMANCE, OM, DEPRECATED Timeout value for SnapshotDirectoryCleaningService. @@ -3795,9 +3795,9 @@ ozone.snapshot.directory.service.interval 24h - OZONE, PERFORMANCE, OM + OZONE, PERFORMANCE, OM, DEPRECATED - The time interval between successive SnapshotDirectoryCleaningService + DEPRECATED. The time interval between successive SnapshotDirectoryCleaningService thread run. 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index f0a1c1fed9e7..a31d268e016d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -177,7 +177,7 @@ public void shutdown() { super.shutdown(); } - private final class DeletedDirSupplier implements Closeable { + private static final class DeletedDirSupplier implements Closeable { private TableIterator> deleteTableIterator; @@ -192,6 +192,7 @@ private synchronized Table.KeyValue get() { return null; } + @Override public void close() { IOUtils.closeQuietly(deleteTableIterator); } @@ -273,7 +274,8 @@ private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyMan processedAllDeletedDirs = future.thenCombine(future, (a, b) -> a && b); } // If AOS or all directories have been processed for snapshot, update snapshot size delta and deep clean flag - if (currentSnapshotInfo == null || processedAllDeletedDirs.get()) { + // if it is a snapshot. + if (processedAllDeletedDirs.get()) { List setSnapshotPropertyRequests = new ArrayList<>(); Map exclusiveReplicatedSizeMap = reclaimableFileFilter.getExclusiveReplicatedSizeMap(); Map exclusiveSizeMap = reclaimableFileFilter.getExclusiveSizeMap(); @@ -297,6 +299,21 @@ private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyMan } } + /** + * Processes the directories marked as deleted and performs reclamation if applicable. + * This includes preparing and submitting requests to delete directories and their + * subdirectories/files while respecting buffer limits and snapshot constraints. 
+ * + * @param snapshotTableKey the key of the snapshot table to which the operation applies + * @param dirSupplier thread safe supplier to fetch the next directory marked as deleted. + * @param remainingBufLimit the limit for the remaining buffer size available for processing + * @param reclaimableDirFilter filter to determine whether a directory is reclaimable + * @param reclaimableFileFilter filter to determine whether a file is reclaimable + * @param expectedPreviousSnapshotId UUID of the expected previous snapshot in the snapshot chain + * @param runCount the current run count of the deletion process + * @return true if no purge requests were submitted (indicating no deletions processed), + * false otherwise + */ private boolean processDeletedDirectories(String snapshotTableKey, DeletedDirSupplier dirSupplier, long remainingBufLimit, ReclaimableDirFilter reclaimableDirFilter, ReclaimableKeyFilter reclaimableFileFilter, UUID expectedPreviousSnapshotId, long runCount) { From 735903cbd06e5c23aceb01f59cd9c644f8a6923c Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 00:37:29 -0400 Subject: [PATCH 05/35] HDDS-13160. 
Remove SnapshotDirectoryCleaningService and refactor AbstractDeletingService Change-Id: I11ad5f48e25a7d22676a061bf43d8d168f0ae683 --- .../TestDirectoryDeletingServiceWithFSO.java | 4 +- .../om/service}/TestRootedDDSWithFSO.java | 5 +- .../service/AbstractKeyDeletingService.java | 527 +----------------- .../om/service/DirectoryDeletingService.java | 236 ++++++++ .../ozone/om/service/KeyDeletingService.java | 149 +++++ .../SnapshotDirectoryCleaningService.java | 484 ---------------- 6 files changed, 395 insertions(+), 1010 deletions(-) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/{fs/ozone => ozone/om/service}/TestDirectoryDeletingServiceWithFSO.java (99%) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/{fs/ozone => ozone/om/service}/TestRootedDDSWithFSO.java (97%) delete mode 100644 hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java similarity index 99% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index a39aaf565ff7..e04903763307 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.fs.ozone; +package org.apache.hadoop.ozone.om.service; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; @@ -77,8 +77,6 @@ import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer; import org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; -import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; -import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedDDSWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java similarity index 97% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedDDSWithFSO.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java index 3c2cfa914edc..6f4e13448261 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedDDSWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package org.apache.hadoop.fs.ozone; +package org.apache.hadoop.ozone.om.service; -import static org.apache.hadoop.fs.ozone.TestDirectoryDeletingServiceWithFSO.assertSubPathsCount; +import static org.apache.hadoop.ozone.om.service.TestDirectoryDeletingServiceWithFSO.assertSubPathsCount; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; @@ -51,7 +51,6 @@ import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index ee699e16c31d..97f794688c49 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -81,9 +81,6 @@ public abstract class AbstractKeyDeletingService extends BackgroundService private final OMPerformanceMetrics perfMetrics; private final ScmBlockLocationProtocol scmClient; private final ClientId clientId = ClientId.randomId(); - private final AtomicLong deletedDirsCount; - private final AtomicLong movedDirsCount; - private final AtomicLong movedFilesCount; private final AtomicLong runCount; private final AtomicLong callId; private final BootstrapStateHandler.Lock lock = @@ -96,512 +93,33 @@ public AbstractKeyDeletingService(String serviceName, long 
interval, ozoneManager.getThreadNamePrefix()); this.ozoneManager = ozoneManager; this.scmClient = scmClient; - this.deletedDirsCount = new AtomicLong(0); - this.movedDirsCount = new AtomicLong(0); - this.movedFilesCount = new AtomicLong(0); this.runCount = new AtomicLong(0); this.metrics = ozoneManager.getDeletionMetrics(); this.perfMetrics = ozoneManager.getPerfMetrics(); this.callId = new AtomicLong(0); } - protected Pair processKeyDeletes(List keyBlocksList, - Map keysToModify, List renameEntries, - String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { - - long startTime = Time.monotonicNow(); - Pair purgeResult = Pair.of(0, false); - if (LOG.isDebugEnabled()) { - LOG.debug("Send {} key(s) to SCM: {}", - keyBlocksList.size(), keyBlocksList); - } else if (LOG.isInfoEnabled()) { - int logSize = 10; - if (keyBlocksList.size() < logSize) { - logSize = keyBlocksList.size(); - } - LOG.info("Send {} key(s) to SCM, first {} keys: {}", - keyBlocksList.size(), logSize, keyBlocksList.subList(0, logSize)); - } - List blockDeletionResults = - scmClient.deleteKeyBlocks(keyBlocksList); - LOG.info("{} BlockGroup deletion are acked by SCM in {} ms", - keyBlocksList.size(), Time.monotonicNow() - startTime); - if (blockDeletionResults != null) { - long purgeStartTime = Time.monotonicNow(); - purgeResult = submitPurgeKeysRequest(blockDeletionResults, - keysToModify, renameEntries, snapTableKey, expectedPreviousSnapshotId); - int limit = ozoneManager.getConfiguration().getInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, - OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); - LOG.info("Blocks for {} (out of {}) keys are deleted from DB in {} ms. 
Limit per task is {}.", - purgeResult, blockDeletionResults.size(), Time.monotonicNow() - purgeStartTime, limit); - } - perfMetrics.setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); - return purgeResult; + ClientId getClientId() { + return clientId; } - /** - * Submits PurgeKeys request for the keys whose blocks have been deleted - * by SCM. - * @param results DeleteBlockGroups returned by SCM. - * @param keysToModify Updated list of RepeatedOmKeyInfo - */ - private Pair submitPurgeKeysRequest(List results, - Map keysToModify, List renameEntriesToBeDeleted, - String snapTableKey, UUID expectedPreviousSnapshotId) { - List purgeKeys = new ArrayList<>(); - - // Put all keys to be purged in a list - int deletedCount = 0; - Set failedDeletedKeys = new HashSet<>(); - boolean purgeSuccess = true; - for (DeleteBlockGroupResult result : results) { - String deletedKey = result.getObjectKey(); - if (result.isSuccess()) { - // Add key to PurgeKeys list. - if (keysToModify != null && !keysToModify.containsKey(deletedKey)) { - // Parse Volume and BucketName - purgeKeys.add(deletedKey); - if (LOG.isDebugEnabled()) { - LOG.debug("Key {} set to be updated in OM DB, Other versions " + - "of the key that are reclaimable are reclaimed.", deletedKey); - } - } else if (keysToModify == null) { - purgeKeys.add(deletedKey); - if (LOG.isDebugEnabled()) { - LOG.debug("Key {} set to be purged from OM DB", deletedKey); - } - } - deletedCount++; - } else { - // If the block deletion failed, then the deleted keys should also not be modified. 
- failedDeletedKeys.add(deletedKey); - purgeSuccess = false; - } - } - - PurgeKeysRequest.Builder purgeKeysRequest = PurgeKeysRequest.newBuilder(); - if (snapTableKey != null) { - purgeKeysRequest.setSnapshotTableKey(snapTableKey); - } - OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = - OzoneManagerProtocolProtos.NullableUUID.newBuilder(); - if (expectedPreviousSnapshotId != null) { - expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); - } - purgeKeysRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); - DeletedKeys deletedKeys = DeletedKeys.newBuilder() - .setVolumeName("") - .setBucketName("") - .addAllKeys(purgeKeys) - .build(); - purgeKeysRequest.addDeletedKeys(deletedKeys); - // Adding rename entries to be purged. - if (renameEntriesToBeDeleted != null) { - purgeKeysRequest.addAllRenamedKeys(renameEntriesToBeDeleted); - } - List keysToUpdateList = new ArrayList<>(); - if (keysToModify != null) { - for (Map.Entry keyToModify : - keysToModify.entrySet()) { - if (failedDeletedKeys.contains(keyToModify.getKey())) { - continue; - } - SnapshotMoveKeyInfos.Builder keyToUpdate = - SnapshotMoveKeyInfos.newBuilder(); - keyToUpdate.setKey(keyToModify.getKey()); - List keyInfos = - keyToModify.getValue().getOmKeyInfoList().stream() - .map(k -> k.getProtobuf(ClientVersion.CURRENT_VERSION)) - .collect(Collectors.toList()); - keyToUpdate.addAllKeyInfos(keyInfos); - keysToUpdateList.add(keyToUpdate.build()); - } - - if (!keysToUpdateList.isEmpty()) { - purgeKeysRequest.addAllKeysToUpdate(keysToUpdateList); - } - } - - OMRequest omRequest = OMRequest.newBuilder() - .setCmdType(Type.PurgeKeys) - .setPurgeKeysRequest(purgeKeysRequest) - .setClientId(clientId.toString()) - .build(); - - // Submit PurgeKeys request to OM. Acquire bootstrap lock when processing deletes for snapshots. - try (BootstrapStateHandler.Lock lock = snapTableKey != null ? 
getBootstrapStateLock().lock() : null) { - OMResponse omResponse = submitRequest(omRequest); - if (omResponse != null) { - purgeSuccess = purgeSuccess && omResponse.getSuccess(); - } - } catch (ServiceException | InterruptedException e) { - LOG.error("PurgeKey request failed. Will retry at next run.", e); - return Pair.of(0, false); - } + OMPerformanceMetrics getPerfMetrics() { + return perfMetrics; + } - return Pair.of(deletedCount, purgeSuccess); + DeletingServiceMetrics getMetrics() { + return metrics; } protected OMResponse submitRequest(OMRequest omRequest) throws ServiceException { return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callId.incrementAndGet()); } - protected OMResponse submitPurgePaths(List requests, - String snapTableKey, UUID expectedPreviousSnapshotId) { - OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = - OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); - - if (snapTableKey != null) { - purgeDirRequest.setSnapshotTableKey(snapTableKey); - } - OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = - OzoneManagerProtocolProtos.NullableUUID.newBuilder(); - if (expectedPreviousSnapshotId != null) { - expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); - } - purgeDirRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); - - purgeDirRequest.addAllDeletedPath(requests); - - OzoneManagerProtocolProtos.OMRequest omRequest = - OzoneManagerProtocolProtos.OMRequest.newBuilder() - .setCmdType(OzoneManagerProtocolProtos.Type.PurgeDirectories) - .setPurgeDirectoriesRequest(purgeDirRequest) - .setClientId(clientId.toString()) - .build(); - - // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. - try (BootstrapStateHandler.Lock lock = snapTableKey != null ? 
getBootstrapStateLock().lock() : null) { - return submitRequest(omRequest); - } catch (ServiceException | InterruptedException e) { - LOG.error("PurgePaths request failed. Will retry at next run.", e); - } - return null; - } - - private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( - final long volumeId, - final long bucketId, - final String purgeDeletedDir, - final List purgeDeletedFiles, - final List markDirsAsDeleted) { - // Put all keys to be purged in a list - PurgePathRequest.Builder purgePathsRequest = PurgePathRequest.newBuilder(); - purgePathsRequest.setVolumeId(volumeId); - purgePathsRequest.setBucketId(bucketId); - - if (purgeDeletedDir != null) { - purgePathsRequest.setDeletedDir(purgeDeletedDir); - } - - for (OmKeyInfo purgeFile : purgeDeletedFiles) { - purgePathsRequest.addDeletedSubFiles( - purgeFile.getProtobuf(true, ClientVersion.CURRENT_VERSION)); - } - - // Add these directories to deletedDirTable, so that its sub-paths will be - // traversed in next iteration to ensure cleanup all sub-children. 
- for (OmKeyInfo dir : markDirsAsDeleted) { - purgePathsRequest.addMarkDeletedSubDirs( - dir.getProtobuf(ClientVersion.CURRENT_VERSION)); - } - - return purgePathsRequest.build(); - } - - protected Optional prepareDeleteDirRequest( - OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, - List> subDirList, - KeyManager keyManager, - CheckedFunction, Boolean, IOException> reclaimableFileFilter, - long remainingBufLimit) throws IOException { - // step-0: Get one pending deleted directory - if (LOG.isDebugEnabled()) { - LOG.debug("Pending deleted dir name: {}", - pendingDeletedDirInfo.getKeyName()); - } - - final String[] keys = delDirName.split(OM_KEY_PREFIX); - final long volumeId = Long.parseLong(keys[1]); - final long bucketId = Long.parseLong(keys[2]); - - // step-1: get all sub directories under the deletedDir - DeleteKeysResult subDirDeleteResult = - keyManager.getPendingDeletionSubDirs(volumeId, bucketId, - pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); - List subDirs = subDirDeleteResult.getKeysToDelete(); - remainingBufLimit -= subDirDeleteResult.getConsumedSize(); - - OMMetadataManager omMetadataManager = keyManager.getMetadataManager(); - for (OmKeyInfo dirInfo : subDirs) { - String ozoneDbKey = omMetadataManager.getOzonePathKey(volumeId, - bucketId, dirInfo.getParentObjectID(), dirInfo.getFileName()); - String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( - dirInfo.getObjectID(), ozoneDbKey); - subDirList.add(Pair.of(ozoneDeleteKey, dirInfo)); - LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); - } - - // step-2: get all sub files under the deletedDir - // Only remove sub files if the parent directory is going to be deleted or can be reclaimed. 
- DeleteKeysResult subFileDeleteResult = - keyManager.getPendingDeletionSubFiles(volumeId, bucketId, - pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); - List subFiles = subFileDeleteResult.getKeysToDelete(); - - if (LOG.isDebugEnabled()) { - for (OmKeyInfo fileInfo : subFiles) { - LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); - } - } - - // step-3: If both sub-dirs and sub-files are exhausted under a parent - // directory, only then delete the parent. - String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && - subFileDeleteResult.isProcessedKeys() ? delDirName : null; - if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { - return Optional.empty(); - } - return Optional.of(wrapPurgeRequest(volumeId, bucketId, - purgeDeletedDir, subFiles, subDirs)); - } - - @SuppressWarnings("checkstyle:ParameterNumber") - public void optimizeDirDeletesAndSubmitRequest( - long dirNum, long subDirNum, long subFileNum, - List> allSubDirList, - List purgePathRequestList, - String snapTableKey, long startTime, - long remainingBufLimit, KeyManager keyManager, - CheckedFunction, Boolean, IOException> reclaimableDirChecker, - CheckedFunction, Boolean, IOException> reclaimableFileChecker, - UUID expectedPreviousSnapshotId, long rnCnt) { - - // Optimization to handle delete sub-dir and keys to remove quickly - // This case will be useful to handle when depth of directory is high - int subdirDelNum = 0; - int subDirRecursiveCnt = 0; - int consumedSize = 0; - while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { - try { - Pair stringOmKeyInfoPair = allSubDirList.get(subDirRecursiveCnt++); - Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), - stringOmKeyInfoPair.getValue())); - Optional request = prepareDeleteDirRequest( - stringOmKeyInfoPair.getValue(), stringOmKeyInfoPair.getKey(), 
subDirectoryReclaimable, allSubDirList, - keyManager, reclaimableFileChecker, remainingBufLimit); - if (!request.isPresent()) { - continue; - } - PurgePathRequest requestVal = request.get(); - consumedSize += requestVal.getSerializedSize(); - remainingBufLimit -= consumedSize; - purgePathRequestList.add(requestVal); - // Count up the purgeDeletedDir, subDirs and subFiles - if (requestVal.hasDeletedDir() && !StringUtils.isBlank(requestVal.getDeletedDir())) { - subdirDelNum++; - } - subDirNum += requestVal.getMarkDeletedSubDirsCount(); - subFileNum += requestVal.getDeletedSubFilesCount(); - } catch (IOException e) { - LOG.error("Error while running delete directories and files " + - "background task. Will retry at next run for subset.", e); - break; - } - } - if (!purgePathRequestList.isEmpty()) { - submitPurgePaths(purgePathRequestList, snapTableKey, expectedPreviousSnapshotId); - } - - if (dirNum != 0 || subDirNum != 0 || subFileNum != 0) { - long subdirMoved = subDirNum - subdirDelNum; - deletedDirsCount.addAndGet(dirNum + subdirDelNum); - movedDirsCount.addAndGet(subdirMoved); - movedFilesCount.addAndGet(subFileNum); - long timeTakenInIteration = Time.monotonicNow() - startTime; - LOG.info("Number of dirs deleted: {}, Number of sub-dir " + - "deleted: {}, Number of sub-files moved:" + - " {} to DeletedTable, Number of sub-dirs moved {} to " + - "DeletedDirectoryTable, iteration elapsed: {}ms, " + - " totalRunCount: {}", - dirNum, subdirDelNum, subFileNum, (subDirNum - subdirDelNum), - timeTakenInIteration, rnCnt); - metrics.incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); - perfMetrics.setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); - } - } - - /** - * To calculate Exclusive Size for current snapshot, Check - * the next snapshot deletedTable if the deleted key is - * referenced in current snapshot and not referenced in the - * previous snapshot then that key is exclusive to the current - * snapshot. 
Here since we are only iterating through - * deletedTable we can check the previous and previous to - * previous snapshot to achieve the same. - * previousSnapshot - Snapshot for which exclusive size is - * getting calculating. - * currSnapshot - Snapshot's deletedTable is used to calculate - * previousSnapshot snapshot's exclusive size. - * previousToPrevSnapshot - Snapshot which is used to check - * if key is exclusive to previousSnapshot. - */ - @SuppressWarnings("checkstyle:ParameterNumber") - public void calculateExclusiveSize( - SnapshotInfo previousSnapshot, - SnapshotInfo previousToPrevSnapshot, - OmKeyInfo keyInfo, - OmBucketInfo bucketInfo, long volumeId, - Table snapRenamedTable, - Table previousKeyTable, - Table prevRenamedTable, - Table previousToPrevKeyTable, - Map exclusiveSizeMap, - Map exclusiveReplicatedSizeMap) throws IOException { - String prevSnapKey = previousSnapshot.getTableKey(); - long exclusiveReplicatedSize = - exclusiveReplicatedSizeMap.getOrDefault( - prevSnapKey, 0L) + keyInfo.getReplicatedSize(); - long exclusiveSize = exclusiveSizeMap.getOrDefault( - prevSnapKey, 0L) + keyInfo.getDataSize(); - - // If there is no previous to previous snapshot, then - // the previous snapshot is the first snapshot. - if (previousToPrevSnapshot == null) { - exclusiveSizeMap.put(prevSnapKey, exclusiveSize); - exclusiveReplicatedSizeMap.put(prevSnapKey, - exclusiveReplicatedSize); - } else { - OmKeyInfo keyInfoPrevSnapshot = getPreviousSnapshotKeyName( - keyInfo, bucketInfo, volumeId, - snapRenamedTable, previousKeyTable); - OmKeyInfo keyInfoPrevToPrevSnapshot = getPreviousSnapshotKeyName( - keyInfoPrevSnapshot, bucketInfo, volumeId, - prevRenamedTable, previousToPrevKeyTable); - // If the previous to previous snapshot doesn't - // have the key, then it is exclusive size for the - // previous snapshot. 
- if (keyInfoPrevToPrevSnapshot == null) { - exclusiveSizeMap.put(prevSnapKey, exclusiveSize); - exclusiveReplicatedSizeMap.put(prevSnapKey, - exclusiveReplicatedSize); - } - } - } - - private OmKeyInfo getPreviousSnapshotKeyName( - OmKeyInfo keyInfo, OmBucketInfo bucketInfo, long volumeId, - Table snapRenamedTable, - Table previousKeyTable) throws IOException { - - if (keyInfo == null) { - return null; - } - - String dbKeyPrevSnap; - if (bucketInfo.getBucketLayout().isFileSystemOptimized()) { - dbKeyPrevSnap = getOzoneManager().getMetadataManager().getOzonePathKey( - volumeId, - bucketInfo.getObjectID(), - keyInfo.getParentObjectID(), - keyInfo.getFileName()); - } else { - dbKeyPrevSnap = getOzoneManager().getMetadataManager().getOzoneKey( - keyInfo.getVolumeName(), - keyInfo.getBucketName(), - keyInfo.getKeyName()); - } - - String dbRenameKey = getOzoneManager().getMetadataManager().getRenameKey( - keyInfo.getVolumeName(), - keyInfo.getBucketName(), - keyInfo.getObjectID()); - - String renamedKey = snapRenamedTable.getIfExist(dbRenameKey); - OmKeyInfo prevKeyInfo = renamedKey != null ? - previousKeyTable.get(renamedKey) : - previousKeyTable.get(dbKeyPrevSnap); - - if (prevKeyInfo == null || - prevKeyInfo.getObjectID() != keyInfo.getObjectID()) { - return null; - } - - return isBlockLocationInfoSame(prevKeyInfo, keyInfo) ? - prevKeyInfo : null; - } - protected boolean isBufferLimitCrossed( int maxLimit, int cLimit, int increment) { return cLimit + increment >= maxLimit; } - protected boolean isKeyReclaimable( - Table previousKeyTable, - Table renamedTable, - OmKeyInfo deletedKeyInfo, OmBucketInfo bucketInfo, - long volumeId, HddsProtos.KeyValue.Builder renamedKeyBuilder) - throws IOException { - - String dbKey; - // Handle case when the deleted snapshot is the first snapshot. 
- if (previousKeyTable == null) { - return true; - } - - // These are uncommitted blocks wrapped into a pseudo KeyInfo - if (deletedKeyInfo.getObjectID() == OBJECT_ID_RECLAIM_BLOCKS) { - return true; - } - - // Construct keyTable or fileTable DB key depending on the bucket type - if (bucketInfo.getBucketLayout().isFileSystemOptimized()) { - dbKey = ozoneManager.getMetadataManager().getOzonePathKey( - volumeId, - bucketInfo.getObjectID(), - deletedKeyInfo.getParentObjectID(), - deletedKeyInfo.getFileName()); - } else { - dbKey = ozoneManager.getMetadataManager().getOzoneKey( - deletedKeyInfo.getVolumeName(), - deletedKeyInfo.getBucketName(), - deletedKeyInfo.getKeyName()); - } - - /* - snapshotRenamedTable: - 1) /volumeName/bucketName/objectID -> - /volumeId/bucketId/parentId/fileName (FSO) - 2) /volumeName/bucketName/objectID -> - /volumeName/bucketName/keyName (non-FSO) - */ - String dbRenameKey = ozoneManager.getMetadataManager().getRenameKey( - deletedKeyInfo.getVolumeName(), deletedKeyInfo.getBucketName(), - deletedKeyInfo.getObjectID()); - - // Condition: key should not exist in snapshotRenamedTable - // of the current snapshot and keyTable of the previous snapshot. - // Check key exists in renamedTable of the Snapshot - String renamedKey = renamedTable.getIfExist(dbRenameKey); - - if (renamedKey != null && renamedKeyBuilder != null) { - renamedKeyBuilder.setKey(dbRenameKey).setValue(renamedKey); - } - // previousKeyTable is fileTable if the bucket is FSO, - // otherwise it is the keyTable. - OmKeyInfo prevKeyInfo = renamedKey != null ? previousKeyTable - .get(renamedKey) : previousKeyTable.get(dbKey); - - if (prevKeyInfo == null || - prevKeyInfo.getObjectID() != deletedKeyInfo.getObjectID()) { - return true; - } - - // For key overwrite the objectID will remain the same, In this - // case we need to check if OmKeyLocationInfo is also same. 
- return !isBlockLocationInfoSame(prevKeyInfo, deletedKeyInfo); - } - public OzoneManager getOzoneManager() { return ozoneManager; } @@ -624,37 +142,6 @@ public AtomicLong getCallId() { return callId; } - /** - * Returns the number of dirs deleted by the background service. - * - * @return Long count. - */ - @VisibleForTesting - public long getDeletedDirsCount() { - return deletedDirsCount.get(); - } - - /** - * Returns the number of sub-dirs deleted by the background service. - * - * @return Long count. - */ - @VisibleForTesting - public long getMovedDirsCount() { - return movedDirsCount.get(); - } - - /** - * Returns the number of files moved to DeletedTable by the background - * service. - * - * @return Long count. - */ - @VisibleForTesting - public long getMovedFilesCount() { - return movedFilesCount.get(); - } - @Override public BootstrapStateHandler.Lock getBootstrapStateLock() { return lock; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index a31d268e016d..a6648d591e20 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -17,7 +17,10 @@ package org.apache.hadoop.ozone.om.service; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ServiceException; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -34,10 +37,12 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; import 
org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.BackgroundTask; @@ -47,8 +52,12 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.lock.BootstrapStateHandler; +import org.apache.hadoop.ozone.om.DeleteKeysResult; import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; @@ -63,6 +72,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.util.Time; +import org.apache.ratis.util.function.CheckedFunction; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,6 +108,9 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { private final boolean deepCleanSnapshots; private final ExecutorService deletionThreadPool; private final int numberOfParallelThreadsPerStore; + private final AtomicLong deletedDirsCount; + private final AtomicLong movedDirsCount; + private final AtomicLong movedFilesCount; public DirectoryDeletingService(long interval, TimeUnit unit, long serviceTimeout, OzoneManager ozoneManager, @@ -118,6 +131,9 @@ public DirectoryDeletingService(long interval, TimeUnit unit, this.isRunningOnAOS = new AtomicBoolean(false); this.snapshotChainManager = 
((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.deepCleanSnapshots = deepCleanSnapshots; + this.deletedDirsCount = new AtomicLong(0); + this.movedDirsCount = new AtomicLong(0); + this.movedFilesCount = new AtomicLong(0); } private boolean shouldRun() { @@ -152,6 +168,226 @@ public void setRatisByteLimit(int ratisByteLimit) { this.ratisByteLimit = ratisByteLimit; } + private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List requests, + String snapTableKey, UUID expectedPreviousSnapshotId) { + OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = + OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); + + if (snapTableKey != null) { + purgeDirRequest.setSnapshotTableKey(snapTableKey); + } + OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = + OzoneManagerProtocolProtos.NullableUUID.newBuilder(); + if (expectedPreviousSnapshotId != null) { + expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); + } + purgeDirRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); + + purgeDirRequest.addAllDeletedPath(requests); + + OzoneManagerProtocolProtos.OMRequest omRequest = + OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(OzoneManagerProtocolProtos.Type.PurgeDirectories) + .setPurgeDirectoriesRequest(purgeDirRequest) + .setClientId(getClientId().toString()) + .build(); + + // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. + try (BootstrapStateHandler.Lock lock = snapTableKey != null ? getBootstrapStateLock().lock() : null) { + return submitRequest(omRequest); + } catch (ServiceException | InterruptedException e) { + LOG.error("PurgePaths request failed. 
Will retry at next run.", e); + } + return null; + } + + + private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( + final long volumeId, + final long bucketId, + final String purgeDeletedDir, + final List purgeDeletedFiles, + final List markDirsAsDeleted) { + // Put all keys to be purged in a list + PurgePathRequest.Builder purgePathsRequest = PurgePathRequest.newBuilder(); + purgePathsRequest.setVolumeId(volumeId); + purgePathsRequest.setBucketId(bucketId); + + if (purgeDeletedDir != null) { + purgePathsRequest.setDeletedDir(purgeDeletedDir); + } + + for (OmKeyInfo purgeFile : purgeDeletedFiles) { + purgePathsRequest.addDeletedSubFiles( + purgeFile.getProtobuf(true, ClientVersion.CURRENT_VERSION)); + } + + // Add these directories to deletedDirTable, so that its sub-paths will be + // traversed in next iteration to ensure cleanup all sub-children. + for (OmKeyInfo dir : markDirsAsDeleted) { + purgePathsRequest.addMarkDeletedSubDirs( + dir.getProtobuf(ClientVersion.CURRENT_VERSION)); + } + + return purgePathsRequest.build(); + } + + + protected Optional prepareDeleteDirRequest( + OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, + List> subDirList, + KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableFileFilter, + long remainingBufLimit) throws IOException { + // step-0: Get one pending deleted directory + if (LOG.isDebugEnabled()) { + LOG.debug("Pending deleted dir name: {}", + pendingDeletedDirInfo.getKeyName()); + } + + final String[] keys = delDirName.split(OM_KEY_PREFIX); + final long volumeId = Long.parseLong(keys[1]); + final long bucketId = Long.parseLong(keys[2]); + + // step-1: get all sub directories under the deletedDir + DeleteKeysResult subDirDeleteResult = + keyManager.getPendingDeletionSubDirs(volumeId, bucketId, + pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); + List subDirs = subDirDeleteResult.getKeysToDelete(); + remainingBufLimit -= subDirDeleteResult.getConsumedSize(); + + 
OMMetadataManager omMetadataManager = keyManager.getMetadataManager(); + for (OmKeyInfo dirInfo : subDirs) { + String ozoneDbKey = omMetadataManager.getOzonePathKey(volumeId, + bucketId, dirInfo.getParentObjectID(), dirInfo.getFileName()); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + dirInfo.getObjectID(), ozoneDbKey); + subDirList.add(Pair.of(ozoneDeleteKey, dirInfo)); + LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); + } + + // step-2: get all sub files under the deletedDir + // Only remove sub files if the parent directory is going to be deleted or can be reclaimed. + DeleteKeysResult subFileDeleteResult = + keyManager.getPendingDeletionSubFiles(volumeId, bucketId, + pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); + List subFiles = subFileDeleteResult.getKeysToDelete(); + + if (LOG.isDebugEnabled()) { + for (OmKeyInfo fileInfo : subFiles) { + LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); + } + } + + // step-3: If both sub-dirs and sub-files are exhausted under a parent + // directory, only then delete the parent. + String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && + subFileDeleteResult.isProcessedKeys() ? 
delDirName : null; + if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { + return Optional.empty(); + } + return Optional.of(wrapPurgeRequest(volumeId, bucketId, + purgeDeletedDir, subFiles, subDirs)); + } + + + @SuppressWarnings("checkstyle:ParameterNumber") + void optimizeDirDeletesAndSubmitRequest( + long dirNum, long subDirNum, long subFileNum, + List> allSubDirList, + List purgePathRequestList, + String snapTableKey, long startTime, + long remainingBufLimit, KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableDirChecker, + CheckedFunction, Boolean, IOException> reclaimableFileChecker, + UUID expectedPreviousSnapshotId, long rnCnt) { + + // Optimization to handle delete sub-dir and keys to remove quickly + // This case will be useful to handle when depth of directory is high + int subdirDelNum = 0; + int subDirRecursiveCnt = 0; + int consumedSize = 0; + while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { + try { + Pair stringOmKeyInfoPair = allSubDirList.get(subDirRecursiveCnt++); + Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), + stringOmKeyInfoPair.getValue())); + Optional request = prepareDeleteDirRequest( + stringOmKeyInfoPair.getValue(), stringOmKeyInfoPair.getKey(), subDirectoryReclaimable, allSubDirList, + keyManager, reclaimableFileChecker, remainingBufLimit); + if (!request.isPresent()) { + continue; + } + PurgePathRequest requestVal = request.get(); + consumedSize += requestVal.getSerializedSize(); + remainingBufLimit -= consumedSize; + purgePathRequestList.add(requestVal); + // Count up the purgeDeletedDir, subDirs and subFiles + if (requestVal.hasDeletedDir() && !StringUtils.isBlank(requestVal.getDeletedDir())) { + subdirDelNum++; + } + subDirNum += requestVal.getMarkDeletedSubDirsCount(); + subFileNum += requestVal.getDeletedSubFilesCount(); + } catch (IOException e) { + LOG.error("Error while running delete 
directories and files " + + "background task. Will retry at next run for subset.", e); + break; + } + } + if (!purgePathRequestList.isEmpty()) { + submitPurgePaths(purgePathRequestList, snapTableKey, expectedPreviousSnapshotId); + } + + if (dirNum != 0 || subDirNum != 0 || subFileNum != 0) { + long subdirMoved = subDirNum - subdirDelNum; + deletedDirsCount.addAndGet(dirNum + subdirDelNum); + movedDirsCount.addAndGet(subdirMoved); + movedFilesCount.addAndGet(subFileNum); + long timeTakenInIteration = Time.monotonicNow() - startTime; + LOG.info("Number of dirs deleted: {}, Number of sub-dir " + + "deleted: {}, Number of sub-files moved:" + + " {} to DeletedTable, Number of sub-dirs moved {} to " + + "DeletedDirectoryTable, iteration elapsed: {}ms, " + + " totalRunCount: {}", + dirNum, subdirDelNum, subFileNum, (subDirNum - subdirDelNum), + timeTakenInIteration, rnCnt); + getMetrics().incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); + getPerfMetrics().setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); + } + } + + /** + * Returns the number of dirs deleted by the background service. + * + * @return Long count. + */ + @VisibleForTesting + public long getDeletedDirsCount() { + return deletedDirsCount.get(); + } + + /** + * Returns the number of sub-dirs deleted by the background service. + * + * @return Long count. + */ + @VisibleForTesting + public long getMovedDirsCount() { + return movedDirsCount.get(); + } + + /** + * Returns the number of files moved to DeletedTable by the background + * service. + * + * @return Long count. 
+ */ + @VisibleForTesting + public long getMovedFilesCount() { + return movedFilesCount.get(); + } + + @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index 60b2ab55efd7..acf9767e3653 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -22,13 +22,16 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.protobuf.ServiceException; import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -36,21 +39,27 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.utils.BackgroundTask; import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; import org.apache.hadoop.hdds.utils.BackgroundTaskResult; import org.apache.hadoop.hdds.utils.BackgroundTaskResult.EmptyTaskResult; +import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.common.BlockGroup; +import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; +import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import 
org.apache.hadoop.ozone.om.DeletingServiceMetrics; import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.PendingKeysDeletion; import org.apache.hadoop.ozone.om.SnapshotChainManager; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; @@ -58,6 +67,7 @@ import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableRenameEntryFilter; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SetSnapshotPropertyRequest; +import org.apache.hadoop.util.Time; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -165,6 +175,145 @@ public void setKeyLimitPerTask(int keyLimitPerTask) { this.keyLimitPerTask = keyLimitPerTask; } + Pair processKeyDeletes(List keyBlocksList, + Map keysToModify, List renameEntries, + String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { + + long startTime = Time.monotonicNow(); + Pair purgeResult = Pair.of(0, false); + if (LOG.isDebugEnabled()) { + LOG.debug("Send {} key(s) to SCM: {}", + keyBlocksList.size(), keyBlocksList); + } else if (LOG.isInfoEnabled()) { + int logSize = 10; + if (keyBlocksList.size() < logSize) { + logSize = keyBlocksList.size(); + } + LOG.info("Send {} key(s) to SCM, first {} keys: {}", + keyBlocksList.size(), logSize, keyBlocksList.subList(0, logSize)); + } + List blockDeletionResults = + getScmClient().deleteKeyBlocks(keyBlocksList); + LOG.info("{} BlockGroup deletion are acked by SCM 
in {} ms", + keyBlocksList.size(), Time.monotonicNow() - startTime); + if (blockDeletionResults != null) { + long purgeStartTime = Time.monotonicNow(); + purgeResult = submitPurgeKeysRequest(blockDeletionResults, + keysToModify, renameEntries, snapTableKey, expectedPreviousSnapshotId); + int limit = getOzoneManager().getConfiguration().getInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, + OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); + LOG.info("Blocks for {} (out of {}) keys are deleted from DB in {} ms. Limit per task is {}.", + purgeResult, blockDeletionResults.size(), Time.monotonicNow() - purgeStartTime, limit); + } + getPerfMetrics().setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); + return purgeResult; + } + + /** + * Submits PurgeKeys request for the keys whose blocks have been deleted + * by SCM. + * @param results DeleteBlockGroups returned by SCM. + * @param keysToModify Updated list of RepeatedOmKeyInfo + */ + private Pair submitPurgeKeysRequest(List results, + Map keysToModify, List renameEntriesToBeDeleted, + String snapTableKey, UUID expectedPreviousSnapshotId) { + List purgeKeys = new ArrayList<>(); + + // Put all keys to be purged in a list + int deletedCount = 0; + Set failedDeletedKeys = new HashSet<>(); + boolean purgeSuccess = true; + for (DeleteBlockGroupResult result : results) { + String deletedKey = result.getObjectKey(); + if (result.isSuccess()) { + // Add key to PurgeKeys list. 
+ if (keysToModify != null && !keysToModify.containsKey(deletedKey)) { + // Parse Volume and BucketName + purgeKeys.add(deletedKey); + if (LOG.isDebugEnabled()) { + LOG.debug("Key {} set to be updated in OM DB, Other versions " + + "of the key that are reclaimable are reclaimed.", deletedKey); + } + } else if (keysToModify == null) { + purgeKeys.add(deletedKey); + if (LOG.isDebugEnabled()) { + LOG.debug("Key {} set to be purged from OM DB", deletedKey); + } + } + deletedCount++; + } else { + // If the block deletion failed, then the deleted keys should also not be modified. + failedDeletedKeys.add(deletedKey); + purgeSuccess = false; + } + } + + OzoneManagerProtocolProtos.PurgeKeysRequest.Builder purgeKeysRequest = OzoneManagerProtocolProtos.PurgeKeysRequest.newBuilder(); + if (snapTableKey != null) { + purgeKeysRequest.setSnapshotTableKey(snapTableKey); + } + OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = + OzoneManagerProtocolProtos.NullableUUID.newBuilder(); + if (expectedPreviousSnapshotId != null) { + expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); + } + purgeKeysRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); + OzoneManagerProtocolProtos.DeletedKeys deletedKeys = OzoneManagerProtocolProtos.DeletedKeys.newBuilder() + .setVolumeName("") + .setBucketName("") + .addAllKeys(purgeKeys) + .build(); + purgeKeysRequest.addDeletedKeys(deletedKeys); + // Adding rename entries to be purged. 
+ if (renameEntriesToBeDeleted != null) { + purgeKeysRequest.addAllRenamedKeys(renameEntriesToBeDeleted); + } + List keysToUpdateList = new ArrayList<>(); + if (keysToModify != null) { + for (Map.Entry keyToModify : + keysToModify.entrySet()) { + if (failedDeletedKeys.contains(keyToModify.getKey())) { + continue; + } + OzoneManagerProtocolProtos.SnapshotMoveKeyInfos.Builder keyToUpdate = + OzoneManagerProtocolProtos.SnapshotMoveKeyInfos.newBuilder(); + keyToUpdate.setKey(keyToModify.getKey()); + List keyInfos = + keyToModify.getValue().getOmKeyInfoList().stream() + .map(k -> k.getProtobuf(ClientVersion.CURRENT_VERSION)) + .collect(Collectors.toList()); + keyToUpdate.addAllKeyInfos(keyInfos); + keysToUpdateList.add(keyToUpdate.build()); + } + + if (!keysToUpdateList.isEmpty()) { + purgeKeysRequest.addAllKeysToUpdate(keysToUpdateList); + } + } + + OzoneManagerProtocolProtos.OMRequest omRequest = OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(OzoneManagerProtocolProtos.Type.PurgeKeys) + .setPurgeKeysRequest(purgeKeysRequest) + .setClientId(getClientId().toString()) + .build(); + + // Submit PurgeKeys request to OM. Acquire bootstrap lock when processing deletes for snapshots. + try (BootstrapStateHandler.Lock lock = snapTableKey != null ? getBootstrapStateLock().lock() : null) { + OzoneManagerProtocolProtos.OMResponse omResponse = submitRequest(omRequest); + if (omResponse != null) { + purgeSuccess = purgeSuccess && omResponse.getSuccess(); + } + } catch (ServiceException | InterruptedException e) { + LOG.error("PurgeKey request failed. 
Will retry at next run.", e); + return Pair.of(0, false); + } + + return Pair.of(deletedCount, purgeSuccess); + } + + + /** * A key deleting task scans OM DB and looking for a certain number of * pending-deletion keys, sends these keys along with their associated blocks diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java deleted file mode 100644 index a14003c2245b..000000000000 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.om.service; - -import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE; -import static org.apache.hadoop.ozone.om.request.file.OMFileRequest.getDirectoryInfo; -import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.getPreviousSnapshot; - -import com.google.common.annotations.VisibleForTesting; -import com.google.protobuf.ServiceException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Stack; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; -import org.apache.hadoop.hdds.utils.BackgroundTask; -import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; -import org.apache.hadoop.hdds.utils.BackgroundTaskResult; -import org.apache.hadoop.hdds.utils.IOUtils; -import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.hdds.utils.db.TableIterator; -import org.apache.hadoop.ozone.common.BlockGroup; -import org.apache.hadoop.ozone.om.OMMetadataManager; -import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.OmSnapshot; -import org.apache.hadoop.ozone.om.OmSnapshotManager; -import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.SnapshotChainManager; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; -import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; -import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; -import org.apache.hadoop.ozone.om.request.file.OMFileRequest; -import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SetSnapshotPropertyRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotSize; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; -import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; - -/** - * Snapshot BG Service for deleted directory deep clean and exclusive size - * calculation for deleted directories. - */ -public class SnapshotDirectoryCleaningService - extends AbstractKeyDeletingService { - // Use only a single thread for DirDeletion. Multiple threads would read - // or write to same tables and can send deletion requests for same key - // multiple times. - private static final int SNAPSHOT_DIR_CORE_POOL_SIZE = 1; - - private final AtomicBoolean suspended; - private final Map exclusiveSizeMap; - private final Map exclusiveReplicatedSizeMap; - - public SnapshotDirectoryCleaningService(long interval, TimeUnit unit, - long serviceTimeout, - OzoneManager ozoneManager, - ScmBlockLocationProtocol scmClient) { - super(SnapshotDirectoryCleaningService.class.getSimpleName(), - interval, unit, SNAPSHOT_DIR_CORE_POOL_SIZE, serviceTimeout, - ozoneManager, scmClient); - this.suspended = new AtomicBoolean(false); - this.exclusiveSizeMap = new HashMap<>(); - this.exclusiveReplicatedSizeMap = new HashMap<>(); - } - - private boolean shouldRun() { - if (getOzoneManager() == null) { - // OzoneManager can be null for testing - return true; - } - return getOzoneManager().isLeaderReady() && !suspended.get(); - } - - /** - * Suspend the service. - */ - @VisibleForTesting - public void suspend() { - suspended.set(true); - } - - /** - * Resume the service if suspended. 
- */ - @VisibleForTesting - public void resume() { - suspended.set(false); - } - - @Override - public BackgroundTaskQueue getTasks() { - BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new SnapshotDirectoryCleaningService.SnapshotDirTask()); - return queue; - } - - private class SnapshotDirTask implements BackgroundTask { - - @Override - public BackgroundTaskResult call() { - if (!shouldRun()) { - return BackgroundTaskResult.EmptyTaskResult.newResult(); - } - LOG.debug("Running SnapshotDirectoryCleaningService"); - - getRunCount().incrementAndGet(); - OmSnapshotManager omSnapshotManager = - getOzoneManager().getOmSnapshotManager(); - Table snapshotInfoTable = - getOzoneManager().getMetadataManager().getSnapshotInfoTable(); - OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) - getOzoneManager().getMetadataManager(); - SnapshotChainManager snapChainManager = metadataManager - .getSnapshotChainManager(); - - try (TableIterator> iterator = snapshotInfoTable.iterator()) { - - while (iterator.hasNext()) { - SnapshotInfo currSnapInfo = snapshotInfoTable.get(iterator.next().getKey()); - - // Expand deleted dirs only on active snapshot. Deleted Snapshots - // will be cleaned up by SnapshotDeletingService. - if (currSnapInfo == null || currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || - currSnapInfo.isDeepCleanedDeletedDir()) { - continue; - } - - UncheckedAutoCloseableSupplier rcPrevOmSnapshot = null; - UncheckedAutoCloseableSupplier rcPrevToPrevOmSnapshot = null; - try { - long volumeId = metadataManager - .getVolumeId(currSnapInfo.getVolumeName()); - // Get bucketInfo for the snapshot bucket to get bucket layout. 
- String dbBucketKey = metadataManager - .getBucketKey(currSnapInfo.getVolumeName(), - currSnapInfo.getBucketName()); - OmBucketInfo bucketInfo = metadataManager - .getBucketTable().get(dbBucketKey); - - if (bucketInfo == null) { - throw new IllegalStateException("Bucket " + "/" + - currSnapInfo.getVolumeName() + "/" + currSnapInfo - .getBucketName() + - " is not found. BucketInfo should not be " + - "null for snapshotted bucket. The OM is in " + - "unexpected state."); - } - - SnapshotInfo previousSnapshot = getPreviousSnapshot(getOzoneManager(), snapChainManager, currSnapInfo); - SnapshotInfo previousToPrevSnapshot = null; - - Table previousKeyTable = null; - Table prevRenamedTable = null; - - if (previousSnapshot != null) { - rcPrevOmSnapshot = omSnapshotManager.getActiveSnapshot( - previousSnapshot.getVolumeName(), - previousSnapshot.getBucketName(), - previousSnapshot.getName()); - OmSnapshot omPreviousSnapshot = rcPrevOmSnapshot.get(); - - previousKeyTable = omPreviousSnapshot.getMetadataManager() - .getKeyTable(bucketInfo.getBucketLayout()); - prevRenamedTable = omPreviousSnapshot - .getMetadataManager().getSnapshotRenamedTable(); - previousToPrevSnapshot = getPreviousSnapshot(getOzoneManager(), snapChainManager, previousSnapshot); - } - - Table previousToPrevKeyTable = null; - if (previousToPrevSnapshot != null) { - rcPrevToPrevOmSnapshot = omSnapshotManager.getActiveSnapshot( - previousToPrevSnapshot.getVolumeName(), - previousToPrevSnapshot.getBucketName(), - previousToPrevSnapshot.getName()); - OmSnapshot omPreviousToPrevSnapshot = rcPrevToPrevOmSnapshot.get(); - - previousToPrevKeyTable = omPreviousToPrevSnapshot - .getMetadataManager() - .getKeyTable(bucketInfo.getBucketLayout()); - } - - String dbBucketKeyForDir = metadataManager.getBucketKeyPrefixFSO( - currSnapInfo.getVolumeName(), currSnapInfo.getBucketName()); - try (UncheckedAutoCloseableSupplier - rcCurrOmSnapshot = omSnapshotManager.getActiveSnapshot( - currSnapInfo.getVolumeName(), - 
currSnapInfo.getBucketName(), - currSnapInfo.getName())) { - - OmSnapshot currOmSnapshot = rcCurrOmSnapshot.get(); - Table snapDeletedDirTable = - currOmSnapshot.getMetadataManager().getDeletedDirTable(); - - try (TableIterator> deletedDirIterator = snapDeletedDirTable - .iterator(dbBucketKeyForDir)) { - - while (deletedDirIterator.hasNext()) { - Table.KeyValue deletedDirInfo = - deletedDirIterator.next(); - - // For each deleted directory we do an in-memory DFS and - // do a deep clean and exclusive size calculation. - iterateDirectoryTree(deletedDirInfo, volumeId, bucketInfo, - previousSnapshot, previousToPrevSnapshot, - currOmSnapshot, previousKeyTable, prevRenamedTable, - previousToPrevKeyTable, dbBucketKeyForDir); - } - updateDeepCleanSnapshotDir(currSnapInfo.getTableKey()); - if (previousSnapshot != null) { - updateExclusiveSize(previousSnapshot.getTableKey()); - } - } - } - } finally { - IOUtils.closeQuietly(rcPrevOmSnapshot, rcPrevToPrevOmSnapshot); - } - } - } catch (IOException ex) { - LOG.error("Error while running directory deep clean on snapshots." 
+ - " Will retry at next run.", ex); - } - return BackgroundTaskResult.EmptyTaskResult.newResult(); - } - } - - @SuppressWarnings("checkstyle:ParameterNumber") - private void iterateDirectoryTree( - Table.KeyValue deletedDirInfo, long volumeId, - OmBucketInfo bucketInfo, - SnapshotInfo previousSnapshot, - SnapshotInfo previousToPrevSnapshot, - OmSnapshot currOmSnapshot, - Table previousKeyTable, - Table prevRenamedTable, - Table previousToPrevKeyTable, - String dbBucketKeyForDir) throws IOException { - - Table snapDirTable = - currOmSnapshot.getMetadataManager().getDirectoryTable(); - Table snapRenamedTable = - currOmSnapshot.getMetadataManager().getSnapshotRenamedTable(); - - Stack stackNodes = new Stack<>(); - OmDirectoryInfo omDeletedDirectoryInfo = - getDirectoryInfo(deletedDirInfo.getValue()); - String dirPathDbKey = currOmSnapshot.getMetadataManager() - .getOzonePathKey(volumeId, bucketInfo.getObjectID(), - omDeletedDirectoryInfo); - // Stack Init - StackNode topLevelDir = new StackNode(); - topLevelDir.setDirKey(dirPathDbKey); - topLevelDir.setDirValue(omDeletedDirectoryInfo); - stackNodes.push(topLevelDir); - - try (TableIterator> - directoryIterator = snapDirTable.iterator(dbBucketKeyForDir)) { - - while (!stackNodes.isEmpty()) { - StackNode stackTop = stackNodes.peek(); - // First process all the files in the current directory - // and then do a DFS for directory. - if (StringUtils.isEmpty(stackTop.getSubDirSeek())) { - processFilesUnderDir(previousSnapshot, - previousToPrevSnapshot, - volumeId, - bucketInfo, - stackTop.getDirValue(), - currOmSnapshot.getMetadataManager(), - snapRenamedTable, - previousKeyTable, - prevRenamedTable, - previousToPrevKeyTable); - // Format : /volId/bucketId/parentId/ - String seekDirInDB = currOmSnapshot.getMetadataManager() - .getOzonePathKey(volumeId, bucketInfo.getObjectID(), - stackTop.getDirValue().getObjectID(), ""); - stackTop.setSubDirSeek(seekDirInDB); - } else { - // Adding \0 to seek the next greater element. 
- directoryIterator.seek(stackTop.getSubDirSeek() + "\0"); - if (directoryIterator.hasNext()) { - - Table.KeyValue deletedSubDirInfo = directoryIterator.next(); - String deletedSubDirKey = deletedSubDirInfo.getKey(); - String prefixCheck = currOmSnapshot.getMetadataManager() - .getOzoneDeletePathDirKey(stackTop.getSubDirSeek()); - // Exit if it is out of the sub dir prefix scope. - if (!deletedSubDirKey.startsWith(prefixCheck)) { - stackNodes.pop(); - } else { - stackTop.setSubDirSeek(deletedSubDirKey); - StackNode nextSubDir = new StackNode(); - nextSubDir.setDirKey(deletedSubDirInfo.getKey()); - nextSubDir.setDirValue(deletedSubDirInfo.getValue()); - stackNodes.push(nextSubDir); - } - } else { - stackNodes.pop(); - } - } - } - } - } - - private void updateExclusiveSize(String prevSnapshotKeyTable) throws IOException { - ClientId clientId = ClientId.randomId(); - SnapshotSize snapshotSize = SnapshotSize.newBuilder() - .setExclusiveSize( - exclusiveSizeMap.getOrDefault(prevSnapshotKeyTable, 0L)) - .setExclusiveReplicatedSize( - exclusiveReplicatedSizeMap.getOrDefault( - prevSnapshotKeyTable, 0L)) - .build(); - exclusiveSizeMap.remove(prevSnapshotKeyTable); - exclusiveReplicatedSizeMap.remove(prevSnapshotKeyTable); - SetSnapshotPropertyRequest - setSnapshotPropertyRequest = - SetSnapshotPropertyRequest.newBuilder() - .setSnapshotKey(prevSnapshotKeyTable) - .setSnapshotSizeDeltaFromDirDeepCleaning(snapshotSize) - .build(); - - OMRequest omRequest = OMRequest.newBuilder() - .setCmdType(Type.SetSnapshotProperty) - .setSetSnapshotPropertyRequest(setSnapshotPropertyRequest) - .setClientId(clientId.toString()) - .build(); - - submitRequest(omRequest, clientId); - } - - @SuppressWarnings("checkstyle:ParameterNumber") - private void processFilesUnderDir( - SnapshotInfo previousSnapshot, - SnapshotInfo previousToPrevSnapshot, - long volumeId, - OmBucketInfo bucketInfo, - OmDirectoryInfo parentInfo, - OMMetadataManager metadataManager, - Table snapRenamedTable, - Table 
previousKeyTable, - Table prevRenamedTable, - Table previousToPrevKeyTable) - throws IOException { - String seekFileInDB = metadataManager.getOzonePathKey(volumeId, - bucketInfo.getObjectID(), - parentInfo.getObjectID(), ""); - List blocksForKeyDelete = new ArrayList<>(); - - Table fileTable = metadataManager.getFileTable(); - try (TableIterator> - iterator = fileTable.iterator(seekFileInDB)) { - - while (iterator.hasNext()) { - Table.KeyValue entry = iterator.next(); - OmKeyInfo fileInfo = entry.getValue(); - if (!OMFileRequest.isImmediateChild(fileInfo.getParentObjectID(), - parentInfo.getObjectID())) { - break; - } - - String ozoneDeletePathKey = metadataManager - .getOzoneDeletePathKey(fileInfo.getObjectID(), entry.getKey()); - if (isKeyReclaimable(previousKeyTable, snapRenamedTable, - fileInfo, bucketInfo, volumeId, null)) { - for (OmKeyLocationInfoGroup keyLocations : - fileInfo.getKeyLocationVersions()) { - List item = keyLocations.getLocationList().stream() - .map(b -> new BlockID(b.getContainerID(), b.getLocalID())) - .collect(Collectors.toList()); - BlockGroup keyBlocks = BlockGroup.newBuilder() - .setKeyName(ozoneDeletePathKey) - .addAllBlockIDs(item) - .build(); - blocksForKeyDelete.add(keyBlocks); - } - // TODO: Add Retry mechanism. 
- getScmClient().deleteKeyBlocks(blocksForKeyDelete); - } else if (previousSnapshot != null) { - calculateExclusiveSize(previousSnapshot, previousToPrevSnapshot, - fileInfo, bucketInfo, volumeId, snapRenamedTable, - previousKeyTable, prevRenamedTable, previousToPrevKeyTable, - exclusiveSizeMap, exclusiveReplicatedSizeMap); - } - } - } - } - - private void updateDeepCleanSnapshotDir(String snapshotKeyTable) { - ClientId clientId = ClientId.randomId(); - SetSnapshotPropertyRequest setSnapshotPropertyRequest = - SetSnapshotPropertyRequest.newBuilder() - .setSnapshotKey(snapshotKeyTable) - .setDeepCleanedDeletedDir(true) - .build(); - - OMRequest omRequest = OMRequest.newBuilder() - .setCmdType(Type.SetSnapshotProperty) - .setSetSnapshotPropertyRequest(setSnapshotPropertyRequest) - .setClientId(clientId.toString()) - .build(); - - submitRequest(omRequest, clientId); - } - - public void submitRequest(OMRequest omRequest, ClientId clientId) { - try { - OzoneManagerRatisUtils.submitRequest(getOzoneManager(), omRequest, clientId, getRunCount().get()); - } catch (ServiceException e) { - LOG.error("Snapshot deep cleaning request failed. " + - "Will retry at next run.", e); - } - } - - /** - * Stack node data for directory deep clean for snapshot. 
- */ - private static class StackNode { - private String dirKey; - private OmDirectoryInfo dirValue; - private String subDirSeek; - - public String getDirKey() { - return dirKey; - } - - public void setDirKey(String dirKey) { - this.dirKey = dirKey; - } - - public OmDirectoryInfo getDirValue() { - return dirValue; - } - - public void setDirValue(OmDirectoryInfo dirValue) { - this.dirValue = dirValue; - } - - public String getSubDirSeek() { - return subDirSeek; - } - - public void setSubDirSeek(String subDirSeek) { - this.subDirSeek = subDirSeek; - } - - @Override - public String toString() { - return "StackNode{" + - "dirKey='" + dirKey + '\'' + - ", dirObjectId=" + dirValue.getObjectID() + - ", subDirSeek='" + subDirSeek + '\'' + - '}'; - } - } -} From 034585bc936a6015a5e1df5e73af942524310c8a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 05:33:39 -0400 Subject: [PATCH 06/35] HDDS-13170. Reclaimable filter should always reclaim entries when buckets and volumes have already been deleted Change-Id: I16dc9d8f00686320b4e98fa5691420294a7f1e2f --- .../om/snapshot/filter/ReclaimableFilter.java | 21 +++++++++---- .../filter/AbstractReclaimableFilterTest.java | 27 ++++++++++++----- .../filter/TestReclaimableDirFilter.java | 2 +- .../filter/TestReclaimableFilter.java | 30 +++++++++++++++++++ .../filter/TestReclaimableKeyFilter.java | 2 +- .../TestReclaimableRenameEntryFilter.java | 2 +- 6 files changed, 68 insertions(+), 16 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java index 0bb53e628032..5dc78e708fcb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/filter/ReclaimableFilter.java @@ -33,6 +33,7 @@ 
import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.SnapshotChainManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; @@ -167,11 +168,21 @@ private void initializePreviousSnapshotsFromChain(String volume, String bucket) previousOmSnapshots.add(null); previousSnapshotInfos.add(null); } - - // NOTE: Getting volumeId and bucket from active OM. - // This would be wrong on volume & bucket renames support. - bucketInfo = ozoneManager.getBucketInfo(volume, bucket); + } + // NOTE: Getting volumeId and bucket from active OM. + // This would be wrong on volume & bucket renames support. + try { + bucketInfo = ozoneManager.getBucketManager().getBucketInfo(volume, bucket); volumeId = ozoneManager.getMetadataManager().getVolumeId(volume); + } catch (OMException e) { + // If Volume or bucket has been deleted then all keys should be reclaimable as no snapshots would exist. + if (OMException.ResultCodes.VOLUME_NOT_FOUND == e.getResult() || + OMException.ResultCodes.BUCKET_NOT_FOUND == e.getResult()) { + bucketInfo = null; + volumeId = null; + return; + } + throw e; } } catch (IOException e) { this.cleanup(); @@ -187,7 +198,7 @@ public synchronized Boolean apply(Table.KeyValue keyValue) throws IOE if (!validateExistingLastNSnapshotsInChain(volume, bucket) || !snapshotIdLocks.isLockAcquired()) { initializePreviousSnapshotsFromChain(volume, bucket); } - boolean isReclaimable = isReclaimable(keyValue); + boolean isReclaimable = (bucketInfo == null) || isReclaimable(keyValue); // This is to ensure the reclamation ran on the same previous snapshot and no change occurred in the chain // while processing the entry. 
return isReclaimable && validateExistingLastNSnapshotsInChain(volume, bucket); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/AbstractReclaimableFilterTest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/AbstractReclaimableFilterTest.java index fc7a53422c50..4f0205a0e15a 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/AbstractReclaimableFilterTest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/AbstractReclaimableFilterTest.java @@ -48,12 +48,14 @@ import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; +import org.apache.hadoop.ozone.om.BucketManager; import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.SnapshotChainManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; @@ -159,19 +161,28 @@ protected void teardown() throws IOException { private void mockOzoneManager(BucketLayout bucketLayout) throws IOException { OMMetadataManager metadataManager = mock(OMMetadataManager.class); + BucketManager bucketManager = mock(BucketManager.class); when(ozoneManager.getMetadataManager()).thenReturn(metadataManager); + when(ozoneManager.getBucketManager()).thenReturn(bucketManager); long volumeCount = 0; - long bucketCount = 0; for (String volume : volumes) { when(metadataManager.getVolumeId(eq(volume))).thenReturn(volumeCount); - for (String bucket : buckets) { - 
when(ozoneManager.getBucketInfo(eq(volume), eq(bucket))) - .thenReturn(OmBucketInfo.newBuilder().setVolumeName(volume).setBucketName(bucket) - .setObjectID(bucketCount).setBucketLayout(bucketLayout).build()); - bucketCount++; - } volumeCount++; } + + when(bucketManager.getBucketInfo(anyString(), anyString())).thenAnswer(i -> { + String volume = i.getArgument(0, String.class); + String bucket = i.getArgument(1, String.class); + if (!volumes.contains(volume)) { + throw new OMException("Volume " + volume + " already exists", OMException.ResultCodes.VOLUME_NOT_FOUND); + } + if (!buckets.contains(bucket)) { + throw new OMException("Bucket " + bucket + " already exists", OMException.ResultCodes.BUCKET_NOT_FOUND); + } + return OmBucketInfo.newBuilder().setVolumeName(volume).setBucketName(bucket) + .setObjectID((long) volumes.indexOf(volume) * buckets.size() + buckets.indexOf(bucket)) + .setBucketLayout(bucketLayout).build(); + }); } private void mockOmSnapshotManager(OzoneManager om) throws RocksDBException, IOException { @@ -232,7 +243,7 @@ private void mockOmSnapshotManager(OzoneManager om) throws RocksDBException, IOE protected List getLastSnapshotInfos( String volume, String bucket, int numberOfSnapshotsInChain, int index) { - List infos = getSnapshotInfos().get(getKey(volume, bucket)); + List infos = getSnapshotInfos().getOrDefault(getKey(volume, bucket), Collections.emptyList()); int endIndex = Math.min(index - 1, infos.size() - 1); return IntStream.range(endIndex - numberOfSnapshotsInChain + 1, endIndex + 1).mapToObj(i -> i >= 0 ? 
infos.get(i) : null).collect(Collectors.toList()); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableDirFilter.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableDirFilter.java index a85da9900a03..c2fcfa30b097 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableDirFilter.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableDirFilter.java @@ -72,7 +72,7 @@ private void testReclaimableDirFilter(String volume, String bucket, int index, List snapshotInfos = getLastSnapshotInfos(volume, bucket, 1, index); assertEquals(snapshotInfos.size(), 1); SnapshotInfo prevSnapshotInfo = snapshotInfos.get(0); - OmBucketInfo bucketInfo = getOzoneManager().getBucketInfo(volume, bucket); + OmBucketInfo bucketInfo = getOzoneManager().getBucketManager().getBucketInfo(volume, bucket); long volumeId = getOzoneManager().getMetadataManager().getVolumeId(volume); KeyManager keyManager = getKeyManager(); if (prevSnapshotInfo != null) { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableFilter.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableFilter.java index 2b986f8fb32a..7b50cff3f388 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableFilter.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableFilter.java @@ -130,6 +130,36 @@ public void testReclaimableFilterSnapshotChainInitialization( false); } + @ParameterizedTest + @MethodSource("testReclaimableFilterArguments") + public void testReclaimableFilterSnapshotChainInitializationWithInvalidVolume( + int numberOfPreviousSnapshotsFromChain, int actualNumberOfSnapshots) + throws 
IOException, RocksDBException { + SnapshotInfo currentSnapshotInfo = + setup(numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots, actualNumberOfSnapshots + 1, 4, 2); + String volume = "volume" + 6; + String bucket = getBuckets().get(1); + testSnapshotInitAndLocking(volume, bucket, numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots + 1, + currentSnapshotInfo, true, true); + testSnapshotInitAndLocking(volume, bucket, numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots + 1, + currentSnapshotInfo, false, true); + } + + @ParameterizedTest + @MethodSource("testReclaimableFilterArguments") + public void testReclaimableFilterSnapshotChainInitializationWithInvalidBucket( + int numberOfPreviousSnapshotsFromChain, int actualNumberOfSnapshots) + throws IOException, RocksDBException { + SnapshotInfo currentSnapshotInfo = + setup(numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots, actualNumberOfSnapshots + 1, 4, 2); + String volume = getVolumes().get(3); + String bucket = "bucket" + 6; + testSnapshotInitAndLocking(volume, bucket, numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots + 1, + currentSnapshotInfo, true, true); + testSnapshotInitAndLocking(volume, bucket, numberOfPreviousSnapshotsFromChain, actualNumberOfSnapshots + 1, + currentSnapshotInfo, false, true); + } + @ParameterizedTest @MethodSource("testReclaimableFilterArguments") public void testReclaimableFilterWithBucketVolumeMismatch( diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableKeyFilter.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableKeyFilter.java index 9db680c18f97..5e781ddfec17 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableKeyFilter.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableKeyFilter.java @@ -101,7 +101,7 @@ private 
void testReclaimableKeyFilter(String volume, String bucket, int index, List snapshotInfos = getLastSnapshotInfos(volume, bucket, 2, index); SnapshotInfo previousToPreviousSapshotInfo = snapshotInfos.get(0); SnapshotInfo prevSnapshotInfo = snapshotInfos.get(1); - OmBucketInfo bucketInfo = getOzoneManager().getBucketInfo(volume, bucket); + OmBucketInfo bucketInfo = getOzoneManager().getBucketManager().getBucketInfo(volume, bucket); long volumeId = getOzoneManager().getMetadataManager().getVolumeId(volume); UncheckedAutoCloseableSupplier prevSnap = Optional.ofNullable(prevSnapshotInfo) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableRenameEntryFilter.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableRenameEntryFilter.java index 4fad10f248f7..59f4cf0ca02e 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableRenameEntryFilter.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/filter/TestReclaimableRenameEntryFilter.java @@ -80,7 +80,7 @@ private void testReclaimableRenameEntryFilter(String volume, String bucket, int throws IOException { List snapshotInfos = getLastSnapshotInfos(volume, bucket, 1, index); SnapshotInfo prevSnapshotInfo = snapshotInfos.get(0); - OmBucketInfo bucketInfo = getOzoneManager().getBucketInfo(volume, bucket); + OmBucketInfo bucketInfo = getOzoneManager().getBucketManager().getBucketInfo(volume, bucket); if (prevSnapshotInfo != null) { UncheckedAutoCloseableSupplier prevSnap = Optional.ofNullable(prevSnapshotInfo) .map(info -> { From 7eb2b98102aec4b9e53e7954764c0d2c467e64b6 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 05:37:08 -0400 Subject: [PATCH 07/35] HDDS-13160. 
Fix tests Change-Id: Ie5fd1406bbb8af3a9ba76440dcba9b8d8db14691 --- .../TestDirectoryDeletingServiceWithFSO.java | 14 +-- .../service/AbstractKeyDeletingService.java | 1 + .../om/service/DirectoryDeletingService.java | 104 ++++++++++-------- 3 files changed, 64 insertions(+), 55 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index a39aaf565ff7..4eceacf918d8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -281,7 +281,7 @@ public void testDeleteWithLargeSubPathsThanBatchSize() throws Exception { long elapsedRunCount = dirDeletingService.getRunCount().get() - preRunCount; assertThat(dirDeletingService.getRunCount().get()).isGreaterThan(1); // Ensure dir deleting speed, here provide a backup value for safe CI - assertThat(elapsedRunCount).isGreaterThanOrEqualTo(7); + GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() - preRunCount >= 7, 1000, 100000); } @Test @@ -653,8 +653,8 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() } }, 1000, 10000); return i.callRealMethod(); - }).when(omSnapshotManager).getSnapshot(ArgumentMatchers.eq(testVolumeName), ArgumentMatchers.eq(testBucketName), - ArgumentMatchers.eq(snap1)); + }).when(omSnapshotManager).getActiveSnapshot(ArgumentMatchers.eq(testVolumeName), + ArgumentMatchers.eq(testBucketName), ArgumentMatchers.eq(snap1)); assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 1); service.runPeriodicalTaskNow(); service.runPeriodicalTaskNow(); @@ -731,7 +731,6 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { DirectoryDeletingService dirDeletingService = 
(DirectoryDeletingService) cluster.getOzoneManager().getKeyManager() .getDirDeletingService(); - // After delete. 5 more files left out under the root dir assertTableRowCount(keyTable, 5); assertTableRowCount(dirTable, 5); @@ -751,14 +750,13 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { assertSubPathsCount(dirDeletingService::getMovedFilesCount, 0); assertSubPathsCount(dirDeletingService::getMovedDirsCount, 0); assertSubPathsCount(dirDeletingService::getDeletedDirsCount, 0); - // Case-2) Delete dir fs.delete(root, true); // After delete. 5 sub files are still in keyTable. // 4 dirs in dirTable. assertTableRowCount(keyTable, 5); - assertTableRowCount(dirTable, 4); + assertTableRowCount(dirTable, 0); // KeyDeletingService and DirectoryDeletingService will not // clean up because the paths are part of a snapshot. @@ -766,7 +764,7 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { // remain in dirTable and keyTable respectively. long prevDDSRunCount = dirDeletingService.getRunCount().get(); long prevKDSRunCount = keyDeletingService.getRunCount().get(); - assertTableRowCount(deletedDirTable, 1); + assertTableRowCount(deletedDirTable, 5); assertTableRowCount(deletedKeyTable, 3); GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() > prevDDSRunCount, 100, 10000); @@ -774,7 +772,7 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { prevKDSRunCount, 100, 10000); assertSubPathsCount(dirDeletingService::getMovedFilesCount, 0); - assertSubPathsCount(dirDeletingService::getMovedDirsCount, 0); + assertSubPathsCount(dirDeletingService::getMovedDirsCount, 4); assertSubPathsCount(dirDeletingService::getDeletedDirsCount, 0); // Manual cleanup deletedDirTable for next tests diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index ee699e16c31d..b7b536b2a36b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -384,6 +384,7 @@ public void optimizeDirDeletesAndSubmitRequest( int consumedSize = 0; while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { try { + LOG.info("Subdir deleting request: {}", subDirRecursiveCnt); Pair stringOmKeyInfoPair = allSubDirList.get(subDirRecursiveCnt++); Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), stringOmKeyInfoPair.getValue())); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index a31d268e016d..a29a4cbb8d4a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.service; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -213,15 +214,11 @@ public int getPriority() { } private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequestUpdatingExclusiveSize( - Map exclusiveSizeMap, Map exclusiveReplicatedSizeMap, UUID snapshotID) { + long exclusiveSize, long exclusiveReplicatedSize, UUID snapshotID) { OzoneManagerProtocolProtos.SnapshotSize snapshotSize = OzoneManagerProtocolProtos.SnapshotSize.newBuilder() - .setExclusiveSize( - 
exclusiveSizeMap.getOrDefault(snapshotID, 0L)) - .setExclusiveReplicatedSize( - exclusiveReplicatedSizeMap.getOrDefault( - snapshotID, 0L)) + .setExclusiveSize(exclusiveSize) + .setExclusiveReplicatedSize(exclusiveReplicatedSize) .build(); - return OzoneManagerProtocolProtos.SetSnapshotPropertyRequest.newBuilder() .setSnapshotKey(snapshotChainManager.getTableKey(snapshotID)) .setSnapshotSizeDeltaFromDirDeepCleaning(snapshotSize) @@ -235,7 +232,7 @@ private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequ */ private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, long remainingBufLimit, long rnCnt) throws IOException, ExecutionException, InterruptedException { - String volume, bucket, snapshotTableKey; + String volume, bucket; String snapshotTableKey; if (currentSnapshotInfo != null) { volume = currentSnapshotInfo.getVolumeName(); bucket = currentSnapshotInfo.getBucketName(); @@ -244,47 +241,39 @@ private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyMan volume = null; bucket = null; snapshotTableKey = null; } - OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); - IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); - try (DeletedDirSupplier dirSupplier = new DeletedDirSupplier(currentSnapshotInfo == null ? - keyManager.getDeletedDirEntries() : keyManager.getDeletedDirEntries(volume, bucket)); - ReclaimableDirFilter reclaimableDirFilter = new ReclaimableDirFilter(getOzoneManager(), - omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock); - ReclaimableKeyFilter reclaimableFileFilter = new ReclaimableKeyFilter(getOzoneManager(), - omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock)) { + keyManager.getDeletedDirEntries() : keyManager.getDeletedDirEntries(volume, bucket))) { // This is to avoid race condition b/w purge request and snapshot chain update. 
For AOS taking the global // snapshotId since AOS could process multiple buckets in one iteration. While using path // previous snapshotId for a snapshot since it would process only one bucket. UUID expectedPreviousSnapshotId = currentSnapshotInfo == null ? snapshotChainManager.getLatestGlobalSnapshotId() : SnapshotUtils.getPreviousSnapshotId(currentSnapshotInfo, snapshotChainManager); + Map> exclusiveSizeMap = Maps.newConcurrentMap(); + CompletableFuture processedAllDeletedDirs = CompletableFuture.completedFuture(true); for (int i = 0; i < numberOfParallelThreadsPerStore; i++) { - CompletableFuture future = new CompletableFuture<>(); - deletionThreadPool.submit(() -> { + CompletableFuture future = CompletableFuture.supplyAsync(() -> { try { - boolean processedAll = processDeletedDirectories(snapshotTableKey, dirSupplier, remainingBufLimit, - reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, rnCnt); - future.complete(processedAll); + return processDeletedDirectories(currentSnapshotInfo, keyManager, dirSupplier, remainingBufLimit, + expectedPreviousSnapshotId, exclusiveSizeMap, rnCnt); } catch (Throwable e) { - future.complete(false); + return false; } - }); + }, deletionThreadPool); processedAllDeletedDirs = future.thenCombine(future, (a, b) -> a && b); } // If AOS or all directories have been processed for snapshot, update snapshot size delta and deep clean flag // if it is a snapshot. 
if (processedAllDeletedDirs.get()) { List setSnapshotPropertyRequests = new ArrayList<>(); - Map exclusiveReplicatedSizeMap = reclaimableFileFilter.getExclusiveReplicatedSizeMap(); - Map exclusiveSizeMap = reclaimableFileFilter.getExclusiveSizeMap(); - List previousPathSnapshotsInChain = - Stream.of(exclusiveSizeMap.keySet(), exclusiveReplicatedSizeMap.keySet()) - .flatMap(Collection::stream).distinct().collect(Collectors.toList()); - for (UUID snapshot : previousPathSnapshotsInChain) { - setSnapshotPropertyRequests.add(getSetSnapshotRequestUpdatingExclusiveSize(exclusiveSizeMap, - exclusiveReplicatedSizeMap, snapshot)); + + for (Map.Entry> entry : exclusiveSizeMap.entrySet()) { + UUID snapshotID = entry.getKey(); + long exclusiveSize = entry.getValue().getLeft(); + long exclusiveReplicatedSize = entry.getValue().getRight(); + setSnapshotPropertyRequests.add(getSetSnapshotRequestUpdatingExclusiveSize( + exclusiveSize, exclusiveReplicatedSize, snapshotID)); } // Updating directory deep clean flag of snapshot. @@ -300,24 +289,30 @@ private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyMan } /** - * Processes the directories marked as deleted and performs reclamation if applicable. - * This includes preparing and submitting requests to delete directories and their - * subdirectories/files while respecting buffer limits and snapshot constraints. + * Processes deleted directories for snapshot management, determining whether + * directories and files can be purged, and calculates exclusive size mappings + * for snapshots. * - * @param snapshotTableKey the key of the snapshot table to which the operation applies - * @param dirSupplier thread safe supplier to fetch the next directory marked as deleted. 
- * @param remainingBufLimit the limit for the remaining buffer size available for processing - * @param reclaimableDirFilter filter to determine whether a directory is reclaimable - * @param reclaimableFileFilter filter to determine whether a file is reclaimable - * @param expectedPreviousSnapshotId UUID of the expected previous snapshot in the snapshot chain - * @param runCount the current run count of the deletion process - * @return true if no purge requests were submitted (indicating no deletions processed), - * false otherwise + * @param currentSnapshotInfo Information about the current snapshot whose deleted directories are being processed. + * @param keyManager Key manager of the underlying storage system to handle key operations. + * @param dirSupplier Supplier for fetching pending deleted directories to be processed. + * @param remainingBufLimit Remaining buffer limit for processing directories and files. + * @param expectedPreviousSnapshotId The UUID of the previous snapshot expected in the chain. + * @param totalExclusiveSizeMap A map for storing total exclusive size and exclusive replicated size + * for each snapshot. + * @param runCount The number of times the processing task has been executed. + * @return A boolean indicating whether the processed directory list is empty. 
*/ - private boolean processDeletedDirectories(String snapshotTableKey, - DeletedDirSupplier dirSupplier, long remainingBufLimit, ReclaimableDirFilter reclaimableDirFilter, - ReclaimableKeyFilter reclaimableFileFilter, UUID expectedPreviousSnapshotId, long runCount) { - try { + private boolean processDeletedDirectories(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, + DeletedDirSupplier dirSupplier, long remainingBufLimit, UUID expectedPreviousSnapshotId, + Map> totalExclusiveSizeMap, long runCount) { + OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); + IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); + String snapshotTableKey = currentSnapshotInfo == null ? null : currentSnapshotInfo.getTableKey(); + try (ReclaimableDirFilter reclaimableDirFilter = new ReclaimableDirFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock); + ReclaimableKeyFilter reclaimableFileFilter = new ReclaimableKeyFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock)) { long startTime = Time.monotonicNow(); long dirNum = 0L; long subDirNum = 0L; @@ -355,6 +350,21 @@ private boolean processDeletedDirectories(String snapshotTableKey, startTime, remainingBufLimit, getOzoneManager().getKeyManager(), reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, runCount); + Map exclusiveReplicatedSizeMap = reclaimableFileFilter.getExclusiveReplicatedSizeMap(); + Map exclusiveSizeMap = reclaimableFileFilter.getExclusiveSizeMap(); + List previousPathSnapshotsInChain = + Stream.of(exclusiveSizeMap.keySet(), exclusiveReplicatedSizeMap.keySet()) + .flatMap(Collection::stream).distinct().collect(Collectors.toList()); + for (UUID snapshot : previousPathSnapshotsInChain) { + totalExclusiveSizeMap.compute(snapshot, (k, v) -> { + long exclusiveSize = exclusiveSizeMap.getOrDefault(snapshot, 0L); + long exclusiveReplicatedSize = 
exclusiveReplicatedSizeMap.getOrDefault(snapshot, 0L); + if (v == null) { + return Pair.of(exclusiveSize, exclusiveReplicatedSize); + } + return Pair.of(v.getLeft() + exclusiveSize, v.getRight() + exclusiveReplicatedSize); + }); + } return purgePathRequestList.isEmpty(); } catch (IOException e) { From 1fe3cfcbd29654df3bf50cdb82ac90e8f5fb6339 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 09:04:34 -0400 Subject: [PATCH 08/35] HDDS-13034. Fix find bugs Change-Id: I61ef68263ff88daa0e53dfb9d7d8eb62495d226b --- .../hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index 4eceacf918d8..d7c12d0b81f4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -277,8 +277,6 @@ public void testDeleteWithLargeSubPathsThanBatchSize() throws Exception { assertEquals(18, metrics.getNumSubDirsMovedToDeletedDirTable()); assertEquals(18, metrics.getNumSubDirsSentForPurge()); - - long elapsedRunCount = dirDeletingService.getRunCount().get() - preRunCount; assertThat(dirDeletingService.getRunCount().get()).isGreaterThan(1); // Ensure dir deleting speed, here provide a backup value for safe CI GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() - preRunCount >= 7, 1000, 100000); From 99b61a2813b28e2f25399b4cabac7e06d309470a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 09:19:19 -0400 Subject: [PATCH 09/35] HDDS-13034. 
Fix find bugs Change-Id: I2667c6d12523f4dee7cbcf7c48c93803fe84d3d4 --- .../hadoop/ozone/om/service/AbstractKeyDeletingService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index b7b536b2a36b..ee699e16c31d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -384,7 +384,6 @@ public void optimizeDirDeletesAndSubmitRequest( int consumedSize = 0; while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { try { - LOG.info("Subdir deleting request: {}", subDirRecursiveCnt); Pair stringOmKeyInfoPair = allSubDirList.get(subDirRecursiveCnt++); Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), stringOmKeyInfoPair.getValue())); From 9df1494e32b169dd0feb01966491638e735c6ba3 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 11:33:24 -0400 Subject: [PATCH 10/35] HDDS-13160. 
Remove Snapshot directory cleaning service Change-Id: Iba8e6ad3fd3e9b997bcaaf1a80f1ea92ae59b6f9 --- .../om/service/TestRootedDDSWithFSO.java | 2 +- .../service/AbstractKeyDeletingService.java | 55 +--- .../om/service/DirectoryDeletingService.java | 308 +++++++++--------- .../ozone/om/service/KeyDeletingService.java | 109 +++---- 4 files changed, 211 insertions(+), 263 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java index 6f4e13448261..3fc7d15f2375 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRootedDDSWithFSO.java @@ -17,12 +17,12 @@ package org.apache.hadoop.ozone.om.service; -import static org.apache.hadoop.ozone.om.service.TestDirectoryDeletingServiceWithFSO.assertSubPathsCount; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY; +import static org.apache.hadoop.ozone.om.service.TestDirectoryDeletingServiceWithFSO.assertSubPathsCount; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 97f794688c49..05fb1ae9cbef 
100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -17,57 +17,22 @@ package org.apache.hadoop.ozone.om.service; -import static org.apache.hadoop.ozone.OzoneConsts.OBJECT_ID_RECLAIM_BLOCKS; -import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; -import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.isBlockLocationInfoSame; - import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.HddsUtils; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.utils.BackgroundService; -import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.ozone.ClientVersion; -import org.apache.hadoop.ozone.common.BlockGroup; -import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; -import org.apache.hadoop.ozone.om.DeleteKeysResult; import org.apache.hadoop.ozone.om.DeletingServiceMetrics; -import org.apache.hadoop.ozone.om.KeyManager; -import org.apache.hadoop.ozone.om.OMConfigKeys; -import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMPerformanceMetrics; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; -import 
org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgeKeysRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; -import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.util.function.CheckedFunction; /** * Abstracts common code from KeyDeletingService and DirectoryDeletingService @@ -77,10 +42,10 @@ public abstract class AbstractKeyDeletingService extends BackgroundService implements BootstrapStateHandler { private final OzoneManager ozoneManager; - private final DeletingServiceMetrics metrics; - private final OMPerformanceMetrics perfMetrics; - private final ScmBlockLocationProtocol scmClient; - private final ClientId clientId = ClientId.randomId(); + final DeletingServiceMetrics metrics; + final OMPerformanceMetrics perfMetrics; + final ScmBlockLocationProtocol scmClient; + final ClientId clientId = ClientId.randomId(); private final AtomicLong runCount; private final AtomicLong callId; private final BootstrapStateHandler.Lock lock = @@ -99,18 +64,6 @@ public AbstractKeyDeletingService(String serviceName, long interval, this.callId = new AtomicLong(0); } - ClientId getClientId() { - return clientId; - } - - 
OMPerformanceMetrics getPerfMetrics() { - return perfMetrics; - } - - DeletingServiceMetrics getMetrics() { - return metrics; - } - protected OMResponse submitRequest(OMRequest omRequest) throws ServiceException { return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callId.incrementAndGet()); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 5925674978d2..50b6d93e97f8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -169,128 +169,30 @@ public void setRatisByteLimit(int ratisByteLimit) { this.ratisByteLimit = ratisByteLimit; } - private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List requests, - String snapTableKey, UUID expectedPreviousSnapshotId) { - OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = - OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); - - if (snapTableKey != null) { - purgeDirRequest.setSnapshotTableKey(snapTableKey); - } - OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = - OzoneManagerProtocolProtos.NullableUUID.newBuilder(); - if (expectedPreviousSnapshotId != null) { - expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); - } - purgeDirRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); - - purgeDirRequest.addAllDeletedPath(requests); - - OzoneManagerProtocolProtos.OMRequest omRequest = - OzoneManagerProtocolProtos.OMRequest.newBuilder() - .setCmdType(OzoneManagerProtocolProtos.Type.PurgeDirectories) - .setPurgeDirectoriesRequest(purgeDirRequest) - .setClientId(getClientId().toString()) - 
.build(); - - // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. - try (BootstrapStateHandler.Lock lock = snapTableKey != null ? getBootstrapStateLock().lock() : null) { - return submitRequest(omRequest); - } catch (ServiceException | InterruptedException e) { - LOG.error("PurgePaths request failed. Will retry at next run.", e); - } - return null; - } - - - private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( - final long volumeId, - final long bucketId, - final String purgeDeletedDir, - final List purgeDeletedFiles, - final List markDirsAsDeleted) { - // Put all keys to be purged in a list - PurgePathRequest.Builder purgePathsRequest = PurgePathRequest.newBuilder(); - purgePathsRequest.setVolumeId(volumeId); - purgePathsRequest.setBucketId(bucketId); - - if (purgeDeletedDir != null) { - purgePathsRequest.setDeletedDir(purgeDeletedDir); - } - - for (OmKeyInfo purgeFile : purgeDeletedFiles) { - purgePathsRequest.addDeletedSubFiles( - purgeFile.getProtobuf(true, ClientVersion.CURRENT_VERSION)); - } - - // Add these directories to deletedDirTable, so that its sub-paths will be - // traversed in next iteration to ensure cleanup all sub-children. 
- for (OmKeyInfo dir : markDirsAsDeleted) { - purgePathsRequest.addMarkDeletedSubDirs( - dir.getProtobuf(ClientVersion.CURRENT_VERSION)); - } - - return purgePathsRequest.build(); - } - - - protected Optional prepareDeleteDirRequest( - OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, - List> subDirList, - KeyManager keyManager, - CheckedFunction, Boolean, IOException> reclaimableFileFilter, - long remainingBufLimit) throws IOException { - // step-0: Get one pending deleted directory - if (LOG.isDebugEnabled()) { - LOG.debug("Pending deleted dir name: {}", - pendingDeletedDirInfo.getKeyName()); - } - - final String[] keys = delDirName.split(OM_KEY_PREFIX); - final long volumeId = Long.parseLong(keys[1]); - final long bucketId = Long.parseLong(keys[2]); - - // step-1: get all sub directories under the deletedDir - DeleteKeysResult subDirDeleteResult = - keyManager.getPendingDeletionSubDirs(volumeId, bucketId, - pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); - List subDirs = subDirDeleteResult.getKeysToDelete(); - remainingBufLimit -= subDirDeleteResult.getConsumedSize(); - - OMMetadataManager omMetadataManager = keyManager.getMetadataManager(); - for (OmKeyInfo dirInfo : subDirs) { - String ozoneDbKey = omMetadataManager.getOzonePathKey(volumeId, - bucketId, dirInfo.getParentObjectID(), dirInfo.getFileName()); - String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( - dirInfo.getObjectID(), ozoneDbKey); - subDirList.add(Pair.of(ozoneDeleteKey, dirInfo)); - LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); - } - - // step-2: get all sub files under the deletedDir - // Only remove sub files if the parent directory is going to be deleted or can be reclaimed. 
- DeleteKeysResult subFileDeleteResult = - keyManager.getPendingDeletionSubFiles(volumeId, bucketId, - pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); - List subFiles = subFileDeleteResult.getKeysToDelete(); - - if (LOG.isDebugEnabled()) { - for (OmKeyInfo fileInfo : subFiles) { - LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); + @Override + public BackgroundTaskQueue getTasks() { + BackgroundTaskQueue queue = new BackgroundTaskQueue(); + queue.add(new DirDeletingTask(this, null)); + if (deepCleanSnapshots) { + Iterator iterator = null; + try { + iterator = snapshotChainManager.iterator(true); + } catch (IOException e) { + LOG.error("Error while initializing snapshot chain iterator."); + return queue; + } + while (iterator.hasNext()) { + UUID snapshotId = iterator.next(); + queue.add(new DirDeletingTask(this, snapshotId)); } } - - // step-3: If both sub-dirs and sub-files are exhausted under a parent - // directory, only then delete the parent. - String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && - subFileDeleteResult.isProcessedKeys() ? 
delDirName : null; - if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { - return Optional.empty(); - } - return Optional.of(wrapPurgeRequest(volumeId, bucketId, - purgeDeletedDir, subFiles, subDirs)); + return queue; } + @Override + public void shutdown() { + super.shutdown(); + } @SuppressWarnings("checkstyle:ParameterNumber") void optimizeDirDeletesAndSubmitRequest( @@ -300,7 +202,7 @@ void optimizeDirDeletesAndSubmitRequest( String snapTableKey, long startTime, long remainingBufLimit, KeyManager keyManager, CheckedFunction, Boolean, IOException> reclaimableDirChecker, - CheckedFunction, Boolean, IOException> reclaimableFileChecker, + CheckedFunction, Boolean, IOException> reclaimableFileChecker, UUID expectedPreviousSnapshotId, long rnCnt) { // Optimization to handle delete sub-dir and keys to remove quickly @@ -352,8 +254,29 @@ void optimizeDirDeletesAndSubmitRequest( " totalRunCount: {}", dirNum, subdirDelNum, subFileNum, (subDirNum - subdirDelNum), timeTakenInIteration, rnCnt); - getMetrics().incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); - getPerfMetrics().setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); + metrics.incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); + perfMetrics.setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); + } + } + + private static final class DeletedDirSupplier implements Closeable { + private TableIterator> + deleteTableIterator; + + private DeletedDirSupplier(TableIterator> deleteTableIterator) { + this.deleteTableIterator = deleteTableIterator; + } + + private synchronized Table.KeyValue get() { + if (deleteTableIterator.hasNext()) { + return deleteTableIterator.next(); + } + return null; + } + + @Override + public void close() { + IOUtils.closeQuietly(deleteTableIterator); } } @@ -388,53 +311,128 @@ public long getMovedFilesCount() { return movedFilesCount.get(); } + private Optional prepareDeleteDirRequest( + 
OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, + List> subDirList, + KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableFileFilter, + long remainingBufLimit) throws IOException { + // step-0: Get one pending deleted directory + if (LOG.isDebugEnabled()) { + LOG.debug("Pending deleted dir name: {}", + pendingDeletedDirInfo.getKeyName()); + } - @Override - public BackgroundTaskQueue getTasks() { - BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new DirDeletingTask(this, null)); - if (deepCleanSnapshots) { - Iterator iterator = null; - try { - iterator = snapshotChainManager.iterator(true); - } catch (IOException e) { - LOG.error("Error while initializing snapshot chain iterator."); - return queue; - } - while (iterator.hasNext()) { - UUID snapshotId = iterator.next(); - queue.add(new DirDeletingTask(this, snapshotId)); + final String[] keys = delDirName.split(OM_KEY_PREFIX); + final long volumeId = Long.parseLong(keys[1]); + final long bucketId = Long.parseLong(keys[2]); + + // step-1: get all sub directories under the deletedDir + DeleteKeysResult subDirDeleteResult = + keyManager.getPendingDeletionSubDirs(volumeId, bucketId, + pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); + List subDirs = subDirDeleteResult.getKeysToDelete(); + remainingBufLimit -= subDirDeleteResult.getConsumedSize(); + + OMMetadataManager omMetadataManager = keyManager.getMetadataManager(); + for (OmKeyInfo dirInfo : subDirs) { + String ozoneDbKey = omMetadataManager.getOzonePathKey(volumeId, + bucketId, dirInfo.getParentObjectID(), dirInfo.getFileName()); + String ozoneDeleteKey = omMetadataManager.getOzoneDeletePathKey( + dirInfo.getObjectID(), ozoneDbKey); + subDirList.add(Pair.of(ozoneDeleteKey, dirInfo)); + LOG.debug("Moved sub dir name: {}", dirInfo.getKeyName()); + } + + // step-2: get all sub files under the deletedDir + // Only remove sub files if the parent directory is going to be deleted or can be 
reclaimed. + DeleteKeysResult subFileDeleteResult = + keyManager.getPendingDeletionSubFiles(volumeId, bucketId, + pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); + List subFiles = subFileDeleteResult.getKeysToDelete(); + + if (LOG.isDebugEnabled()) { + for (OmKeyInfo fileInfo : subFiles) { + LOG.debug("Moved sub file name: {}", fileInfo.getKeyName()); } } - return queue; - } - @Override - public void shutdown() { - super.shutdown(); + // step-3: If both sub-dirs and sub-files are exhausted under a parent + // directory, only then delete the parent. + String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && + subFileDeleteResult.isProcessedKeys() ? delDirName : null; + if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { + return Optional.empty(); + } + return Optional.of(wrapPurgeRequest(volumeId, bucketId, + purgeDeletedDir, subFiles, subDirs)); } - private static final class DeletedDirSupplier implements Closeable { - private TableIterator> - deleteTableIterator; + private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( + final long volumeId, + final long bucketId, + final String purgeDeletedDir, + final List purgeDeletedFiles, + final List markDirsAsDeleted) { + // Put all keys to be purged in a list + PurgePathRequest.Builder purgePathsRequest = PurgePathRequest.newBuilder(); + purgePathsRequest.setVolumeId(volumeId); + purgePathsRequest.setBucketId(bucketId); - private DeletedDirSupplier(TableIterator> deleteTableIterator) { - this.deleteTableIterator = deleteTableIterator; + if (purgeDeletedDir != null) { + purgePathsRequest.setDeletedDir(purgeDeletedDir); } - private synchronized Table.KeyValue get() { - if (deleteTableIterator.hasNext()) { - return deleteTableIterator.next(); - } - return null; + for (OmKeyInfo purgeFile : purgeDeletedFiles) { + purgePathsRequest.addDeletedSubFiles( + purgeFile.getProtobuf(true, ClientVersion.CURRENT_VERSION)); } - 
@Override - public void close() { - IOUtils.closeQuietly(deleteTableIterator); + // Add these directories to deletedDirTable, so that its sub-paths will be + // traversed in next iteration to ensure cleanup all sub-children. + for (OmKeyInfo dir : markDirsAsDeleted) { + purgePathsRequest.addMarkDeletedSubDirs( + dir.getProtobuf(ClientVersion.CURRENT_VERSION)); } + + return purgePathsRequest.build(); } + private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List requests, + String snapTableKey, UUID expectedPreviousSnapshotId) { + OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = + OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); + + if (snapTableKey != null) { + purgeDirRequest.setSnapshotTableKey(snapTableKey); + } + OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = + OzoneManagerProtocolProtos.NullableUUID.newBuilder(); + if (expectedPreviousSnapshotId != null) { + expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); + } + purgeDirRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); + + purgeDirRequest.addAllDeletedPath(requests); + + OzoneManagerProtocolProtos.OMRequest omRequest = + OzoneManagerProtocolProtos.OMRequest.newBuilder() + .setCmdType(OzoneManagerProtocolProtos.Type.PurgeDirectories) + .setPurgeDirectoriesRequest(purgeDirRequest) + .setClientId(clientId.toString()) + .build(); + + // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. + try (BootstrapStateHandler.Lock lock = snapTableKey != null ? getBootstrapStateLock().lock() : null) { + return submitRequest(omRequest); + } catch (ServiceException | InterruptedException e) { + LOG.error("PurgePaths request failed. 
Will retry at next run.", e); + } + return null; + } + + + private final class DirDeletingTask implements BackgroundTask { private final DirectoryDeletingService directoryDeletingService; private final UUID snapshotId; @@ -581,7 +579,7 @@ private boolean processDeletedDirectories(SnapshotInfo currentSnapshotInfo, KeyM subFileNum += purgePathRequest.getDeletedSubFilesCount(); } - optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, + directoryDeletingService.optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, subFileNum, allSubDirList, purgePathRequestList, snapshotTableKey, startTime, remainingBufLimit, getOzoneManager().getKeyManager(), reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index acf9767e3653..4920a4e4f26f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -123,62 +123,9 @@ public boolean isRunningOnAOS() { return isRunningOnAOS.get(); } - @Override - public BackgroundTaskQueue getTasks() { - BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new KeyDeletingTask(this, null)); - if (deepCleanSnapshots) { - Iterator iterator = null; - try { - iterator = snapshotChainManager.iterator(true); - } catch (IOException e) { - LOG.error("Error while initializing snapshot chain iterator."); - return queue; - } - while (iterator.hasNext()) { - UUID snapshotId = iterator.next(); - queue.add(new KeyDeletingTask(this, snapshotId)); - } - } - return queue; - } - - private boolean shouldRun() { - if (getOzoneManager() == null) { - // OzoneManager can be null for testing - return true; - } - return !suspended.get() && 
getOzoneManager().isLeaderReady(); - } - - /** - * Suspend the service. - */ - @VisibleForTesting - public void suspend() { - suspended.set(true); - } - - /** - * Resume the service if suspended. - */ - @VisibleForTesting - public void resume() { - suspended.set(false); - } - - public int getKeyLimitPerTask() { - return keyLimitPerTask; - } - - public void setKeyLimitPerTask(int keyLimitPerTask) { - this.keyLimitPerTask = keyLimitPerTask; - } - Pair processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { - long startTime = Time.monotonicNow(); Pair purgeResult = Pair.of(0, false); if (LOG.isDebugEnabled()) { @@ -193,7 +140,7 @@ Pair processKeyDeletes(List keyBlocksList, keyBlocksList.size(), logSize, keyBlocksList.subList(0, logSize)); } List blockDeletionResults = - getScmClient().deleteKeyBlocks(keyBlocksList); + scmClient.deleteKeyBlocks(keyBlocksList); LOG.info("{} BlockGroup deletion are acked by SCM in {} ms", keyBlocksList.size(), Time.monotonicNow() - startTime); if (blockDeletionResults != null) { @@ -205,7 +152,7 @@ Pair processKeyDeletes(List keyBlocksList, LOG.info("Blocks for {} (out of {}) keys are deleted from DB in {} ms. 
Limit per task is {}.", purgeResult, blockDeletionResults.size(), Time.monotonicNow() - purgeStartTime, limit); } - getPerfMetrics().setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); + perfMetrics.setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); return purgeResult; } @@ -295,7 +242,7 @@ private Pair submitPurgeKeysRequest(List submitPurgeKeysRequest(List iterator = null; + try { + iterator = snapshotChainManager.iterator(true); + } catch (IOException e) { + LOG.error("Error while initializing snapshot chain iterator."); + return queue; + } + while (iterator.hasNext()) { + UUID snapshotId = iterator.next(); + queue.add(new KeyDeletingTask(this, snapshotId)); + } + } + return queue; + } + private boolean shouldRun() { + if (getOzoneManager() == null) { + // OzoneManager can be null for testing + return true; + } + return !suspended.get() && getOzoneManager().isLeaderReady(); + } + + /** + * Suspend the service. + */ + @VisibleForTesting + public void suspend() { + suspended.set(true); + } + + /** + * Resume the service if suspended. + */ + @VisibleForTesting + public void resume() { + suspended.set(false); + } + + public int getKeyLimitPerTask() { + return keyLimitPerTask; + } + + public void setKeyLimitPerTask(int keyLimitPerTask) { + this.keyLimitPerTask = keyLimitPerTask; + } /** * A key deleting task scans OM DB and looking for a certain number of From effdf71dd57f8d2128564dce229ffc01d71c0db5 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 11:43:56 -0400 Subject: [PATCH 11/35] HDDS-13160. 
Fix checkstyle Change-Id: I2e5e5a95079dbda192b5305f18e4b29b0eeb620d --- .../service/AbstractKeyDeletingService.java | 23 +++++++++++-------- .../om/service/DirectoryDeletingService.java | 10 ++++---- .../ozone/om/service/KeyDeletingService.java | 22 +++++++++--------- .../om/service/SnapshotDeletingService.java | 2 +- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 05fb1ae9cbef..e84de3574666 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.DeletingServiceMetrics; @@ -42,10 +41,9 @@ public abstract class AbstractKeyDeletingService extends BackgroundService implements BootstrapStateHandler { private final OzoneManager ozoneManager; - final DeletingServiceMetrics metrics; - final OMPerformanceMetrics perfMetrics; - final ScmBlockLocationProtocol scmClient; - final ClientId clientId = ClientId.randomId(); + private final DeletingServiceMetrics metrics; + private final OMPerformanceMetrics perfMetrics; + private final ClientId clientId = ClientId.randomId(); private final AtomicLong runCount; private final AtomicLong callId; private final BootstrapStateHandler.Lock lock = @@ -53,11 +51,10 @@ public abstract class AbstractKeyDeletingService extends BackgroundService public AbstractKeyDeletingService(String 
serviceName, long interval, TimeUnit unit, int threadPoolSize, long serviceTimeout, - OzoneManager ozoneManager, ScmBlockLocationProtocol scmClient) { + OzoneManager ozoneManager) { super(serviceName, interval, unit, threadPoolSize, serviceTimeout, ozoneManager.getThreadNamePrefix()); this.ozoneManager = ozoneManager; - this.scmClient = scmClient; this.runCount = new AtomicLong(0); this.metrics = ozoneManager.getDeletionMetrics(); this.perfMetrics = ozoneManager.getPerfMetrics(); @@ -77,8 +74,16 @@ public OzoneManager getOzoneManager() { return ozoneManager; } - public ScmBlockLocationProtocol getScmClient() { - return scmClient; + ClientId getClientId() { + return clientId; + } + + DeletingServiceMetrics getMetrics() { + return metrics; + } + + OMPerformanceMetrics getPerfMetrics() { + return perfMetrics; } /** diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 50b6d93e97f8..252301b934ea 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -117,7 +117,7 @@ public DirectoryDeletingService(long interval, TimeUnit unit, long serviceTimeout, OzoneManager ozoneManager, OzoneConfiguration configuration, int dirDeletingServiceCorePoolSize, boolean deepCleanSnapshots) { super(DirectoryDeletingService.class.getSimpleName(), interval, unit, - dirDeletingServiceCorePoolSize, serviceTimeout, ozoneManager, null); + dirDeletingServiceCorePoolSize, serviceTimeout, ozoneManager); int limit = (int) configuration.getStorageSize( OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT, @@ -254,8 +254,8 @@ void optimizeDirDeletesAndSubmitRequest( " 
totalRunCount: {}", dirNum, subdirDelNum, subFileNum, (subDirNum - subdirDelNum), timeTakenInIteration, rnCnt); - metrics.incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); - perfMetrics.setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); + getMetrics().incrementDirectoryDeletionTotalMetrics(dirNum + subdirDelNum, subDirNum, subFileNum); + getPerfMetrics().setDirectoryDeletingServiceLatencyMs(timeTakenInIteration); } } @@ -419,7 +419,7 @@ private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List= 0, @@ -106,7 +107,7 @@ public KeyDeletingService(OzoneManager ozoneManager, this.isRunningOnAOS = new AtomicBoolean(false); this.deepCleanSnapshots = deepCleanSnapshots; this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); - this.metrics = ozoneManager.getDeletionMetrics(); + this.scmClient = scmClient; } /** @@ -152,7 +153,7 @@ Pair processKeyDeletes(List keyBlocksList, LOG.info("Blocks for {} (out of {}) keys are deleted from DB in {} ms. Limit per task is {}.", purgeResult, blockDeletionResults.size(), Time.monotonicNow() - purgeStartTime, limit); } - perfMetrics.setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); + getPerfMetrics().setKeyDeletingServiceLatencyMs(Time.monotonicNow() - startTime); return purgeResult; } @@ -196,12 +197,11 @@ private Pair submitPurgeKeysRequest(List submitPurgeKeysRequest(List Date: Tue, 3 Jun 2025 11:56:08 -0400 Subject: [PATCH 12/35] HDDS-13034. 
deprecate config instead of removing Change-Id: I5ed93af3b5ae794b0cfe4671ec2a851592edcb8c --- .../org/apache/hadoop/ozone/om/OMConfigKeys.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 242ae03f0ccb..748d5f7d6c95 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -387,6 +387,18 @@ public final class OMConfigKeys { */ public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = false; + @Deprecated + public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = + "ozone.snapshot.directory.service.interval"; + @Deprecated + public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT + = "24h"; + @Deprecated + public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT = + "ozone.snapshot.directory.service.timeout"; + @Deprecated + public static final String + OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT = "300s"; public static final String OZONE_THREAD_NUMBER_DIR_DELETION = "ozone.thread.number.dir.deletion"; From 806111cb895ffcef7dec145cbd6be43fe99c91ef Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 12:08:52 -0400 Subject: [PATCH 13/35] HDDS-13160. 
Fix function access Change-Id: Ifda5cd552ad23086cec7163d2a7983ad1dd5f5c4 --- .../hadoop/ozone/om/service/DirectoryDeletingService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 252301b934ea..92516487625a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -577,7 +577,7 @@ private boolean processDeletedDirectories(SnapshotInfo currentSnapshotInfo, KeyM subFileNum += purgePathRequest.getDeletedSubFilesCount(); } - directoryDeletingService.optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, + optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, subFileNum, allSubDirList, purgePathRequestList, snapshotTableKey, startTime, remainingBufLimit, getOzoneManager().getKeyManager(), reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, From 4867d7d7105443e95992535dbdcd16b36003b05a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 3 Jun 2025 12:58:11 -0400 Subject: [PATCH 14/35] HDDS-13035. 
Snapshot Deleting Service should hold write locks while purging deleted snapshots Change-Id: I7f0c12c53f8838e652624bb993a6c8414ef638c7 --- .../TestDirectoryDeletingServiceWithFSO.java | 2 +- ...napshotDeletingServiceIntegrationTest.java | 248 ------------------ .../om/service/DirectoryDeletingService.java | 22 +- .../ozone/om/service/KeyDeletingService.java | 22 +- .../om/service/SnapshotDeletingService.java | 46 ++-- .../om/service/TestKeyDeletingService.java | 6 +- 6 files changed, 28 insertions(+), 318 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index ccc1bc6a1ced..d510872a5847 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -574,7 +574,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() DirectoryDeletingService dirDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); // Suspend KeyDeletingService dirDeletingService.suspend(); - GenericTestUtils.waitFor(() -> !dirDeletingService.isRunningOnAOS(), 1000, 10000); + Thread.sleep(1000); Random random = new Random(); final String testVolumeName = "volume" + random.nextInt(); final String testBucketName = "bucket" + random.nextInt(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 73fe9b007ac6..1b69e51e9969 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -25,31 +25,17 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.when; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Random; import java.util.UUID; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -import org.apache.commons.compress.utils.Lists; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.IOUtils; @@ -61,34 +47,27 @@ import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; 
-import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; -import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestMethodOrder; -import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -517,233 +496,6 @@ public void testSnapshotWithFSO() throws Exception { rcSnap1.close(); } - private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean dirDeletionWaitStarted, - AtomicBoolean dirDeletionStarted) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getDirDeletingService().shutdown(); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - GenericTestUtils.waitFor(() -> om.getKeyManager().getDirDeletingService().getThreadCount() == 0, 1000, - 100000); - DirectoryDeletingService directoryDeletingService = Mockito.spy(new DirectoryDeletingService(10000, - TimeUnit.MILLISECONDS, 
100000, ozoneManager, cluster.getConf(), 1, false)); - directoryDeletingService.shutdown(); - GenericTestUtils.waitFor(() -> directoryDeletingService.getThreadCount() == 0, 1000, - 100000); - doAnswer(i -> { - // Wait for SDS to reach DDS wait block before processing any deleted directories. - GenericTestUtils.waitFor(dirDeletionWaitStarted::get, 1000, 100000); - dirDeletionStarted.set(true); - return i.callRealMethod(); - }).when(keyManager).getDeletedDirEntries(); - return directoryDeletingService; - } - - private KeyDeletingService getMockedKeyDeletingService(AtomicBoolean keyDeletionWaitStarted, - AtomicBoolean keyDeletionStarted) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getDeletingService().shutdown(); - GenericTestUtils.waitFor(() -> om.getKeyManager().getDeletingService().getThreadCount() == 0, 1000, - 100000); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - KeyDeletingService keyDeletingService = Mockito.spy(new KeyDeletingService(ozoneManager, - ozoneManager.getScmClient().getBlockClient(), 10000, - 100000, cluster.getConf(), 10, false)); - keyDeletingService.shutdown(); - GenericTestUtils.waitFor(() -> keyDeletingService.getThreadCount() == 0, 1000, - 100000); - when(keyManager.getPendingDeletionKeys(any(), anyInt())).thenAnswer(i -> { - // wait for SDS to reach the KDS wait block before processing any key. 
- GenericTestUtils.waitFor(keyDeletionWaitStarted::get, 1000, 100000); - keyDeletionStarted.set(true); - return i.callRealMethod(); - }); - return keyDeletingService; - } - - @SuppressWarnings("checkstyle:parameternumber") - private SnapshotDeletingService getMockedSnapshotDeletingService(KeyDeletingService keyDeletingService, - DirectoryDeletingService directoryDeletingService, - AtomicBoolean snapshotDeletionStarted, - AtomicBoolean keyDeletionWaitStarted, - AtomicBoolean dirDeletionWaitStarted, - AtomicBoolean keyDeletionStarted, - AtomicBoolean dirDeletionStarted, - OzoneBucket testBucket) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getSnapshotDeletingService().shutdown(); - GenericTestUtils.waitFor(() -> om.getKeyManager().getSnapshotDeletingService().getThreadCount() == 0, 1000, - 100000); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - OmMetadataManagerImpl omMetadataManager = Mockito.spy((OmMetadataManagerImpl)om.getMetadataManager()); - SnapshotChainManager unMockedSnapshotChainManager = - ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(); - SnapshotChainManager snapshotChainManager = Mockito.spy(unMockedSnapshotChainManager); - OmSnapshotManager omSnapshotManager = Mockito.spy(om.getOmSnapshotManager()); - when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager); - when(omMetadataManager.getSnapshotChainManager()).thenReturn(snapshotChainManager); - when(keyManager.getDeletingService()).thenReturn(keyDeletingService); - when(keyManager.getDirDeletingService()).thenReturn(directoryDeletingService); - SnapshotDeletingService snapshotDeletingService = Mockito.spy(new SnapshotDeletingService(10000, - 100000, ozoneManager)); - snapshotDeletingService.shutdown(); - GenericTestUtils.waitFor(() 
-> snapshotDeletingService.getThreadCount() == 0, 1000, - 100000); - when(snapshotChainManager.iterator(anyBoolean())).thenAnswer(i -> { - Iterator itr = (Iterator) i.callRealMethod(); - return Lists.newArrayList(itr).stream().filter(uuid -> { - try { - SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(om, snapshotChainManager, uuid); - return snapshotInfo.getBucketName().equals(testBucket.getName()) && - snapshotInfo.getVolumeName().equals(testBucket.getVolumeName()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }).iterator(); - }); - when(snapshotChainManager.getLatestGlobalSnapshotId()) - .thenAnswer(i -> unMockedSnapshotChainManager.getLatestGlobalSnapshotId()); - when(snapshotChainManager.getOldestGlobalSnapshotId()) - .thenAnswer(i -> unMockedSnapshotChainManager.getOldestGlobalSnapshotId()); - doAnswer(i -> { - // KDS wait block reached in SDS. - GenericTestUtils.waitFor(() -> { - return keyDeletingService.isRunningOnAOS(); - }, 1000, 100000); - keyDeletionWaitStarted.set(true); - return i.callRealMethod(); - }).when(snapshotDeletingService).waitForKeyDeletingService(); - doAnswer(i -> { - // DDS wait block reached in SDS. - GenericTestUtils.waitFor(directoryDeletingService::isRunningOnAOS, 1000, 100000); - dirDeletionWaitStarted.set(true); - return i.callRealMethod(); - }).when(snapshotDeletingService).waitForDirDeletingService(); - doAnswer(i -> { - // Assert KDS & DDS is not running when SDS starts moving entries & assert all wait block, KDS processing - // AOS block & DDS AOS block have been executed. 
- Assertions.assertTrue(keyDeletionWaitStarted.get()); - Assertions.assertTrue(dirDeletionWaitStarted.get()); - Assertions.assertTrue(keyDeletionStarted.get()); - Assertions.assertTrue(dirDeletionStarted.get()); - Assertions.assertFalse(keyDeletingService.isRunningOnAOS()); - Assertions.assertFalse(directoryDeletingService.isRunningOnAOS()); - snapshotDeletionStarted.set(true); - return i.callRealMethod(); - }).when(omSnapshotManager).getSnapshot(anyString(), anyString(), anyString()); - return snapshotDeletingService; - } - - @Test - @Order(4) - @Flaky("HDDS-11847") - public void testParallelExcecutionOfKeyDeletionAndSnapshotDeletion() throws Exception { - AtomicBoolean keyDeletionWaitStarted = new AtomicBoolean(false); - AtomicBoolean dirDeletionWaitStarted = new AtomicBoolean(false); - AtomicBoolean keyDeletionStarted = new AtomicBoolean(false); - AtomicBoolean dirDeletionStarted = new AtomicBoolean(false); - AtomicBoolean snapshotDeletionStarted = new AtomicBoolean(false); - Random random = new Random(); - String bucketName = "bucket" + random.nextInt(); - BucketArgs bucketArgs = new BucketArgs.Builder() - .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) - .build(); - OzoneBucket testBucket = TestDataUtil.createBucket( - client, VOLUME_NAME, bucketArgs, bucketName); - // mock keyDeletingService - KeyDeletingService keyDeletingService = getMockedKeyDeletingService(keyDeletionWaitStarted, keyDeletionStarted); - - // mock dirDeletingService - DirectoryDeletingService directoryDeletingService = getMockedDirectoryDeletingService(dirDeletionWaitStarted, - dirDeletionStarted); - - // mock snapshotDeletingService. 
- SnapshotDeletingService snapshotDeletingService = getMockedSnapshotDeletingService(keyDeletingService, - directoryDeletingService, snapshotDeletionStarted, keyDeletionWaitStarted, dirDeletionWaitStarted, - keyDeletionStarted, dirDeletionStarted, testBucket); - createSnapshotFSODataForBucket(testBucket); - List> renamesKeyEntries; - List>> deletedKeyEntries; - List> deletedDirEntries; - try (UncheckedAutoCloseableSupplier snapshot = - om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), - testBucket.getName() + "snap2")) { - renamesKeyEntries = snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - deletedKeyEntries = snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - deletedDirEntries = snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000); - } - Thread keyDeletingThread = new Thread(() -> { - try { - keyDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - Thread directoryDeletingThread = new Thread(() -> { - try { - directoryDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - ExecutorService snapshotDeletingThread = Executors.newFixedThreadPool(1); - Runnable snapshotDeletionRunnable = () -> { - try { - snapshotDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }; - keyDeletingThread.start(); - directoryDeletingThread.start(); - Future future = snapshotDeletingThread.submit(snapshotDeletionRunnable); - GenericTestUtils.waitFor(snapshotDeletionStarted::get, 1000, 30000); - future.get(); - try (UncheckedAutoCloseableSupplier snapshot = - om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), - testBucket.getName() + 
"snap2")) { - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000)); - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000)); - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000)); - } - List> aosRenamesKeyEntries = - om.getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - List>> aosDeletedKeyEntries = - om.getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - List> aosDeletedDirEntries = - om.getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000); - renamesKeyEntries.forEach(entry -> Assertions.assertTrue(aosRenamesKeyEntries.contains(entry))); - deletedKeyEntries.forEach(entry -> Assertions.assertTrue(aosDeletedKeyEntries.contains(entry))); - deletedDirEntries.forEach(entry -> Assertions.assertTrue(aosDeletedDirEntries.contains(entry))); - Mockito.reset(snapshotDeletingService); - SnapshotInfo snap2 = SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), - testBucket.getName(), testBucket.getName() + "snap2"); - Assertions.assertEquals(snap2.getSnapshotStatus(), SnapshotInfo.SnapshotStatus.SNAPSHOT_DELETED); - future = snapshotDeletingThread.submit(snapshotDeletionRunnable); - future.get(); - Assertions.assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), - testBucket.getName(), testBucket.getName() + "snap2")); - cluster.restartOzoneManager(); - } - /* Flow ---- diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 92516487625a..6ff66f772ce1 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -104,7 +104,6 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // and send deletion requests. private int ratisByteLimit; private final AtomicBoolean suspended; - private final AtomicBoolean isRunningOnAOS; private final SnapshotChainManager snapshotChainManager; private final boolean deepCleanSnapshots; private final ExecutorService deletionThreadPool; @@ -129,7 +128,6 @@ public DirectoryDeletingService(long interval, TimeUnit unit, // always go to 90% of max limit for request as other header will be added this.ratisByteLimit = (int) (limit * 0.9); this.suspended = new AtomicBoolean(false); - this.isRunningOnAOS = new AtomicBoolean(false); this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.deepCleanSnapshots = deepCleanSnapshots; this.deletedDirsCount = new AtomicLong(0); @@ -145,10 +143,6 @@ private boolean shouldRun() { return getOzoneManager().isLeaderReady() && !suspended.get(); } - public boolean isRunningOnAOS() { - return isRunningOnAOS.get(); - } - /** * Suspend the service. 
*/ @@ -172,7 +166,7 @@ public void setRatisByteLimit(int ratisByteLimit) { @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new DirDeletingTask(this, null)); + queue.add(new DirDeletingTask(null)); if (deepCleanSnapshots) { Iterator iterator = null; try { @@ -183,7 +177,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new DirDeletingTask(this, snapshotId)); + queue.add(new DirDeletingTask(snapshotId)); } } return queue; @@ -432,11 +426,9 @@ private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { @@ -262,7 +256,7 @@ private Pair submitPurgeKeysRequest(List iterator = null; try { @@ -273,7 +267,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new KeyDeletingTask(this, snapshotId)); + queue.add(new KeyDeletingTask(snapshotId)); } } return queue; @@ -320,11 +314,9 @@ public void setKeyLimitPerTask(int keyLimitPerTask) { */ @VisibleForTesting final class KeyDeletingTask implements BackgroundTask { - private final KeyDeletingService deletingService; private final UUID snapshotId; - KeyDeletingTask(KeyDeletingService service, UUID snapshotId) { - this.deletingService = service; + KeyDeletingTask(UUID snapshotId) { this.snapshotId = snapshotId; } @@ -457,7 +449,6 @@ public BackgroundTaskResult call() { final long run = getRunCount().incrementAndGet(); if (snapshotId == null) { LOG.debug("Running KeyDeletingService for active object store, {}", run); - isRunningOnAOS.set(true); } else { LOG.debug("Running KeyDeletingService for snapshot : {}, {}", snapshotId, run); } @@ -494,13 +485,6 @@ public BackgroundTaskResult call() { } catch (IOException e) { LOG.error("Error while running delete files 
background task for store {}. Will retry at next run.", snapInfo, e); - } finally { - if (snapshotId == null) { - isRunningOnAOS.set(false); - synchronized (deletingService) { - this.deletingService.notify(); - } - } } } // By design, no one cares about the results of this call back. diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index 5b27fe2d6f85..c97716f41207 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -22,14 +22,17 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT; +import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.FlatResource.SNAPSHOT_GC_LOCK; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -53,6 +56,8 @@ import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; +import org.apache.hadoop.ozone.om.snapshot.MultiSnapshotLocks; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; @@ -88,7 +93,7 @@ public class SnapshotDeletingService extends AbstractKeyDeletingService { private final int keyLimitPerTask; private final int snapshotDeletionPerTask; private final int ratisByteLimit; - private final long serviceTimeout; + private MultiSnapshotLocks snapshotIdLocks; public SnapshotDeletingService(long interval, long serviceTimeout, OzoneManager ozoneManager) @@ -115,32 +120,8 @@ public SnapshotDeletingService(long interval, long serviceTimeout, this.keyLimitPerTask = conf.getInt( OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); - this.serviceTimeout = serviceTimeout; - } - - // Wait for a notification from KeyDeletingService if the key deletion is running. This is to ensure, merging of - // entries do not start while the AOS is still processing the deleted keys. - @VisibleForTesting - public void waitForKeyDeletingService() throws InterruptedException { - KeyDeletingService keyDeletingService = getOzoneManager().getKeyManager().getDeletingService(); - synchronized (keyDeletingService) { - while (keyDeletingService.isRunningOnAOS()) { - keyDeletingService.wait(serviceTimeout); - } - } - } - - // Wait for a notification from DirectoryDeletingService if the directory deletion is running. This is to ensure, - // merging of entries do not start while the AOS is still processing the deleted keys. 
- @VisibleForTesting - public void waitForDirDeletingService() throws InterruptedException { - DirectoryDeletingService directoryDeletingService = getOzoneManager().getKeyManager() - .getDirDeletingService(); - synchronized (directoryDeletingService) { - while (directoryDeletingService.isRunningOnAOS()) { - directoryDeletingService.wait(serviceTimeout); - } - } + IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); + this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true); } private class SnapshotDeletingTask implements BackgroundTask { @@ -177,11 +158,16 @@ public BackgroundTaskResult call() throws InterruptedException { continue; } + // Acquire write lock on current snapshot and next snapshot in chain. + if (!snapshotIdLocks.acquireLock(Arrays.asList(snapInfo.getSnapshotId(), + Optional.ofNullable(nextSnapshot).map(SnapshotInfo::getSnapshotId).orElse(null))) + .isLockAcquired()) { + continue; + } + // nextSnapshot = null means entries would be moved to AOS. 
if (nextSnapshot == null) { LOG.info("Snapshot: {} entries will be moved to AOS.", snapInfo.getTableKey()); - waitForKeyDeletingService(); - waitForDirDeletingService(); } else { LOG.info("Snapshot: {} entries will be moved to next active snapshot: {}", snapInfo.getTableKey(), nextSnapshot.getTableKey()); @@ -232,6 +218,8 @@ public BackgroundTaskResult call() throws InterruptedException { } else { snapshotsToBePurged.add(snapInfo.getTableKey()); } + } finally { + snapshotIdLocks.releaseLock(); } successRunCount.incrementAndGet(); snapshotLimit--; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 42e76377e14d..76af8ee37559 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -169,6 +169,7 @@ private void createConfig(File testDir) { private void createSubject() throws Exception { OmTestManagers omTestManagers = new OmTestManagers(conf, scmBlockTestingClient, null); keyManager = omTestManagers.getKeyManager(); + keyDeletingService = keyManager.getDeletingService(); directoryDeletingService = keyManager.getDirDeletingService(); writeClient = omTestManagers.getWriteClient(); @@ -345,7 +346,8 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() keyDeletingService.suspend(); SnapshotDeletingService snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); snapshotDeletingService.suspend(); - GenericTestUtils.waitFor(() -> !keyDeletingService.isRunningOnAOS(), 1000, 10000); + Thread.sleep(1000); + final String volumeName = getTestName(); final String bucketName = uniqueObjectName("bucket"); OzoneManager ozoneManager = Mockito.spy(om); @@ -620,7 +622,7 @@ public void 
testKeyDeletingServiceWithDeepCleanedSnapshots() throws Exception { when(kds.getTasks()).thenAnswer(i -> { BackgroundTaskQueue queue = new BackgroundTaskQueue(); for (UUID id : snapshotIds) { - queue.add(kds.new KeyDeletingTask(kds, id)); + queue.add(kds.new KeyDeletingTask(id)); } return queue; }); From 2db371d77d50de707c176dc31cb8dcf3d83dd2e1 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 09:01:12 -0400 Subject: [PATCH 15/35] HDDS-13034. refactor test case Change-Id: Iac3af98a7e568a135073b6704a6ad5a5fac7b427 --- .../om/snapshot/TestSnapshotDirectoryCleaningService.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index f854448b1679..f57fc37536a0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -140,7 +140,7 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { cluster.getOzoneManager().getMetadataManager().getDeletedTable(); Table snapshotInfoTable = cluster.getOzoneManager().getMetadataManager().getSnapshotInfoTable(); - DirectoryDeletingService snapshotDirectoryCleaningService = + DirectoryDeletingService directoryDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); /* DirTable @@ -220,8 +220,8 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { fs.delete(root, true); assertTableRowCount(deletedKeyTable, 10); client.getObjectStore().createSnapshot(volumeName, bucketName, "snap3"); - long prevRunCount = snapshotDirectoryCleaningService.getRunCount().get(); - 
GenericTestUtils.waitFor(() -> snapshotDirectoryCleaningService.getRunCount().get() + long prevRunCount = directoryDeletingService.getRunCount().get(); + GenericTestUtils.waitFor(() -> directoryDeletingService.getRunCount().get() > prevRunCount + 1, 100, 10000); Thread.sleep(2000); From 464e3214557fb11b9092fa53befd1af760ab4308 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 11:00:35 -0400 Subject: [PATCH 16/35] HDDS-13034. Fix test case Change-Id: I9b7b41cf667e03d48120a4201757e445227924f7 --- .../TestSnapshotDirectoryCleaningService.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index f57fc37536a0..73ca77454541 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -24,6 +24,8 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import com.google.common.collect.ImmutableMap; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -44,6 +46,8 @@ import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -142,6 +146,8 @@ public void 
testExclusiveSizeWithDirectoryDeepClean() throws Exception { cluster.getOzoneManager().getMetadataManager().getSnapshotInfoTable(); DirectoryDeletingService directoryDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); + SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl)cluster.getOzoneManager().getMetadataManager()) + .getSnapshotChainManager(); /* DirTable /v/b/snapDir @@ -223,8 +229,6 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { long prevRunCount = directoryDeletingService.getRunCount().get(); GenericTestUtils.waitFor(() -> directoryDeletingService.getRunCount().get() > prevRunCount + 1, 100, 10000); - - Thread.sleep(2000); Map expectedSize = new HashMap() {{ // /v/b/snapDir/appRoot0/parentDir0-2/childFile contribute // exclusive size, /v/b/snapDir/appRoot0/parentDir0-2/childFile0-4 @@ -234,11 +238,22 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { put("snap2", 5L); put("snap3", 0L); }}; + try (TableIterator> iterator = snapshotInfoTable.iterator()) { while (iterator.hasNext()) { Table.KeyValue snapshotEntry = iterator.next(); String snapshotName = snapshotEntry.getValue().getName(); + + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo nextSnapshot = SnapshotUtils.getNextSnapshot(cluster.getOzoneManager(), snapshotChainManager, + snapshotEntry.getValue()); + return nextSnapshot == null || (nextSnapshot.isDeepCleanedDeletedDir() && nextSnapshot.isDeepCleaned()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 10000); SnapshotInfo snapshotInfo = snapshotInfoTable.get(snapshotEntry.getKey()); assertEquals(expectedSize.get(snapshotName), snapshotInfo.getExclusiveSize() + snapshotInfo.getExclusiveSizeDeltaFromDirDeepCleaning()); From 0ecec316a59405685bd92871d4e90163ccc68898 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 12:33:24 -0400 Subject: [PATCH 17/35] HDDS-13034. 
Fix test case Change-Id: I2ff1cf3ecf3baa00a5c5646901f6c9ffdbe6e370 --- .../ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index 73ca77454541..80a88871590b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -24,7 +24,6 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import com.google.common.collect.ImmutableMap; import java.io.IOException; import java.util.HashMap; import java.util.Map; From 1e9896bab43a82adc5a2ee34ce8a3131fbf8802c Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 20:24:27 -0400 Subject: [PATCH 18/35] HDDS-13160. 
Suspend should wait Change-Id: I28997dbafe0f6eba2fc02d00310e55e7c33b0dda --- .../hadoop/hdds/utils/BackgroundService.java | 10 ++++-- .../service/AbstractKeyDeletingService.java | 29 +++++++++++++++++ .../om/service/DirectoryDeletingService.java | 32 ++----------------- .../ozone/om/service/KeyDeletingService.java | 27 ---------------- .../om/service/SnapshotDeletingService.java | 23 ------------- 5 files changed, 40 insertions(+), 81 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java index 959bee8d8c5f..c3557980cc72 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java @@ -48,6 +48,7 @@ public abstract class BackgroundService { private final long serviceTimeoutInNanos; private final TimeUnit unit; private final PeriodicalTask service; + private CompletableFuture future; public BackgroundService(String serviceName, long interval, TimeUnit unit, int threadPoolSize, long serviceTimeout) { @@ -71,6 +72,11 @@ public BackgroundService(String serviceName, long interval, exec = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool( threadPoolSize, threadFactory); service = new PeriodicalTask(); + this.future = CompletableFuture.completedFuture(null); + } + + protected CompletableFuture getFuture() { + return future; } @VisibleForTesting @@ -131,7 +137,7 @@ public synchronized void run() { while (!tasks.isEmpty()) { BackgroundTask task = tasks.poll(); - CompletableFuture.runAsync(() -> { + future = future.thenCombine(CompletableFuture.runAsync(() -> { long startTime = System.nanoTime(); try { BackgroundTaskResult result = task.call(); @@ -150,7 +156,7 @@ public synchronized void run() { serviceName, endTime - startTime, serviceTimeoutInNanos); } } - }, exec); + }, exec), (Void1, Void) -> 
null); } } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index e84de3574666..3ee50ffd04f4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -20,7 +20,9 @@ import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; import java.util.List; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; @@ -46,6 +48,7 @@ public abstract class AbstractKeyDeletingService extends BackgroundService private final ClientId clientId = ClientId.randomId(); private final AtomicLong runCount; private final AtomicLong callId; + private final AtomicBoolean suspended; private final BootstrapStateHandler.Lock lock = new BootstrapStateHandler.Lock(); @@ -59,12 +62,38 @@ public AbstractKeyDeletingService(String serviceName, long interval, this.metrics = ozoneManager.getDeletionMetrics(); this.perfMetrics = ozoneManager.getPerfMetrics(); this.callId = new AtomicLong(0); + this.suspended = new AtomicBoolean(false); } protected OMResponse submitRequest(OMRequest omRequest) throws ServiceException { return OzoneManagerRatisUtils.submitRequest(ozoneManager, omRequest, clientId, callId.incrementAndGet()); } + final boolean shouldRun() { + if (getOzoneManager() == null) { + // OzoneManager can be null for testing + return true; + } + return !suspended.get() && getOzoneManager().isLeaderReady(); + } + + /** + * Suspend the service. 
+ */ + @VisibleForTesting + public void suspend() throws ExecutionException, InterruptedException { + suspended.set(true); + getFuture().get(); + } + + /** + * Resume the service if suspended. + */ + @VisibleForTesting + public void resume() { + suspended.set(false); + } + protected boolean isBufferLimitCrossed( int maxLimit, int cLimit, int increment) { return cLimit + increment >= maxLimit; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 6ff66f772ce1..d7cfc32aa6cb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -37,7 +37,6 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -103,7 +102,6 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // from parent directory info from deleted directory table concurrently // and send deletion requests. 
private int ratisByteLimit; - private final AtomicBoolean suspended; private final SnapshotChainManager snapshotChainManager; private final boolean deepCleanSnapshots; private final ExecutorService deletionThreadPool; @@ -127,7 +125,6 @@ public DirectoryDeletingService(long interval, TimeUnit unit, // always go to 90% of max limit for request as other header will be added this.ratisByteLimit = (int) (limit * 0.9); - this.suspended = new AtomicBoolean(false); this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.deepCleanSnapshots = deepCleanSnapshots; this.deletedDirsCount = new AtomicLong(0); @@ -135,30 +132,6 @@ public DirectoryDeletingService(long interval, TimeUnit unit, this.movedFilesCount = new AtomicLong(0); } - private boolean shouldRun() { - if (getOzoneManager() == null) { - // OzoneManager can be null for testing - return true; - } - return getOzoneManager().isLeaderReady() && !suspended.get(); - } - - /** - * Suspend the service. - */ - @VisibleForTesting - public void suspend() { - suspended.set(true); - } - - /** - * Resume the service if suspended. 
- */ - @VisibleForTesting - public void resume() { - suspended.set(false); - } - public void setRatisByteLimit(int ratisByteLimit) { this.ratisByteLimit = ratisByteLimit; } @@ -425,10 +398,11 @@ private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List= 0, OZONE_KEY_DELETING_LIMIT_PER_TASK + " cannot be negative."); this.deletedKeyCount = new AtomicLong(0); - this.suspended = new AtomicBoolean(false); this.deepCleanSnapshots = deepCleanSnapshots; this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.scmClient = scmClient; @@ -273,30 +270,6 @@ public BackgroundTaskQueue getTasks() { return queue; } - private boolean shouldRun() { - if (getOzoneManager() == null) { - // OzoneManager can be null for testing - return true; - } - return !suspended.get() && getOzoneManager().isLeaderReady(); - } - - /** - * Suspend the service. - */ - @VisibleForTesting - public void suspend() { - suspended.set(true); - } - - /** - * Resume the service if suspended. 
- */ - @VisibleForTesting - public void resume() { - suspended.set(false); - } - public int getKeyLimitPerTask() { return keyLimitPerTask; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index c97716f41207..6d9b3cf787a3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -35,7 +35,6 @@ import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -87,7 +86,6 @@ public class SnapshotDeletingService extends AbstractKeyDeletingService { private final OzoneManager ozoneManager; private final OmSnapshotManager omSnapshotManager; private final SnapshotChainManager chainManager; - private final AtomicBoolean suspended; private final OzoneConfiguration conf; private final AtomicLong successRunCount; private final int keyLimitPerTask; @@ -107,7 +105,6 @@ public SnapshotDeletingService(long interval, long serviceTimeout, ozoneManager.getMetadataManager(); this.chainManager = omMetadataManager.getSnapshotChainManager(); this.successRunCount = new AtomicLong(0); - this.suspended = new AtomicBoolean(false); this.conf = ozoneManager.getConfiguration(); this.snapshotDeletionPerTask = conf.getInt(SNAPSHOT_DELETING_LIMIT_PER_TASK, SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT); @@ -321,26 +318,6 @@ public BackgroundTaskQueue getTasks() { return queue; } - private boolean shouldRun() { - return !suspended.get() && ozoneManager.isLeaderReady(); - } - - /** - * Suspend the service. 
- */ - @VisibleForTesting - public void suspend() { - suspended.set(true); - } - - /** - * Resume the service if suspended. - */ - @VisibleForTesting - public void resume() { - suspended.set(false); - } - public long getSuccessfulRunCount() { return successRunCount.get(); } From 968d90ad0a640c67963c3b4198d65829cc430d17 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 21:39:43 -0400 Subject: [PATCH 19/35] HDDS-13035. Fix test Change-Id: I5ae8e13ec670d3fc639be26450f6ee21a9fd48ea --- .../ozone/om/service/TestDirectoryDeletingServiceWithFSO.java | 1 - .../hadoop/ozone/om/service/TestKeyDeletingService.java | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index d510872a5847..1cc670580dbd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -574,7 +574,6 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() DirectoryDeletingService dirDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); // Suspend KeyDeletingService dirDeletingService.suspend(); - Thread.sleep(1000); Random random = new Random(); final String testVolumeName = "volume" + random.nextInt(); final String testBucketName = "bucket" + random.nextInt(); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 76af8ee37559..b150d46f4bd4 100644 --- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -52,6 +52,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; @@ -346,7 +347,6 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() keyDeletingService.suspend(); SnapshotDeletingService snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); snapshotDeletingService.suspend(); - Thread.sleep(1000); final String volumeName = getTestName(); final String bucketName = uniqueObjectName("bucket"); @@ -437,7 +437,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() @ParameterizedTest @ValueSource(booleans = {true, false}) public void testRenamedKeyReclaimation(boolean testForSnapshot) - throws IOException, InterruptedException, TimeoutException { + throws IOException, InterruptedException, TimeoutException, ExecutionException { Table snapshotInfoTable = om.getMetadataManager().getSnapshotInfoTable(); Table deletedTable = From c12a36e0d5134437a592e7a724caa37c2b9bdf77 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 21:42:33 -0400 Subject: [PATCH 20/35] HDDS-13034. 
Fix test Change-Id: Ibe4244ba9eac58eeb86ff4a5a65bd42b15b2a8ae --- .../om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 73fe9b007ac6..959ea686538f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -397,7 +397,7 @@ public void testSnapshotWithFSO() throws Exception { } catch (IOException e) { throw new RuntimeException(e); } - }, 2000, 100000000); + }, 2000, 100000); } om.getKeyManager().getDirDeletingService().suspend(); om.getKeyManager().getDeletingService().suspend(); From 72fc5414785fd33c507b0e071e409a928377f3b6 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 21:45:39 -0400 Subject: [PATCH 21/35] HDDS-13034. 
Fix test Change-Id: I2c74594424fac70e62750815b45daf3780f7d85c --- .../om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 959ea686538f..805f3f474747 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -127,7 +127,7 @@ public void setup() throws Exception { 500, TimeUnit.MILLISECONDS); conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT, - 10, TimeUnit.MILLISECONDS); + 500, TimeUnit.MILLISECONDS); conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 500); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 500, TimeUnit.MILLISECONDS); From 847dab7a02d47292d00fe868c4f39f655f3957a3 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 21:58:05 -0400 Subject: [PATCH 22/35] HDDS-13160. 
Fix test Change-Id: I186d410df06fd53526c4b3d0c6c9fc8f8a5e0a97 --- .../hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java index 584a426716fb..8a5e4ed81ad6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java @@ -35,6 +35,7 @@ import java.util.Objects; import java.util.UUID; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.commons.lang3.RandomStringUtils; @@ -314,7 +315,8 @@ private void createFileKey(OzoneBucket bucket, String keyName) * and purgeSnapshot in same batch. */ @Test - public void testKeyAndSnapshotDeletionService() throws IOException, InterruptedException, TimeoutException { + public void testKeyAndSnapshotDeletionService() + throws IOException, InterruptedException, TimeoutException, ExecutionException { OzoneManager omLeader = cluster.getOMLeader(); OzoneManager omFollower; From b77c025191fa4031971d5c9f4d2d22b615929dff Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 22:21:29 -0400 Subject: [PATCH 23/35] HDDS-13160. 
Fix test Change-Id: I4b1da4c1291a5bce2dfb9d75a9f21f5d88f6e1cd --- .../TestDirectoryDeletingServiceWithFSO.java | 1 + ...napshotDeletingServiceIntegrationTest.java | 248 ++++++++++++++++++ .../om/service/DirectoryDeletingService.java | 23 +- .../ozone/om/service/KeyDeletingService.java | 23 +- .../om/service/SnapshotDeletingService.java | 46 ++-- .../om/service/TestKeyDeletingService.java | 4 +- 6 files changed, 320 insertions(+), 25 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index 1cc670580dbd..ccc1bc6a1ced 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -574,6 +574,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() DirectoryDeletingService dirDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); // Suspend KeyDeletingService dirDeletingService.suspend(); + GenericTestUtils.waitFor(() -> !dirDeletingService.isRunningOnAOS(), 1000, 10000); Random random = new Random(); final String testVolumeName = "volume" + random.nextInt(); final String testBucketName = "bucket" + random.nextInt(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index fca519b36b32..805f3f474747 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -25,17 +25,31 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.when; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Random; import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import org.apache.commons.compress.utils.Lists; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.IOUtils; @@ -47,27 +61,34 @@ import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; +import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; +import 
org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; +import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestMethodOrder; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -496,6 +517,233 @@ public void testSnapshotWithFSO() throws Exception { rcSnap1.close(); } + private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean dirDeletionWaitStarted, + AtomicBoolean dirDeletionStarted) + throws InterruptedException, TimeoutException, IOException { + OzoneManager ozoneManager = Mockito.spy(om); + om.getKeyManager().getDirDeletingService().shutdown(); + KeyManager keyManager = Mockito.spy(om.getKeyManager()); + when(ozoneManager.getKeyManager()).thenReturn(keyManager); + GenericTestUtils.waitFor(() -> om.getKeyManager().getDirDeletingService().getThreadCount() == 0, 1000, + 100000); + DirectoryDeletingService directoryDeletingService = Mockito.spy(new DirectoryDeletingService(10000, + TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1, false)); + directoryDeletingService.shutdown(); + 
GenericTestUtils.waitFor(() -> directoryDeletingService.getThreadCount() == 0, 1000, + 100000); + doAnswer(i -> { + // Wait for SDS to reach DDS wait block before processing any deleted directories. + GenericTestUtils.waitFor(dirDeletionWaitStarted::get, 1000, 100000); + dirDeletionStarted.set(true); + return i.callRealMethod(); + }).when(keyManager).getDeletedDirEntries(); + return directoryDeletingService; + } + + private KeyDeletingService getMockedKeyDeletingService(AtomicBoolean keyDeletionWaitStarted, + AtomicBoolean keyDeletionStarted) + throws InterruptedException, TimeoutException, IOException { + OzoneManager ozoneManager = Mockito.spy(om); + om.getKeyManager().getDeletingService().shutdown(); + GenericTestUtils.waitFor(() -> om.getKeyManager().getDeletingService().getThreadCount() == 0, 1000, + 100000); + KeyManager keyManager = Mockito.spy(om.getKeyManager()); + when(ozoneManager.getKeyManager()).thenReturn(keyManager); + KeyDeletingService keyDeletingService = Mockito.spy(new KeyDeletingService(ozoneManager, + ozoneManager.getScmClient().getBlockClient(), 10000, + 100000, cluster.getConf(), 10, false)); + keyDeletingService.shutdown(); + GenericTestUtils.waitFor(() -> keyDeletingService.getThreadCount() == 0, 1000, + 100000); + when(keyManager.getPendingDeletionKeys(any(), anyInt())).thenAnswer(i -> { + // wait for SDS to reach the KDS wait block before processing any key. 
+ GenericTestUtils.waitFor(keyDeletionWaitStarted::get, 1000, 100000); + keyDeletionStarted.set(true); + return i.callRealMethod(); + }); + return keyDeletingService; + } + + @SuppressWarnings("checkstyle:parameternumber") + private SnapshotDeletingService getMockedSnapshotDeletingService(KeyDeletingService keyDeletingService, + DirectoryDeletingService directoryDeletingService, + AtomicBoolean snapshotDeletionStarted, + AtomicBoolean keyDeletionWaitStarted, + AtomicBoolean dirDeletionWaitStarted, + AtomicBoolean keyDeletionStarted, + AtomicBoolean dirDeletionStarted, + OzoneBucket testBucket) + throws InterruptedException, TimeoutException, IOException { + OzoneManager ozoneManager = Mockito.spy(om); + om.getKeyManager().getSnapshotDeletingService().shutdown(); + GenericTestUtils.waitFor(() -> om.getKeyManager().getSnapshotDeletingService().getThreadCount() == 0, 1000, + 100000); + KeyManager keyManager = Mockito.spy(om.getKeyManager()); + OmMetadataManagerImpl omMetadataManager = Mockito.spy((OmMetadataManagerImpl)om.getMetadataManager()); + SnapshotChainManager unMockedSnapshotChainManager = + ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(); + SnapshotChainManager snapshotChainManager = Mockito.spy(unMockedSnapshotChainManager); + OmSnapshotManager omSnapshotManager = Mockito.spy(om.getOmSnapshotManager()); + when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); + when(ozoneManager.getKeyManager()).thenReturn(keyManager); + when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager); + when(omMetadataManager.getSnapshotChainManager()).thenReturn(snapshotChainManager); + when(keyManager.getDeletingService()).thenReturn(keyDeletingService); + when(keyManager.getDirDeletingService()).thenReturn(directoryDeletingService); + SnapshotDeletingService snapshotDeletingService = Mockito.spy(new SnapshotDeletingService(10000, + 100000, ozoneManager)); + snapshotDeletingService.shutdown(); + GenericTestUtils.waitFor(() 
-> snapshotDeletingService.getThreadCount() == 0, 1000, + 100000); + when(snapshotChainManager.iterator(anyBoolean())).thenAnswer(i -> { + Iterator itr = (Iterator) i.callRealMethod(); + return Lists.newArrayList(itr).stream().filter(uuid -> { + try { + SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(om, snapshotChainManager, uuid); + return snapshotInfo.getBucketName().equals(testBucket.getName()) && + snapshotInfo.getVolumeName().equals(testBucket.getVolumeName()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).iterator(); + }); + when(snapshotChainManager.getLatestGlobalSnapshotId()) + .thenAnswer(i -> unMockedSnapshotChainManager.getLatestGlobalSnapshotId()); + when(snapshotChainManager.getOldestGlobalSnapshotId()) + .thenAnswer(i -> unMockedSnapshotChainManager.getOldestGlobalSnapshotId()); + doAnswer(i -> { + // KDS wait block reached in SDS. + GenericTestUtils.waitFor(() -> { + return keyDeletingService.isRunningOnAOS(); + }, 1000, 100000); + keyDeletionWaitStarted.set(true); + return i.callRealMethod(); + }).when(snapshotDeletingService).waitForKeyDeletingService(); + doAnswer(i -> { + // DDS wait block reached in SDS. + GenericTestUtils.waitFor(directoryDeletingService::isRunningOnAOS, 1000, 100000); + dirDeletionWaitStarted.set(true); + return i.callRealMethod(); + }).when(snapshotDeletingService).waitForDirDeletingService(); + doAnswer(i -> { + // Assert KDS & DDS is not running when SDS starts moving entries & assert all wait block, KDS processing + // AOS block & DDS AOS block have been executed. 
+ Assertions.assertTrue(keyDeletionWaitStarted.get()); + Assertions.assertTrue(dirDeletionWaitStarted.get()); + Assertions.assertTrue(keyDeletionStarted.get()); + Assertions.assertTrue(dirDeletionStarted.get()); + Assertions.assertFalse(keyDeletingService.isRunningOnAOS()); + Assertions.assertFalse(directoryDeletingService.isRunningOnAOS()); + snapshotDeletionStarted.set(true); + return i.callRealMethod(); + }).when(omSnapshotManager).getSnapshot(anyString(), anyString(), anyString()); + return snapshotDeletingService; + } + + @Test + @Order(4) + @Flaky("HDDS-11847") + public void testParallelExcecutionOfKeyDeletionAndSnapshotDeletion() throws Exception { + AtomicBoolean keyDeletionWaitStarted = new AtomicBoolean(false); + AtomicBoolean dirDeletionWaitStarted = new AtomicBoolean(false); + AtomicBoolean keyDeletionStarted = new AtomicBoolean(false); + AtomicBoolean dirDeletionStarted = new AtomicBoolean(false); + AtomicBoolean snapshotDeletionStarted = new AtomicBoolean(false); + Random random = new Random(); + String bucketName = "bucket" + random.nextInt(); + BucketArgs bucketArgs = new BucketArgs.Builder() + .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) + .build(); + OzoneBucket testBucket = TestDataUtil.createBucket( + client, VOLUME_NAME, bucketArgs, bucketName); + // mock keyDeletingService + KeyDeletingService keyDeletingService = getMockedKeyDeletingService(keyDeletionWaitStarted, keyDeletionStarted); + + // mock dirDeletingService + DirectoryDeletingService directoryDeletingService = getMockedDirectoryDeletingService(dirDeletionWaitStarted, + dirDeletionStarted); + + // mock snapshotDeletingService. 
+ SnapshotDeletingService snapshotDeletingService = getMockedSnapshotDeletingService(keyDeletingService, + directoryDeletingService, snapshotDeletionStarted, keyDeletionWaitStarted, dirDeletionWaitStarted, + keyDeletionStarted, dirDeletionStarted, testBucket); + createSnapshotFSODataForBucket(testBucket); + List> renamesKeyEntries; + List>> deletedKeyEntries; + List> deletedDirEntries; + try (UncheckedAutoCloseableSupplier snapshot = + om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), + testBucket.getName() + "snap2")) { + renamesKeyEntries = snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000); + deletedKeyEntries = snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000); + deletedDirEntries = snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), + testBucket.getName(), 1000); + } + Thread keyDeletingThread = new Thread(() -> { + try { + keyDeletingService.runPeriodicalTaskNow(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + Thread directoryDeletingThread = new Thread(() -> { + try { + directoryDeletingService.runPeriodicalTaskNow(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + ExecutorService snapshotDeletingThread = Executors.newFixedThreadPool(1); + Runnable snapshotDeletionRunnable = () -> { + try { + snapshotDeletingService.runPeriodicalTaskNow(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }; + keyDeletingThread.start(); + directoryDeletingThread.start(); + Future future = snapshotDeletingThread.submit(snapshotDeletionRunnable); + GenericTestUtils.waitFor(snapshotDeletionStarted::get, 1000, 30000); + future.get(); + try (UncheckedAutoCloseableSupplier snapshot = + om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), + testBucket.getName() + 
"snap2")) { + Assertions.assertEquals(Collections.emptyList(), + snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000)); + Assertions.assertEquals(Collections.emptyList(), + snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000)); + Assertions.assertEquals(Collections.emptyList(), + snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), + testBucket.getName(), 1000)); + } + List> aosRenamesKeyEntries = + om.getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000); + List>> aosDeletedKeyEntries = + om.getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), + testBucket.getName(), "", (kv) -> true, 1000); + List> aosDeletedDirEntries = + om.getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), + testBucket.getName(), 1000); + renamesKeyEntries.forEach(entry -> Assertions.assertTrue(aosRenamesKeyEntries.contains(entry))); + deletedKeyEntries.forEach(entry -> Assertions.assertTrue(aosDeletedKeyEntries.contains(entry))); + deletedDirEntries.forEach(entry -> Assertions.assertTrue(aosDeletedDirEntries.contains(entry))); + Mockito.reset(snapshotDeletingService); + SnapshotInfo snap2 = SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), + testBucket.getName(), testBucket.getName() + "snap2"); + Assertions.assertEquals(snap2.getSnapshotStatus(), SnapshotInfo.SnapshotStatus.SNAPSHOT_DELETED); + future = snapshotDeletingThread.submit(snapshotDeletionRunnable); + future.get(); + Assertions.assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), + testBucket.getName(), testBucket.getName() + "snap2")); + cluster.restartOzoneManager(); + } + /* Flow ---- diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index d7cfc32aa6cb..4b0f32976455 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -37,6 +37,7 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -102,6 +103,7 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // from parent directory info from deleted directory table concurrently // and send deletion requests. private int ratisByteLimit; + private final AtomicBoolean isRunningOnAOS; private final SnapshotChainManager snapshotChainManager; private final boolean deepCleanSnapshots; private final ExecutorService deletionThreadPool; @@ -125,6 +127,7 @@ public DirectoryDeletingService(long interval, TimeUnit unit, // always go to 90% of max limit for request as other header will be added this.ratisByteLimit = (int) (limit * 0.9); + this.isRunningOnAOS = new AtomicBoolean(false); this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.deepCleanSnapshots = deepCleanSnapshots; this.deletedDirsCount = new AtomicLong(0); @@ -136,10 +139,14 @@ public void setRatisByteLimit(int ratisByteLimit) { this.ratisByteLimit = ratisByteLimit; } + public boolean isRunningOnAOS() { + return isRunningOnAOS.get(); + } + @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new DirDeletingTask(null)); + queue.add(new DirDeletingTask(this, null)); if (deepCleanSnapshots) { Iterator iterator = null; try 
{ @@ -150,7 +157,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new DirDeletingTask(snapshotId)); + queue.add(new DirDeletingTask(this, snapshotId)); } } return queue; @@ -400,9 +407,11 @@ private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List= 0, OZONE_KEY_DELETING_LIMIT_PER_TASK + " cannot be negative."); this.deletedKeyCount = new AtomicLong(0); + this.isRunningOnAOS = new AtomicBoolean(false); this.deepCleanSnapshots = deepCleanSnapshots; this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.scmClient = scmClient; @@ -115,6 +118,10 @@ public AtomicLong getDeletedKeyCount() { return deletedKeyCount; } + public boolean isRunningOnAOS() { + return isRunningOnAOS.get(); + } + Pair processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { @@ -253,7 +260,7 @@ private Pair submitPurgeKeysRequest(List iterator = null; try { @@ -264,7 +271,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new KeyDeletingTask(snapshotId)); + queue.add(new KeyDeletingTask(this, snapshotId)); } } return queue; @@ -287,9 +294,11 @@ public void setKeyLimitPerTask(int keyLimitPerTask) { */ @VisibleForTesting final class KeyDeletingTask implements BackgroundTask { + private final KeyDeletingService deletingService; private final UUID snapshotId; - KeyDeletingTask(UUID snapshotId) { + KeyDeletingTask(KeyDeletingService service, UUID snapshotId) { + this.deletingService = service; this.snapshotId = snapshotId; } @@ -422,6 +431,7 @@ public BackgroundTaskResult call() { final long run = getRunCount().incrementAndGet(); if (snapshotId == null) { LOG.debug("Running KeyDeletingService for active object store, {}", run); + isRunningOnAOS.set(true); } else { LOG.debug("Running 
KeyDeletingService for snapshot : {}, {}", snapshotId, run); } @@ -458,6 +468,13 @@ public BackgroundTaskResult call() { } catch (IOException e) { LOG.error("Error while running delete files background task for store {}. Will retry at next run.", snapInfo, e); + } finally { + if (snapshotId == null) { + isRunningOnAOS.set(false); + synchronized (deletingService) { + this.deletingService.notify(); + } + } } } // By design, no one cares about the results of this call back. diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index 6d9b3cf787a3..96ae98a19b6b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -22,17 +22,14 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT; -import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.FlatResource.SNAPSHOT_GC_LOCK; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -55,8 +52,6 @@ import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import 
org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; -import org.apache.hadoop.ozone.om.snapshot.MultiSnapshotLocks; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; @@ -91,7 +86,7 @@ public class SnapshotDeletingService extends AbstractKeyDeletingService { private final int keyLimitPerTask; private final int snapshotDeletionPerTask; private final int ratisByteLimit; - private MultiSnapshotLocks snapshotIdLocks; + private final long serviceTimeout; public SnapshotDeletingService(long interval, long serviceTimeout, OzoneManager ozoneManager) @@ -117,8 +112,32 @@ public SnapshotDeletingService(long interval, long serviceTimeout, this.keyLimitPerTask = conf.getInt( OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); - IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); - this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true); + this.serviceTimeout = serviceTimeout; + } + + // Wait for a notification from KeyDeletingService if the key deletion is running. This is to ensure, merging of + // entries do not start while the AOS is still processing the deleted keys. + @VisibleForTesting + public void waitForKeyDeletingService() throws InterruptedException { + KeyDeletingService keyDeletingService = getOzoneManager().getKeyManager().getDeletingService(); + synchronized (keyDeletingService) { + while (keyDeletingService.isRunningOnAOS()) { + keyDeletingService.wait(serviceTimeout); + } + } + } + + // Wait for a notification from DirectoryDeletingService if the directory deletion is running. This is to ensure, + // merging of entries do not start while the AOS is still processing the deleted keys. 
+ @VisibleForTesting + public void waitForDirDeletingService() throws InterruptedException { + DirectoryDeletingService directoryDeletingService = getOzoneManager().getKeyManager() + .getDirDeletingService(); + synchronized (directoryDeletingService) { + while (directoryDeletingService.isRunningOnAOS()) { + directoryDeletingService.wait(serviceTimeout); + } + } } private class SnapshotDeletingTask implements BackgroundTask { @@ -155,16 +174,11 @@ public BackgroundTaskResult call() throws InterruptedException { continue; } - // Acquire write lock on current snapshot and next snapshot in chain. - if (!snapshotIdLocks.acquireLock(Arrays.asList(snapInfo.getSnapshotId(), - Optional.ofNullable(nextSnapshot).map(SnapshotInfo::getSnapshotId).orElse(null))) - .isLockAcquired()) { - continue; - } - // nextSnapshot = null means entries would be moved to AOS. if (nextSnapshot == null) { LOG.info("Snapshot: {} entries will be moved to AOS.", snapInfo.getTableKey()); + waitForKeyDeletingService(); + waitForDirDeletingService(); } else { LOG.info("Snapshot: {} entries will be moved to next active snapshot: {}", snapInfo.getTableKey(), nextSnapshot.getTableKey()); @@ -215,8 +229,6 @@ public BackgroundTaskResult call() throws InterruptedException { } else { snapshotsToBePurged.add(snapInfo.getTableKey()); } - } finally { - snapshotIdLocks.releaseLock(); } successRunCount.incrementAndGet(); snapshotLimit--; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index b150d46f4bd4..3c32a70e047c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -347,7 +347,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() 
keyDeletingService.suspend(); SnapshotDeletingService snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); snapshotDeletingService.suspend(); - + GenericTestUtils.waitFor(() -> !keyDeletingService.isRunningOnAOS(), 1000, 10000); final String volumeName = getTestName(); final String bucketName = uniqueObjectName("bucket"); OzoneManager ozoneManager = Mockito.spy(om); @@ -622,7 +622,7 @@ public void testKeyDeletingServiceWithDeepCleanedSnapshots() throws Exception { when(kds.getTasks()).thenAnswer(i -> { BackgroundTaskQueue queue = new BackgroundTaskQueue(); for (UUID id : snapshotIds) { - queue.add(kds.new KeyDeletingTask(id)); + queue.add(kds.new KeyDeletingTask(kds, id)); } return queue; }); From ea98dfeebaefdea2aa4a74462e53455da1ba19fb Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 5 Jun 2025 22:25:58 -0400 Subject: [PATCH 24/35] Revert "HDDS-13160. Fix test" This reverts commit b77c025191fa4031971d5c9f4d2d22b615929dff. --- .../TestDirectoryDeletingServiceWithFSO.java | 1 - ...napshotDeletingServiceIntegrationTest.java | 248 ------------------ .../om/service/DirectoryDeletingService.java | 23 +- .../ozone/om/service/KeyDeletingService.java | 23 +- .../om/service/SnapshotDeletingService.java | 46 ++-- .../om/service/TestKeyDeletingService.java | 4 +- 6 files changed, 25 insertions(+), 320 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index ccc1bc6a1ced..1cc670580dbd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -574,7 +574,6 @@ public void 
testAOSKeyDeletingWithSnapshotCreateParallelExecution() DirectoryDeletingService dirDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); // Suspend KeyDeletingService dirDeletingService.suspend(); - GenericTestUtils.waitFor(() -> !dirDeletingService.isRunningOnAOS(), 1000, 10000); Random random = new Random(); final String testVolumeName = "volume" + random.nextInt(); final String testBucketName = "bucket" + random.nextInt(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 805f3f474747..fca519b36b32 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -25,31 +25,17 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.when; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Random; import java.util.UUID; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import 
java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; -import org.apache.commons.compress.utils.Lists; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.utils.IOUtils; @@ -61,34 +47,27 @@ import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; -import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; -import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestMethodOrder; -import org.mockito.Mockito; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -517,233 +496,6 @@ public void testSnapshotWithFSO() throws Exception { rcSnap1.close(); } - private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean dirDeletionWaitStarted, - AtomicBoolean dirDeletionStarted) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getDirDeletingService().shutdown(); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - GenericTestUtils.waitFor(() -> om.getKeyManager().getDirDeletingService().getThreadCount() == 0, 1000, - 100000); - DirectoryDeletingService directoryDeletingService = Mockito.spy(new DirectoryDeletingService(10000, - TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1, false)); - directoryDeletingService.shutdown(); - GenericTestUtils.waitFor(() -> directoryDeletingService.getThreadCount() == 0, 1000, - 100000); - doAnswer(i -> { - // Wait for SDS to reach DDS wait block before processing any deleted directories. 
- GenericTestUtils.waitFor(dirDeletionWaitStarted::get, 1000, 100000); - dirDeletionStarted.set(true); - return i.callRealMethod(); - }).when(keyManager).getDeletedDirEntries(); - return directoryDeletingService; - } - - private KeyDeletingService getMockedKeyDeletingService(AtomicBoolean keyDeletionWaitStarted, - AtomicBoolean keyDeletionStarted) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getDeletingService().shutdown(); - GenericTestUtils.waitFor(() -> om.getKeyManager().getDeletingService().getThreadCount() == 0, 1000, - 100000); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - KeyDeletingService keyDeletingService = Mockito.spy(new KeyDeletingService(ozoneManager, - ozoneManager.getScmClient().getBlockClient(), 10000, - 100000, cluster.getConf(), 10, false)); - keyDeletingService.shutdown(); - GenericTestUtils.waitFor(() -> keyDeletingService.getThreadCount() == 0, 1000, - 100000); - when(keyManager.getPendingDeletionKeys(any(), anyInt())).thenAnswer(i -> { - // wait for SDS to reach the KDS wait block before processing any key. 
- GenericTestUtils.waitFor(keyDeletionWaitStarted::get, 1000, 100000); - keyDeletionStarted.set(true); - return i.callRealMethod(); - }); - return keyDeletingService; - } - - @SuppressWarnings("checkstyle:parameternumber") - private SnapshotDeletingService getMockedSnapshotDeletingService(KeyDeletingService keyDeletingService, - DirectoryDeletingService directoryDeletingService, - AtomicBoolean snapshotDeletionStarted, - AtomicBoolean keyDeletionWaitStarted, - AtomicBoolean dirDeletionWaitStarted, - AtomicBoolean keyDeletionStarted, - AtomicBoolean dirDeletionStarted, - OzoneBucket testBucket) - throws InterruptedException, TimeoutException, IOException { - OzoneManager ozoneManager = Mockito.spy(om); - om.getKeyManager().getSnapshotDeletingService().shutdown(); - GenericTestUtils.waitFor(() -> om.getKeyManager().getSnapshotDeletingService().getThreadCount() == 0, 1000, - 100000); - KeyManager keyManager = Mockito.spy(om.getKeyManager()); - OmMetadataManagerImpl omMetadataManager = Mockito.spy((OmMetadataManagerImpl)om.getMetadataManager()); - SnapshotChainManager unMockedSnapshotChainManager = - ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(); - SnapshotChainManager snapshotChainManager = Mockito.spy(unMockedSnapshotChainManager); - OmSnapshotManager omSnapshotManager = Mockito.spy(om.getOmSnapshotManager()); - when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); - when(ozoneManager.getKeyManager()).thenReturn(keyManager); - when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager); - when(omMetadataManager.getSnapshotChainManager()).thenReturn(snapshotChainManager); - when(keyManager.getDeletingService()).thenReturn(keyDeletingService); - when(keyManager.getDirDeletingService()).thenReturn(directoryDeletingService); - SnapshotDeletingService snapshotDeletingService = Mockito.spy(new SnapshotDeletingService(10000, - 100000, ozoneManager)); - snapshotDeletingService.shutdown(); - GenericTestUtils.waitFor(() 
-> snapshotDeletingService.getThreadCount() == 0, 1000, - 100000); - when(snapshotChainManager.iterator(anyBoolean())).thenAnswer(i -> { - Iterator itr = (Iterator) i.callRealMethod(); - return Lists.newArrayList(itr).stream().filter(uuid -> { - try { - SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(om, snapshotChainManager, uuid); - return snapshotInfo.getBucketName().equals(testBucket.getName()) && - snapshotInfo.getVolumeName().equals(testBucket.getVolumeName()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }).iterator(); - }); - when(snapshotChainManager.getLatestGlobalSnapshotId()) - .thenAnswer(i -> unMockedSnapshotChainManager.getLatestGlobalSnapshotId()); - when(snapshotChainManager.getOldestGlobalSnapshotId()) - .thenAnswer(i -> unMockedSnapshotChainManager.getOldestGlobalSnapshotId()); - doAnswer(i -> { - // KDS wait block reached in SDS. - GenericTestUtils.waitFor(() -> { - return keyDeletingService.isRunningOnAOS(); - }, 1000, 100000); - keyDeletionWaitStarted.set(true); - return i.callRealMethod(); - }).when(snapshotDeletingService).waitForKeyDeletingService(); - doAnswer(i -> { - // DDS wait block reached in SDS. - GenericTestUtils.waitFor(directoryDeletingService::isRunningOnAOS, 1000, 100000); - dirDeletionWaitStarted.set(true); - return i.callRealMethod(); - }).when(snapshotDeletingService).waitForDirDeletingService(); - doAnswer(i -> { - // Assert KDS & DDS is not running when SDS starts moving entries & assert all wait block, KDS processing - // AOS block & DDS AOS block have been executed. 
- Assertions.assertTrue(keyDeletionWaitStarted.get()); - Assertions.assertTrue(dirDeletionWaitStarted.get()); - Assertions.assertTrue(keyDeletionStarted.get()); - Assertions.assertTrue(dirDeletionStarted.get()); - Assertions.assertFalse(keyDeletingService.isRunningOnAOS()); - Assertions.assertFalse(directoryDeletingService.isRunningOnAOS()); - snapshotDeletionStarted.set(true); - return i.callRealMethod(); - }).when(omSnapshotManager).getSnapshot(anyString(), anyString(), anyString()); - return snapshotDeletingService; - } - - @Test - @Order(4) - @Flaky("HDDS-11847") - public void testParallelExcecutionOfKeyDeletionAndSnapshotDeletion() throws Exception { - AtomicBoolean keyDeletionWaitStarted = new AtomicBoolean(false); - AtomicBoolean dirDeletionWaitStarted = new AtomicBoolean(false); - AtomicBoolean keyDeletionStarted = new AtomicBoolean(false); - AtomicBoolean dirDeletionStarted = new AtomicBoolean(false); - AtomicBoolean snapshotDeletionStarted = new AtomicBoolean(false); - Random random = new Random(); - String bucketName = "bucket" + random.nextInt(); - BucketArgs bucketArgs = new BucketArgs.Builder() - .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) - .build(); - OzoneBucket testBucket = TestDataUtil.createBucket( - client, VOLUME_NAME, bucketArgs, bucketName); - // mock keyDeletingService - KeyDeletingService keyDeletingService = getMockedKeyDeletingService(keyDeletionWaitStarted, keyDeletionStarted); - - // mock dirDeletingService - DirectoryDeletingService directoryDeletingService = getMockedDirectoryDeletingService(dirDeletionWaitStarted, - dirDeletionStarted); - - // mock snapshotDeletingService. 
- SnapshotDeletingService snapshotDeletingService = getMockedSnapshotDeletingService(keyDeletingService, - directoryDeletingService, snapshotDeletionStarted, keyDeletionWaitStarted, dirDeletionWaitStarted, - keyDeletionStarted, dirDeletionStarted, testBucket); - createSnapshotFSODataForBucket(testBucket); - List> renamesKeyEntries; - List>> deletedKeyEntries; - List> deletedDirEntries; - try (UncheckedAutoCloseableSupplier snapshot = - om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), - testBucket.getName() + "snap2")) { - renamesKeyEntries = snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - deletedKeyEntries = snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - deletedDirEntries = snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000); - } - Thread keyDeletingThread = new Thread(() -> { - try { - keyDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - Thread directoryDeletingThread = new Thread(() -> { - try { - directoryDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - ExecutorService snapshotDeletingThread = Executors.newFixedThreadPool(1); - Runnable snapshotDeletionRunnable = () -> { - try { - snapshotDeletingService.runPeriodicalTaskNow(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }; - keyDeletingThread.start(); - directoryDeletingThread.start(); - Future future = snapshotDeletingThread.submit(snapshotDeletionRunnable); - GenericTestUtils.waitFor(snapshotDeletionStarted::get, 1000, 30000); - future.get(); - try (UncheckedAutoCloseableSupplier snapshot = - om.getOmSnapshotManager().getSnapshot(testBucket.getVolumeName(), testBucket.getName(), - testBucket.getName() + 
"snap2")) { - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000)); - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000)); - Assertions.assertEquals(Collections.emptyList(), - snapshot.get().getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000)); - } - List> aosRenamesKeyEntries = - om.getKeyManager().getRenamesKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - List>> aosDeletedKeyEntries = - om.getKeyManager().getDeletedKeyEntries(testBucket.getVolumeName(), - testBucket.getName(), "", (kv) -> true, 1000); - List> aosDeletedDirEntries = - om.getKeyManager().getDeletedDirEntries(testBucket.getVolumeName(), - testBucket.getName(), 1000); - renamesKeyEntries.forEach(entry -> Assertions.assertTrue(aosRenamesKeyEntries.contains(entry))); - deletedKeyEntries.forEach(entry -> Assertions.assertTrue(aosDeletedKeyEntries.contains(entry))); - deletedDirEntries.forEach(entry -> Assertions.assertTrue(aosDeletedDirEntries.contains(entry))); - Mockito.reset(snapshotDeletingService); - SnapshotInfo snap2 = SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), - testBucket.getName(), testBucket.getName() + "snap2"); - Assertions.assertEquals(snap2.getSnapshotStatus(), SnapshotInfo.SnapshotStatus.SNAPSHOT_DELETED); - future = snapshotDeletingThread.submit(snapshotDeletionRunnable); - future.get(); - Assertions.assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, testBucket.getVolumeName(), - testBucket.getName(), testBucket.getName() + "snap2")); - cluster.restartOzoneManager(); - } - /* Flow ---- diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 4b0f32976455..d7cfc32aa6cb 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -37,7 +37,6 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -103,7 +102,6 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // from parent directory info from deleted directory table concurrently // and send deletion requests. private int ratisByteLimit; - private final AtomicBoolean isRunningOnAOS; private final SnapshotChainManager snapshotChainManager; private final boolean deepCleanSnapshots; private final ExecutorService deletionThreadPool; @@ -127,7 +125,6 @@ public DirectoryDeletingService(long interval, TimeUnit unit, // always go to 90% of max limit for request as other header will be added this.ratisByteLimit = (int) (limit * 0.9); - this.isRunningOnAOS = new AtomicBoolean(false); this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.deepCleanSnapshots = deepCleanSnapshots; this.deletedDirsCount = new AtomicLong(0); @@ -139,14 +136,10 @@ public void setRatisByteLimit(int ratisByteLimit) { this.ratisByteLimit = ratisByteLimit; } - public boolean isRunningOnAOS() { - return isRunningOnAOS.get(); - } - @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); - queue.add(new DirDeletingTask(this, null)); + queue.add(new DirDeletingTask(null)); if (deepCleanSnapshots) { Iterator iterator = null; try 
{ @@ -157,7 +150,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new DirDeletingTask(this, snapshotId)); + queue.add(new DirDeletingTask(snapshotId)); } } return queue; @@ -407,11 +400,9 @@ private OzoneManagerProtocolProtos.OMResponse submitPurgePaths(List= 0, OZONE_KEY_DELETING_LIMIT_PER_TASK + " cannot be negative."); this.deletedKeyCount = new AtomicLong(0); - this.isRunningOnAOS = new AtomicBoolean(false); this.deepCleanSnapshots = deepCleanSnapshots; this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); this.scmClient = scmClient; @@ -118,10 +115,6 @@ public AtomicLong getDeletedKeyCount() { return deletedKeyCount; } - public boolean isRunningOnAOS() { - return isRunningOnAOS.get(); - } - Pair processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { @@ -260,7 +253,7 @@ private Pair submitPurgeKeysRequest(List iterator = null; try { @@ -271,7 +264,7 @@ public BackgroundTaskQueue getTasks() { } while (iterator.hasNext()) { UUID snapshotId = iterator.next(); - queue.add(new KeyDeletingTask(this, snapshotId)); + queue.add(new KeyDeletingTask(snapshotId)); } } return queue; @@ -294,11 +287,9 @@ public void setKeyLimitPerTask(int keyLimitPerTask) { */ @VisibleForTesting final class KeyDeletingTask implements BackgroundTask { - private final KeyDeletingService deletingService; private final UUID snapshotId; - KeyDeletingTask(KeyDeletingService service, UUID snapshotId) { - this.deletingService = service; + KeyDeletingTask(UUID snapshotId) { this.snapshotId = snapshotId; } @@ -431,7 +422,6 @@ public BackgroundTaskResult call() { final long run = getRunCount().incrementAndGet(); if (snapshotId == null) { LOG.debug("Running KeyDeletingService for active object store, {}", run); - isRunningOnAOS.set(true); } else { LOG.debug("Running 
KeyDeletingService for snapshot : {}, {}", snapshotId, run); } @@ -468,13 +458,6 @@ public BackgroundTaskResult call() { } catch (IOException e) { LOG.error("Error while running delete files background task for store {}. Will retry at next run.", snapInfo, e); - } finally { - if (snapshotId == null) { - isRunningOnAOS.set(false); - synchronized (deletingService) { - this.deletingService.notify(); - } - } } } // By design, no one cares about the results of this call back. diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index 96ae98a19b6b..6d9b3cf787a3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -22,14 +22,17 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.SNAPSHOT_DELETING_LIMIT_PER_TASK_DEFAULT; +import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.FlatResource.SNAPSHOT_GC_LOCK; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -52,6 +55,8 @@ import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import 
org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; +import org.apache.hadoop.ozone.om.snapshot.MultiSnapshotLocks; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; @@ -86,7 +91,7 @@ public class SnapshotDeletingService extends AbstractKeyDeletingService { private final int keyLimitPerTask; private final int snapshotDeletionPerTask; private final int ratisByteLimit; - private final long serviceTimeout; + private MultiSnapshotLocks snapshotIdLocks; public SnapshotDeletingService(long interval, long serviceTimeout, OzoneManager ozoneManager) @@ -112,32 +117,8 @@ public SnapshotDeletingService(long interval, long serviceTimeout, this.keyLimitPerTask = conf.getInt( OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); - this.serviceTimeout = serviceTimeout; - } - - // Wait for a notification from KeyDeletingService if the key deletion is running. This is to ensure, merging of - // entries do not start while the AOS is still processing the deleted keys. - @VisibleForTesting - public void waitForKeyDeletingService() throws InterruptedException { - KeyDeletingService keyDeletingService = getOzoneManager().getKeyManager().getDeletingService(); - synchronized (keyDeletingService) { - while (keyDeletingService.isRunningOnAOS()) { - keyDeletingService.wait(serviceTimeout); - } - } - } - - // Wait for a notification from DirectoryDeletingService if the directory deletion is running. This is to ensure, - // merging of entries do not start while the AOS is still processing the deleted keys. 
- @VisibleForTesting - public void waitForDirDeletingService() throws InterruptedException { - DirectoryDeletingService directoryDeletingService = getOzoneManager().getKeyManager() - .getDirDeletingService(); - synchronized (directoryDeletingService) { - while (directoryDeletingService.isRunningOnAOS()) { - directoryDeletingService.wait(serviceTimeout); - } - } + IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); + this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true); } private class SnapshotDeletingTask implements BackgroundTask { @@ -174,11 +155,16 @@ public BackgroundTaskResult call() throws InterruptedException { continue; } + // Acquire write lock on current snapshot and next snapshot in chain. + if (!snapshotIdLocks.acquireLock(Arrays.asList(snapInfo.getSnapshotId(), + Optional.ofNullable(nextSnapshot).map(SnapshotInfo::getSnapshotId).orElse(null))) + .isLockAcquired()) { + continue; + } + // nextSnapshot = null means entries would be moved to AOS. 
if (nextSnapshot == null) { LOG.info("Snapshot: {} entries will be moved to AOS.", snapInfo.getTableKey()); - waitForKeyDeletingService(); - waitForDirDeletingService(); } else { LOG.info("Snapshot: {} entries will be moved to next active snapshot: {}", snapInfo.getTableKey(), nextSnapshot.getTableKey()); @@ -229,6 +215,8 @@ public BackgroundTaskResult call() throws InterruptedException { } else { snapshotsToBePurged.add(snapInfo.getTableKey()); } + } finally { + snapshotIdLocks.releaseLock(); } successRunCount.incrementAndGet(); snapshotLimit--; diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 3c32a70e047c..b150d46f4bd4 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -347,7 +347,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() keyDeletingService.suspend(); SnapshotDeletingService snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); snapshotDeletingService.suspend(); - GenericTestUtils.waitFor(() -> !keyDeletingService.isRunningOnAOS(), 1000, 10000); + final String volumeName = getTestName(); final String bucketName = uniqueObjectName("bucket"); OzoneManager ozoneManager = Mockito.spy(om); @@ -622,7 +622,7 @@ public void testKeyDeletingServiceWithDeepCleanedSnapshots() throws Exception { when(kds.getTasks()).thenAnswer(i -> { BackgroundTaskQueue queue = new BackgroundTaskQueue(); for (UUID id : snapshotIds) { - queue.add(kds.new KeyDeletingTask(kds, id)); + queue.add(kds.new KeyDeletingTask(id)); } return queue; }); From f4e9f4ad551952d7d39551d02bfcb4482f46687a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Fri, 6 Jun 2025 14:13:20 -0400 Subject: 
[PATCH 25/35] HDDS-13034. Address review comments Change-Id: I2b38ebc6bfeaba0935d568864605efc53e9eb222 --- .../apache/hadoop/ozone/om/OMConfigKeys.java | 4 ++ .../TestDirectoryDeletingServiceWithFSO.java | 4 +- .../om/service/DirectoryDeletingService.java | 67 +++++++++++++++---- 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 748d5f7d6c95..1a7cafd8ee27 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -387,6 +387,10 @@ public final class OMConfigKeys { */ public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = false; + /** + * DirectoryDeepCleaning snapshots have been moved from SnapshotDirectoryCleaningService to DirectoryDeletingService. + * Configs related to SnapshotDirectoryCleaningService are deprecated as this won't be used anywhere. + */ @Deprecated public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = "ozone.snapshot.directory.service.interval"; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index d7c12d0b81f4..fa1f6b9022e2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -752,13 +752,13 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { fs.delete(root, true); // After delete. 
5 sub files are still in keyTable. - // 4 dirs in dirTable. + // 0 dirs in dirTable. assertTableRowCount(keyTable, 5); assertTableRowCount(dirTable, 0); // KeyDeletingService and DirectoryDeletingService will not // clean up because the paths are part of a snapshot. - // As a result on 1 deleted dir and 3 deleted files will + // As a result on 5 deleted dir and 3 deleted files will // remain in dirTable and keyTable respectively. long prevDDSRunCount = dirDeletingService.getRunCount().get(); long prevKDSRunCount = keyDeletingService.getRunCount().get(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 2a4a4b43b9df..7edbe7761175 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -73,21 +73,64 @@ import org.slf4j.LoggerFactory; /** - * This is a background service to delete orphan directories and its - * sub paths(sub-dirs and sub-files). + Background service responsible for purging deleted directories and files + * in the Ozone Manager (OM) and associated snapshots. * *

- * This will scan the metadata of om periodically to get the orphan dirs from
- * DeletedDirectoryTable and find its sub paths. It will fetch all sub-files
- * from FileTable and move those to DeletedTable so that OM's
- * KeyDeletingService will cleanup those files later. It will fetch all
- * sub-directories from the DirectoryTable and move those to
- * DeletedDirectoryTable so that these will be visited in next iterations.
+ * This service periodically scans the deleted directory table and submits
+ * purge requests for directories and their sub-entries (subdirectories and files).
+ * It operates in both the active object store (AOS) and across all deep-clean enabled
+ * snapshots. The service supports parallel processing using a thread pool and
+ * coordinates exclusive size calculations and cleanup status updates for
+ * snapshots.
+ * </p>
 *
- * <p>
- * After moving all sub-files and sub-dirs the parent orphan directory will be
- * deleted by this service. It will continue traversing until all the leaf path
- * components of an orphan directory is visited.
+ * <h2>Key Features</h2>
+ * <ul>
+ *   <li>Processes deleted directories in both the active OM and all snapshots
+ * with deep cleaning enabled.</li>
+ *   <li>Uses a thread pool to parallelize deletion tasks within each store or snapshot.</li>
+ *   <li>Employs filters to determine reclaimability of directories and files,
+ * ensuring safety with respect to snapshot chains.</li>
+ *   <li>Tracks and updates exclusive size and replicated exclusive size for each
+ * snapshot as directories and files are reclaimed.</li>
+ *   <li>Updates the "deep cleaned" flag for snapshots after a successful run.</li>
+ *   <li>Handles error and race conditions gracefully, deferring work if necessary.</li>
+ * </ul>
+ *
+ * <h2>Constructor Parameters</h2>
+ * <ul>
+ *   <li>interval - How often the service runs.</li>
+ *   <li>unit - Time unit for the interval.</li>
+ *   <li>serviceTimeout - Service timeout in the given time unit.</li>
+ *   <li>ozoneManager - The OzoneManager instance.</li>
+ *   <li>configuration - Ozone configuration object.</li>
+ *   <li>dirDeletingServiceCorePoolSize - Number of parallel threads for deletion per store or snapshot.</li>
+ *   <li>deepCleanSnapshots - Whether to enable deep cleaning for snapshots.</li>
+ * </ul>
+ *
+ * <h2>Threading and Parallelism</h2>
+ * <ul>
+ *   <li>Uses a configurable thread pool for parallel deletion tasks within each store/snapshot.</li>
+ *   <li>Each snapshot and AOS get a separate background task for deletion.</li>
+ * </ul>
+ *
+ * <h2>Snapshot Integration</h2>
+ * <ul>
+ *   <li>Iterates all snapshots in the chain if deep cleaning is enabled.</li>
+ *   <li>Skips snapshots that are already deep-cleaned or not yet flushed to disk.</li>
+ *   <li>Updates snapshot metadata to reflect size changes and cleaning status.</li>
+ * </ul>
+ *
+ * <h2>Usage</h2>
+ * <ul>
+ *   <li>Should be scheduled as a background service in OM.</li>
+ *   <li>Intended to be run only on the OM leader node.</li>
+ * </ul>
+ *
+ * + * @see org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter + * @see org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter + * @see org.apache.hadoop.ozone.om.SnapshotChainManager */ public class DirectoryDeletingService extends AbstractKeyDeletingService { private static final Logger LOG = From 4d1dda7a3945ae4c06c08412aec8df6da5b3eb1a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sat, 7 Jun 2025 12:53:29 -0400 Subject: [PATCH 26/35] HDDS-13035. Add test case Change-Id: Ie14a52d73a5944a0d558639d43aa5f6ead3c98e9 --- ...napshotDeletingServiceIntegrationTest.java | 148 +++++++++++++++++- .../om/service/SnapshotDeletingService.java | 16 +- 2 files changed, 155 insertions(+), 9 deletions(-) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/{snapshot => service}/TestSnapshotDeletingServiceIntegrationTest.java (79%) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java similarity index 79% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java index fca519b36b32..cc4371bc89ff 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.ozone.om.snapshot; +package org.apache.hadoop.ozone.om.service; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; @@ -23,21 +23,34 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; +import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.FlatResource.SNAPSHOT_GC_LOCK; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mockConstruction; +import static org.mockito.Mockito.when; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.utils.BackgroundTaskResult; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; @@ -47,6 +60,8 @@ import 
org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneSnapshot; +import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; @@ -57,17 +72,24 @@ import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; +import org.apache.hadoop.ozone.om.lock.OMLockDetails; +import org.apache.hadoop.ozone.om.snapshot.MultiSnapshotLocks; +import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.MockedConstruction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -583,6 +605,128 @@ private synchronized void createSnapshotDataForBucket(OzoneBucket bucket) throws bucket.getName())); } + private MockedConstruction getMockedReclaimableKeyFilter(String volume, String bucket, + SnapshotInfo snapshotInfo, AtomicBoolean kdsWaitStarted, AtomicBoolean sdsLockWaitStarted, + AtomicBoolean kdsFinished) throws IOException { + 
ReclaimableKeyFilter keyFilter = new ReclaimableKeyFilter(om, om.getOmSnapshotManager(), + ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(), + snapshotInfo, om.getKeyManager(), om.getMetadataManager().getLock()); + return mockConstruction(ReclaimableKeyFilter.class, + (mocked, context) -> { + when(mocked.apply(any())).thenAnswer(i -> { + Table.KeyValue keyInfo = i.getArgument(0); + if (!keyInfo.getValue().getVolumeName().equals(volume) || + !keyInfo.getValue().getBucketName().equals(bucket)) { + return keyFilter.apply(i.getArgument(0)); + } + keyFilter.apply(i.getArgument(0)); + //Notify SDS that Kds has started for the bucket. + kdsWaitStarted.set(true); + GenericTestUtils.waitFor(sdsLockWaitStarted::get, 1000, 10000); + // Wait for 1 more second so that the command moves to lock wait. + Thread.sleep(1000); + return keyFilter.apply(i.getArgument(0)); + }); + doAnswer(i -> { + kdsFinished.set(true); + keyFilter.close(); + return null; + }).when(mocked).close(); + when(mocked.getExclusiveReplicatedSizeMap()).thenAnswer(i -> keyFilter.getExclusiveReplicatedSizeMap()); + when(mocked.getExclusiveSizeMap()).thenAnswer(i -> keyFilter.getExclusiveSizeMap()); + }); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + @DisplayName("Tests Snapshot Deleting Service while KeyDeletingService is already running.") + public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRunningOnAOS) throws Exception { + // Suspend the services first + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); + om.getKeyManager().getSnapshotDeletingService().suspend(); + + String volume = "vol" + RandomStringUtils.secure().nextNumeric(3), + bucket = "bucket" + RandomStringUtils.secure().nextNumeric(3); + client.getObjectStore().createVolume(volume); + OzoneVolume ozoneVolume = client.getObjectStore().getVolume(volume); + ozoneVolume.createBucket(bucket); + OzoneBucket ozoneBucket = 
ozoneVolume.getBucket(bucket); + + // Create snap1 + client.getObjectStore().createSnapshot(volume, bucket, "snap1"); + OzoneSnapshot snap1 = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap1"); + // Create snap2 + TestDataUtil.createKey(ozoneBucket, "key", CONTENT.array()); + client.getObjectStore().createSnapshot(volume, bucket, "snap2"); + UUID snap2Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap2").getSnapshotId(); + + ozoneBucket.renameKey("key", "renamedKey"); + ozoneBucket.deleteKey("renamedKey"); + UUID snap3Id; + // Create snap3 to test snapshot 3 deep cleaning otherwise just run on AOS. + if (kdsRunningOnAOS) { + snap3Id = null; + } else { + client.getObjectStore().createSnapshot(volume, bucket, "snap3"); + snap3Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap3").getSnapshotId(); + om.awaitDoubleBufferFlush(); + SnapshotInfo snap = om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap3"); + snap.setDeepCleanedDeletedDir(true); + om.getMetadataManager().getSnapshotInfoTable().put(snap.getTableKey(), snap); + assertTrue(om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap3") + .isDeepCleanedDeletedDir()); + } + + + MultiSnapshotLocks sdsMultiLocks = new MultiSnapshotLocks(cluster.getOzoneManager().getMetadataManager().getLock(), + SNAPSHOT_GC_LOCK, true); + AtomicBoolean kdsWaitStarted = new AtomicBoolean(false); + AtomicBoolean kdsFinished = new AtomicBoolean(false); + AtomicBoolean sdsLockWaitStarted = new AtomicBoolean(false); + AtomicBoolean sdsLockAcquired = new AtomicBoolean(true); + + try (MockedConstruction mockedMultiSnapshotLock = mockConstruction(MultiSnapshotLocks.class, + (mocked, context) -> when(mocked.acquireLock(anyList())).thenAnswer(i -> { + List ids = i.getArgument(0); + List expectedIds = Arrays.asList(snap2Id, snap3Id); + if (expectedIds.equals(ids)) { + sdsLockWaitStarted.set(true); + OMLockDetails lockDetails = sdsMultiLocks.acquireLock(ids); + 
assertTrue(kdsFinished::get); + sdsLockAcquired.set(true); + return lockDetails; + } + return sdsMultiLocks.acquireLock(ids); + }))) { + KeyDeletingService kds = new KeyDeletingService(om, om.getScmClient().getBlockClient(), 500, 10000, + om.getConfiguration(), 1, true); + kds.shutdown(); + KeyDeletingService.KeyDeletingTask task = kds.new KeyDeletingTask(snap3Id); + SnapshotInfo snapInfo = kdsRunningOnAOS ? null : SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap3"); + CompletableFuture future = CompletableFuture.supplyAsync(() -> { + try (MockedConstruction mockedReclaimableFilter = getMockedReclaimableKeyFilter( + volume, bucket, snapInfo, kdsWaitStarted, sdsLockWaitStarted, kdsFinished)) { + return task.call(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + SnapshotDeletingService sds = new SnapshotDeletingService(500, 10000, om); + sds.shutdown(); + GenericTestUtils.waitFor(kdsWaitStarted::get, 1000, 10000); + client.getObjectStore().deleteSnapshot(volume, bucket, "snap2"); + sds.runPeriodicalTaskNow(); + assertTrue(sdsLockAcquired.get()); + assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap2")); + } + // Resume services + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + om.getKeyManager().getSnapshotDeletingService().resume(); + } + /* Flow ---- diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index 6d9b3cf787a3..6626c4eeeba4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -28,11 +28,9 @@ import com.google.protobuf.ServiceException; import java.io.IOException; import 
java.util.ArrayList; -import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.Optional; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -91,7 +89,8 @@ public class SnapshotDeletingService extends AbstractKeyDeletingService { private final int keyLimitPerTask; private final int snapshotDeletionPerTask; private final int ratisByteLimit; - private MultiSnapshotLocks snapshotIdLocks; + private final MultiSnapshotLocks snapshotIdLocks; + private final List lockIds; public SnapshotDeletingService(long interval, long serviceTimeout, OzoneManager ozoneManager) @@ -119,6 +118,7 @@ public SnapshotDeletingService(long interval, long serviceTimeout, OZONE_SNAPSHOT_KEY_DELETING_LIMIT_PER_TASK_DEFAULT); IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); this.snapshotIdLocks = new MultiSnapshotLocks(lock, SNAPSHOT_GC_LOCK, true); + this.lockIds = new ArrayList<>(2); } private class SnapshotDeletingTask implements BackgroundTask { @@ -154,11 +154,13 @@ public BackgroundTaskResult call() throws InterruptedException { snapInfo.getTableKey()); continue; } - + lockIds.clear(); + lockIds.add(snapInfo.getSnapshotId()); + if (nextSnapshot != null) { + lockIds.add(nextSnapshot.getSnapshotId()); + } // Acquire write lock on current snapshot and next snapshot in chain. - if (!snapshotIdLocks.acquireLock(Arrays.asList(snapInfo.getSnapshotId(), - Optional.ofNullable(nextSnapshot).map(SnapshotInfo::getSnapshotId).orElse(null))) - .isLockAcquired()) { + if (!snapshotIdLocks.acquireLock(lockIds).isLockAcquired()) { continue; } From 7c471060935e469387097f5496ba119db258da39 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Sat, 7 Jun 2025 22:25:04 -0400 Subject: [PATCH 27/35] HDDS-13034. 
Address review comments Change-Id: I76b3d086daf4c2b90af2ef5df0e53542e164c52b --- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- .../apache/hadoop/ozone/om/service/KeyDeletingService.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 5086282b8db3..1544492cd72b 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3788,7 +3788,7 @@ 300s OZONE, PERFORMANCE, OM, DEPRECATED - Timeout value for SnapshotDirectoryCleaningService. + DEPRECATED. Timeout value for SnapshotDirectoryCleaningService.
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index 60b2ab55efd7..40205c4aa776 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -122,7 +122,7 @@ public BackgroundTaskQueue getTasks() { try { iterator = snapshotChainManager.iterator(true); } catch (IOException e) { - LOG.error("Error while initializing snapshot chain iterator."); + LOG.error("Error while initializing snapshot chain iterator. KeyDeletingTask will only process AOS this run."); return queue; } while (iterator.hasNext()) { @@ -323,8 +323,8 @@ public BackgroundTaskResult call() { SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); if (snapInfo != null) { if (snapInfo.isDeepCleaned()) { - LOG.info("Snapshot {} has already been deep cleaned. Skipping the snapshot in this iteration. " + - "Snapshot name : {}", snapInfo.getSnapshotId(), snapInfo.getName()); + LOG.info("Snapshot '{}' ({}) has already been deep cleaned. Skipping the snapshot in this iteration. ", + snapInfo.getTableKey(), snapInfo.getSnapshotId()); return EmptyTaskResult.newResult(); } if (!OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), snapInfo)) { From 428e46d78ad4fcf2d0876b0795f5781484b4f2c8 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 9 Jun 2025 00:59:13 -0400 Subject: [PATCH 28/35] HDDS-13036.
Address review comments Change-Id: I218dfbf383800d2097df47141ee909fc45f8cfa7 --- .../org/apache/hadoop/ozone/om/service/KeyDeletingService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index 40205c4aa776..5e34c1ff741a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -323,7 +323,7 @@ public BackgroundTaskResult call() { SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); if (snapInfo != null) { if (snapInfo.isDeepCleaned()) { - LOG.info("Snapshot '{}' ({}) has already been deep cleaned. Skipping the snapshot in this iteration. ", + LOG.info("Snapshot '{}' ({}) has already been deep cleaned. Skipping the snapshot in this iteration.", snapInfo.getTableKey(), snapInfo.getSnapshotId()); return EmptyTaskResult.newResult(); } From 90c874688403ff61138e4c9c7b2e9e617e9a6ec5 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 9 Jun 2025 01:11:56 -0400 Subject: [PATCH 29/35] HDDS-13160. 
Fix checkstyle Change-Id: I2ebcf278aaa610c84ab0da329c2837e7ef97dfa1 --- .../hadoop/ozone/om/service/DirectoryDeletingService.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 44a164608903..7e9e39ab80c6 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -17,11 +17,10 @@ package org.apache.hadoop.ozone.om.service; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; - import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; import com.google.protobuf.ServiceException; From 50d783c0d0b24b12e3ac142c03f133f66d683952 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 9 Jun 2025 01:14:35 -0400 Subject: [PATCH 30/35] HDDS-13160. 
Remove unused Change-Id: Id45068e45efa8d121dcb179cb1e286f4076b8979 --- .../hadoop/ozone/om/service/DirectoryDeletingService.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 7e9e39ab80c6..f96099323d54 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -200,10 +200,6 @@ private synchronized void updateAndRestart(OzoneConfiguration conf) { start(); } - public void setRatisByteLimit(int ratisByteLimit) { - this.ratisByteLimit = ratisByteLimit; - } - public boolean isRunningOnAOS() { return isRunningOnAOS.get(); } From c1808b71f166d08e0a325fdef053af5ac08c4cea Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 9 Jun 2025 01:27:37 -0400 Subject: [PATCH 31/35] HDDS-13035. 
Fix findbugs Change-Id: I1abad0f11b09d8aff0bf5ab0e7619efe29b6d0fc --- ...napshotDeletingServiceIntegrationTest.java | 74 +------------------ 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java index cc4371bc89ff..77c06fb0aefd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java @@ -655,7 +655,7 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun // Create snap1 client.getObjectStore().createSnapshot(volume, bucket, "snap1"); - OzoneSnapshot snap1 = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap1"); + client.getObjectStore().getSnapshotInfo(volume, bucket, "snap1"); // Create snap2 TestDataUtil.createKey(ozoneBucket, "key", CONTENT.array()); client.getObjectStore().createSnapshot(volume, bucket, "snap2"); @@ -704,7 +704,7 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun kds.shutdown(); KeyDeletingService.KeyDeletingTask task = kds.new KeyDeletingTask(snap3Id); SnapshotInfo snapInfo = kdsRunningOnAOS ? 
null : SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap3"); - CompletableFuture future = CompletableFuture.supplyAsync(() -> { + CompletableFuture.supplyAsync(() -> { try (MockedConstruction mockedReclaimableFilter = getMockedReclaimableKeyFilter( volume, bucket, snapInfo, kdsWaitStarted, sdsLockWaitStarted, kdsFinished)) { return task.call(); @@ -727,76 +727,6 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun om.getKeyManager().getSnapshotDeletingService().resume(); } - /* - Flow - ---- - create dir0/key0 - create dir1/key1 - overwrite dir0/key0 - create dir2/key2 - create snap1 - rename dir1/key1 -> dir1/key10 - delete dir1/key10 - delete dir2 - create snap2 - delete snap2 - */ - private synchronized void createSnapshotFSODataForBucket(OzoneBucket bucket) throws Exception { - Table snapshotInfoTable = - om.getMetadataManager().getSnapshotInfoTable(); - Table deletedTable = - om.getMetadataManager().getDeletedTable(); - Table deletedDirTable = - om.getMetadataManager().getDeletedDirTable(); - Table keyTable = - om.getMetadataManager().getKeyTable(BucketLayout.FILE_SYSTEM_OPTIMIZED); - Table dirTable = - om.getMetadataManager().getDirectoryTable(); - Table renameTable = om.getMetadataManager().getSnapshotRenamedTable(); - OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) - om.getMetadataManager(); - Map countMap = - metadataManager.listTables().entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, e -> { - try { - return (int)metadataManager.countRowsInTable(e.getValue()); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - })); - TestDataUtil.createKey(bucket, "dir0/" + bucket.getName() + "key0", CONTENT.array()); - TestDataUtil.createKey(bucket, "dir1/" + bucket.getName() + "key1", CONTENT.array()); - assertTableRowCount(keyTable, countMap.get(keyTable.getName()) + 2); - assertTableRowCount(dirTable, countMap.get(dirTable.getName()) + 2); - - // Overwrite bucket1key0, This is a 
newer version of the key which should - // reclaimed as this is a different version of the key. - TestDataUtil.createKey(bucket, "dir0/" + bucket.getName() + "key0", CONTENT.array()); - TestDataUtil.createKey(bucket, "dir2/" + bucket.getName() + "key2", CONTENT.array()); - assertTableRowCount(keyTable, countMap.get(keyTable.getName()) + 3); - assertTableRowCount(dirTable, countMap.get(dirTable.getName()) + 3); - assertTableRowCount(deletedTable, countMap.get(deletedTable.getName()) + 1); - // create snap1 - client.getProxy().createSnapshot(bucket.getVolumeName(), bucket.getName(), - bucket.getName() + "snap1"); - bucket.renameKey("dir1/" + bucket.getName() + "key1", "dir1/" + bucket.getName() + "key10"); - bucket.renameKey("dir1/", "dir10/"); - assertTableRowCount(renameTable, countMap.get(renameTable.getName()) + 2); - client.getProxy().deleteKey(bucket.getVolumeName(), bucket.getName(), - "dir10/" + bucket.getName() + "key10", false); - assertTableRowCount(deletedTable, countMap.get(deletedTable.getName()) + 1); - // Key 2 is deleted here, which will be reclaimed here as - // it is not being referenced by previous snapshot. - client.getProxy().deleteKey(bucket.getVolumeName(), bucket.getName(), "dir2", true); - assertTableRowCount(deletedDirTable, countMap.get(deletedDirTable.getName()) + 1); - client.getProxy().createSnapshot(bucket.getVolumeName(), bucket.getName(), - bucket.getName() + "snap2"); - // Delete Snapshot 2. - client.getProxy().deleteSnapshot(bucket.getVolumeName(), bucket.getName(), - bucket.getName() + "snap2"); - assertTableRowCount(snapshotInfoTable, countMap.get(snapshotInfoTable.getName()) + 2); - } - private void verifySnapshotChain(SnapshotInfo deletedSnapshot, String nextSnapshot) throws Exception { From 6bfa32239b85bfd668c085e82ef15ff85d8a6e61 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 9 Jun 2025 01:54:05 -0400 Subject: [PATCH 32/35] HDDS-13035. 
Fix checkstyle Change-Id: Ibcc5f5d3893b532894133298f0c41f967cbdf287 --- .../service/TestSnapshotDeletingServiceIntegrationTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java index 77c06fb0aefd..016347ddebb1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java @@ -38,7 +38,6 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.UUID; import java.util.concurrent.CompletableFuture; @@ -46,11 +45,9 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; -import org.apache.hadoop.hdds.utils.BackgroundTaskResult; import org.apache.hadoop.hdds.utils.IOUtils; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; @@ -60,7 +57,6 @@ import org.apache.hadoop.ozone.client.BucketArgs; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.client.OzoneSnapshot; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; From 6a9721f5f08e910cb57896aac2ad01d87139e8f4 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran 
Date: Mon, 9 Jun 2025 03:57:17 -0400 Subject: [PATCH 33/35] HDDS-13160. Fix checkstyle Change-Id: If3b294f9501cd8fb2c0a728e03013dcd911edc58 --- .../TestDirectoryDeletingServiceWithFSO.java | 4 ---- .../om/service/AbstractKeyDeletingService.java | 17 ----------------- .../om/service/DirectoryDeletingService.java | 6 ------ 3 files changed, 27 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java index 03a1de84f4cd..1933925384fc 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestDirectoryDeletingServiceWithFSO.java @@ -79,10 +79,6 @@ import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; -import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; -import org.apache.hadoop.ozone.om.service.KeyDeletingService; -import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; -import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 5af1711de58f..3ee50ffd04f4 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -21,19 +21,9 @@ import com.google.protobuf.ServiceException; import java.util.List; import java.util.concurrent.ExecutionException; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.HddsUtils; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.DeletingServiceMetrics; @@ -43,14 +33,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgeKeysRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; -import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; -import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.util.function.CheckedFunction; /** * Abstracts common code from KeyDeletingService and DirectoryDeletingService diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index d3d5cad316d4..f96099323d54 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -23,8 +23,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; -import java.io.Closeable; -import com.google.common.collect.Maps; import com.google.protobuf.ServiceException; import java.io.Closeable; import java.io.IOException; @@ -42,9 +40,6 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.commons.lang3.StringUtils; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -61,7 +56,6 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; -import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.DeleteKeysResult; From 954763d309cb0f09feb228c3c5cc2b64462ff4d3 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 12 Jun 2025 20:30:49 -0400 Subject: [PATCH 34/35] HDDS-13035. 
Address review comments Change-Id: If6942f6e88d96103ecdfd43f54d1502b8429f0ff --- .../TestSnapshotDeletingServiceIntegrationTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java index 33d4e96bd5ed..a73bbbbba59c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java @@ -26,6 +26,7 @@ import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.FlatResource.SNAPSHOT_GC_LOCK; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; @@ -621,7 +622,7 @@ private synchronized void createSnapshotDataForBucket(OzoneBucket bucket) throws private MockedConstruction getMockedReclaimableKeyFilter(String volume, String bucket, SnapshotInfo snapshotInfo, AtomicBoolean kdsWaitStarted, AtomicBoolean sdsLockWaitStarted, - AtomicBoolean kdsFinished) throws IOException { + AtomicBoolean sdsLockAcquired, AtomicBoolean kdsFinished) throws IOException { ReclaimableKeyFilter keyFilter = new ReclaimableKeyFilter(om, om.getOmSnapshotManager(), ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(), snapshotInfo, om.getKeyManager(), om.getMetadataManager().getLock()); @@ -642,6 +643,8 @@ private MockedConstruction getMockedReclaimableKeyFilter(S return 
keyFilter.apply(i.getArgument(0)); }); doAnswer(i -> { + assertTrue(sdsLockWaitStarted.get()); + assertFalse(sdsLockAcquired.get()); kdsFinished.set(true); keyFilter.close(); return null; @@ -698,7 +701,7 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun AtomicBoolean kdsWaitStarted = new AtomicBoolean(false); AtomicBoolean kdsFinished = new AtomicBoolean(false); AtomicBoolean sdsLockWaitStarted = new AtomicBoolean(false); - AtomicBoolean sdsLockAcquired = new AtomicBoolean(true); + AtomicBoolean sdsLockAcquired = new AtomicBoolean(false); try (MockedConstruction mockedMultiSnapshotLock = mockConstruction(MultiSnapshotLocks.class, (mocked, context) -> when(mocked.acquireLock(anyList())).thenAnswer(i -> { @@ -720,7 +723,7 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun SnapshotInfo snapInfo = kdsRunningOnAOS ? null : SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap3"); CompletableFuture.supplyAsync(() -> { try (MockedConstruction mockedReclaimableFilter = getMockedReclaimableKeyFilter( - volume, bucket, snapInfo, kdsWaitStarted, sdsLockWaitStarted, kdsFinished)) { + volume, bucket, snapInfo, kdsWaitStarted, sdsLockWaitStarted, sdsLockAcquired, kdsFinished)) { return task.call(); } catch (IOException e) { throw new RuntimeException(e); From c9d89bbbddc9020f525637d72ac4f39b8f365cd8 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 12 Jun 2025 22:56:34 -0400 Subject: [PATCH 35/35] HDDS-13035. 
Address review comments Change-Id: I586687c65af1a19b10a32738ac8dceef6066eddf --- ...napshotDeletingServiceIntegrationTest.java | 126 ++++++++++++------ .../om/service/SnapshotDeletingService.java | 15 +-- 2 files changed, 91 insertions(+), 50 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java index a73bbbbba59c..080844d1f5f7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestSnapshotDeletingServiceIntegrationTest.java @@ -37,9 +37,8 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Arrays; import java.util.ArrayDeque; -import java.util.Collections; +import java.util.Arrays; import java.util.Deque; import java.util.Iterator; import java.util.List; @@ -50,6 +49,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; @@ -68,6 +68,7 @@ import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -82,7 +83,6 @@ import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import org.junit.jupiter.api.AfterAll; import 
org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; @@ -91,7 +91,7 @@ import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestMethodOrder; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.CsvSource; import org.mockito.MockedConstruction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -162,6 +162,10 @@ public void closeAllSnapshots() { while (!rcSnaps.isEmpty()) { rcSnaps.pop().close(); } + // Resume services + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + om.getKeyManager().getSnapshotDeletingService().resume(); } private UncheckedAutoCloseableSupplier getOmSnapshot(String volume, String bucket, String snapshotName) @@ -621,11 +625,9 @@ private synchronized void createSnapshotDataForBucket(OzoneBucket bucket) throws } private MockedConstruction getMockedReclaimableKeyFilter(String volume, String bucket, - SnapshotInfo snapshotInfo, AtomicBoolean kdsWaitStarted, AtomicBoolean sdsLockWaitStarted, - AtomicBoolean sdsLockAcquired, AtomicBoolean kdsFinished) throws IOException { - ReclaimableKeyFilter keyFilter = new ReclaimableKeyFilter(om, om.getOmSnapshotManager(), - ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(), - snapshotInfo, om.getKeyManager(), om.getMetadataManager().getLock()); + AtomicBoolean kdsWaitStarted, AtomicBoolean sdsLockWaitStarted, + AtomicBoolean sdsLockAcquired, AtomicBoolean kdsFinished, ReclaimableKeyFilter keyFilter) throws IOException { + return mockConstruction(ReclaimableKeyFilter.class, (mocked, context) -> { when(mocked.apply(any())).thenAnswer(i -> { @@ -655,14 +657,30 @@ private MockedConstruction getMockedReclaimableKeyFilter(S } @ParameterizedTest - 
@ValueSource(booleans = {true, false}) + @CsvSource({"true, 0", "true, 1", "false, 0", "false, 1", "false, 2"}) @DisplayName("Tests Snapshot Deleting Service while KeyDeletingService is already running.") - public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRunningOnAOS) throws Exception { + @Order(4) + public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRunningOnAOS, + int snasphotDeleteIndex) throws Exception { + SnapshotChainManager snapshotChainManager = + ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(); + GenericTestUtils.waitFor(() -> { + try { + Iterator itr = snapshotChainManager.iterator(false); + while (itr.hasNext()) { + SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(om, snapshotChainManager, itr.next()); + assertEquals(SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE, snapshotInfo.getSnapshotStatus()); + } + return true; + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 30000); + om.awaitDoubleBufferFlush(); // Suspend the services first om.getKeyManager().getDirDeletingService().suspend(); om.getKeyManager().getDeletingService().suspend(); om.getKeyManager().getSnapshotDeletingService().suspend(); - String volume = "vol" + RandomStringUtils.secure().nextNumeric(3), bucket = "bucket" + RandomStringUtils.secure().nextNumeric(3); client.getObjectStore().createVolume(volume); @@ -670,29 +688,44 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun ozoneVolume.createBucket(bucket); OzoneBucket ozoneBucket = ozoneVolume.getBucket(bucket); + // Create snap0 + client.getObjectStore().createSnapshot(volume, bucket, "snap0"); + client.getObjectStore().getSnapshotInfo(volume, bucket, "snap0"); + UUID snap1Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap0").getSnapshotId(); + // Create snap1 - client.getObjectStore().createSnapshot(volume, bucket, "snap1"); - client.getObjectStore().getSnapshotInfo(volume, 
bucket, "snap1"); - // Create snap2 TestDataUtil.createKey(ozoneBucket, "key", CONTENT.array()); - client.getObjectStore().createSnapshot(volume, bucket, "snap2"); - UUID snap2Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap2").getSnapshotId(); + client.getObjectStore().createSnapshot(volume, bucket, "snap1"); + UUID snap2Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap1").getSnapshotId(); ozoneBucket.renameKey("key", "renamedKey"); ozoneBucket.deleteKey("renamedKey"); + om.awaitDoubleBufferFlush(); UUID snap3Id; + ReclaimableKeyFilter keyFilter; + SnapshotInfo snapInfo; // Create snap3 to test snapshot 3 deep cleaning otherwise just run on AOS. if (kdsRunningOnAOS) { snap3Id = null; + snapInfo = null; + keyFilter = new ReclaimableKeyFilter(om, om.getOmSnapshotManager(), + ((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(), + snapInfo, om.getKeyManager(), om.getMetadataManager().getLock()); } else { - client.getObjectStore().createSnapshot(volume, bucket, "snap3"); - snap3Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap3").getSnapshotId(); + + client.getObjectStore().createSnapshot(volume, bucket, "snap2"); + snap3Id = client.getObjectStore().getSnapshotInfo(volume, bucket, "snap2").getSnapshotId(); om.awaitDoubleBufferFlush(); - SnapshotInfo snap = om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap3"); + SnapshotInfo snap = om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap2"); snap.setDeepCleanedDeletedDir(true); om.getMetadataManager().getSnapshotInfoTable().put(snap.getTableKey(), snap); - assertTrue(om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap3") + assertTrue(om.getMetadataManager().getSnapshotInfo(volume, bucket, "snap2") .isDeepCleanedDeletedDir()); + snapInfo = SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap2"); + keyFilter = new ReclaimableKeyFilter(om, om.getOmSnapshotManager(), + 
((OmMetadataManagerImpl)om.getMetadataManager()).getSnapshotChainManager(), + snapInfo, getOmSnapshot(volume, bucket, "snap2").get().getKeyManager(), + om.getMetadataManager().getLock()); } @@ -704,26 +737,33 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun AtomicBoolean sdsLockAcquired = new AtomicBoolean(false); try (MockedConstruction mockedMultiSnapshotLock = mockConstruction(MultiSnapshotLocks.class, - (mocked, context) -> when(mocked.acquireLock(anyList())).thenAnswer(i -> { - List ids = i.getArgument(0); - List expectedIds = Arrays.asList(snap2Id, snap3Id); - if (expectedIds.equals(ids)) { - sdsLockWaitStarted.set(true); - OMLockDetails lockDetails = sdsMultiLocks.acquireLock(ids); - assertTrue(kdsFinished::get); - sdsLockAcquired.set(true); - return lockDetails; - } - return sdsMultiLocks.acquireLock(ids); - }))) { + (mocked, context) -> { + when(mocked.acquireLock(anyList())).thenAnswer(i -> { + List ids = i.getArgument(0); + List expectedIds = Arrays.asList(snap1Id, snap2Id, snap3Id).subList(snasphotDeleteIndex, Math.min(3, + snasphotDeleteIndex + 2)).stream().filter(Objects::nonNull).collect(Collectors.toList()); + if (expectedIds.equals(ids) && !sdsLockWaitStarted.get() && !sdsLockAcquired.get()) { + sdsLockWaitStarted.set(true); + OMLockDetails lockDetails = sdsMultiLocks.acquireLock(ids); + assertTrue(kdsFinished::get); + sdsLockAcquired.set(true); + return lockDetails; + } + return sdsMultiLocks.acquireLock(ids); + }); + doAnswer(i -> { + sdsMultiLocks.releaseLock(); + return null; + }).when(mocked).releaseLock(); + })) { KeyDeletingService kds = new KeyDeletingService(om, om.getScmClient().getBlockClient(), 500, 10000, om.getConfiguration(), 1, true); kds.shutdown(); KeyDeletingService.KeyDeletingTask task = kds.new KeyDeletingTask(snap3Id); - SnapshotInfo snapInfo = kdsRunningOnAOS ? 
null : SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap3"); + CompletableFuture.supplyAsync(() -> { try (MockedConstruction mockedReclaimableFilter = getMockedReclaimableKeyFilter( - volume, bucket, snapInfo, kdsWaitStarted, sdsLockWaitStarted, sdsLockAcquired, kdsFinished)) { + volume, bucket, kdsWaitStarted, sdsLockWaitStarted, sdsLockAcquired, kdsFinished, keyFilter)) { return task.call(); } catch (IOException e) { throw new RuntimeException(e); @@ -732,16 +772,18 @@ public void testSnapshotDeletingServiceWaitsForKeyDeletingService(boolean kdsRun SnapshotDeletingService sds = new SnapshotDeletingService(500, 10000, om); sds.shutdown(); - GenericTestUtils.waitFor(kdsWaitStarted::get, 1000, 10000); - client.getObjectStore().deleteSnapshot(volume, bucket, "snap2"); + GenericTestUtils.waitFor(kdsWaitStarted::get, 1000, 30000); + client.getObjectStore().deleteSnapshot(volume, bucket, "snap" + snasphotDeleteIndex); sds.runPeriodicalTaskNow(); + om.awaitDoubleBufferFlush(); + if (snasphotDeleteIndex == 2) { + sds.runPeriodicalTaskNow(); + } + assertTrue(sdsLockWaitStarted.get()); assertTrue(sdsLockAcquired.get()); - assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, volume, bucket, "snap2")); + assertThrows(IOException.class, () -> SnapshotUtils.getSnapshotInfo(om, volume, bucket, + "snap" + snasphotDeleteIndex)); } - // Resume services - om.getKeyManager().getDirDeletingService().resume(); - om.getKeyManager().getDeletingService().resume(); - om.getKeyManager().getSnapshotDeletingService().resume(); } private void verifySnapshotChain(SnapshotInfo deletedSnapshot, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index 6626c4eeeba4..75e9a20cdf12 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -154,6 +154,13 @@ public BackgroundTaskResult call() throws InterruptedException { snapInfo.getTableKey()); continue; } + // nextSnapshot = null means entries would be moved to AOS. + if (nextSnapshot == null) { + LOG.info("Snapshot: {} entries will be moved to AOS.", snapInfo.getTableKey()); + } else { + LOG.info("Snapshot: {} entries will be moved to next active snapshot: {}", + snapInfo.getTableKey(), nextSnapshot.getTableKey()); + } lockIds.clear(); lockIds.add(snapInfo.getSnapshotId()); if (nextSnapshot != null) { @@ -163,14 +170,6 @@ public BackgroundTaskResult call() throws InterruptedException { if (!snapshotIdLocks.acquireLock(lockIds).isLockAcquired()) { continue; } - - // nextSnapshot = null means entries would be moved to AOS. - if (nextSnapshot == null) { - LOG.info("Snapshot: {} entries will be moved to AOS.", snapInfo.getTableKey()); - } else { - LOG.info("Snapshot: {} entries will be moved to next active snapshot: {}", - snapInfo.getTableKey(), nextSnapshot.getTableKey()); - } try (UncheckedAutoCloseableSupplier snapshot = omSnapshotManager.getSnapshot( snapInfo.getVolumeName(), snapInfo.getBucketName(), snapInfo.getName())) { KeyManager snapshotKeyManager = snapshot.get().getKeyManager();