diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index bda52dcf9b42..1544492cd72b 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3786,18 +3786,18 @@ ozone.snapshot.directory.service.timeout 300s - OZONE, PERFORMANCE, OM + OZONE, PERFORMANCE, OM, DEPRECATED - Timeout value for SnapshotDirectoryCleaningService. + DEPRECATED. Timeout value for SnapshotDirectoryCleaningService. ozone.snapshot.directory.service.interval 24h - OZONE, PERFORMANCE, OM + OZONE, PERFORMANCE, OM, DEPRECATED - The time interval between successive SnapshotDirectoryCleaningService + DEPRECATED. The time interval between successive SnapshotDirectoryCleaningService thread run. diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 12a809043761..1a7cafd8ee27 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -387,12 +387,20 @@ public final class OMConfigKeys { */ public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = false; + /** + * Snapshot directory deep cleaning has been moved from SnapshotDirectoryCleaningService to DirectoryDeletingService. + * Configs related to SnapshotDirectoryCleaningService are deprecated, as that service is no longer used anywhere. 
+ */ + @Deprecated public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = "ozone.snapshot.directory.service.interval"; + @Deprecated public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT = "24h"; + @Deprecated public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT = "ozone.snapshot.directory.service.timeout"; + @Deprecated public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT = "300s"; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index 4c6a21f1cbb5..fa1f6b9022e2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -79,6 +79,8 @@ import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.hadoop.ozone.om.service.KeyDeletingService; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; @@ -275,11 +277,9 @@ public void testDeleteWithLargeSubPathsThanBatchSize() throws Exception { assertEquals(18, metrics.getNumSubDirsMovedToDeletedDirTable()); assertEquals(18, metrics.getNumSubDirsSentForPurge()); - - long elapsedRunCount = dirDeletingService.getRunCount().get() - preRunCount; assertThat(dirDeletingService.getRunCount().get()).isGreaterThan(1); // Ensure dir deleting speed, here provide a backup value for safe CI - assertThat(elapsedRunCount).isGreaterThanOrEqualTo(7); + 
GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() - preRunCount >= 7, 1000, 100000); } @Test @@ -592,8 +592,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() OmSnapshotManager omSnapshotManager = Mockito.spy(ozoneManager.getOmSnapshotManager()); when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> omSnapshotManager); DirectoryDeletingService service = Mockito.spy(new DirectoryDeletingService(1000, TimeUnit.MILLISECONDS, 1000, - ozoneManager, - cluster.getConf(), 1)); + ozoneManager, cluster.getConf(), 1, false)); service.shutdown(); final int initialSnapshotCount = (int) cluster.getOzoneManager().getMetadataManager().countRowsInTable(snapshotInfoTable); @@ -627,7 +626,8 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() } return null; }).when(service).optimizeDirDeletesAndSubmitRequest(anyLong(), anyLong(), - anyLong(), anyList(), anyList(), eq(null), anyLong(), anyLong(), Mockito.any(), any(), + anyLong(), anyList(), anyList(), eq(null), anyLong(), anyLong(), any(), + any(ReclaimableDirFilter.class), any(ReclaimableKeyFilter.class), any(), anyLong()); Mockito.doAnswer(i -> { @@ -651,8 +651,8 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() } }, 1000, 10000); return i.callRealMethod(); - }).when(omSnapshotManager).getSnapshot(ArgumentMatchers.eq(testVolumeName), ArgumentMatchers.eq(testBucketName), - ArgumentMatchers.eq(snap1)); + }).when(omSnapshotManager).getActiveSnapshot(ArgumentMatchers.eq(testVolumeName), + ArgumentMatchers.eq(testBucketName), ArgumentMatchers.eq(snap1)); assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 1); service.runPeriodicalTaskNow(); service.runPeriodicalTaskNow(); @@ -729,7 +729,6 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { DirectoryDeletingService dirDeletingService = (DirectoryDeletingService) cluster.getOzoneManager().getKeyManager() .getDirDeletingService(); - // After delete. 
5 more files left out under the root dir assertTableRowCount(keyTable, 5); assertTableRowCount(dirTable, 5); @@ -749,22 +748,21 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { assertSubPathsCount(dirDeletingService::getMovedFilesCount, 0); assertSubPathsCount(dirDeletingService::getMovedDirsCount, 0); assertSubPathsCount(dirDeletingService::getDeletedDirsCount, 0); - // Case-2) Delete dir fs.delete(root, true); // After delete. 5 sub files are still in keyTable. - // 4 dirs in dirTable. + // 0 dirs in dirTable. assertTableRowCount(keyTable, 5); - assertTableRowCount(dirTable, 4); + assertTableRowCount(dirTable, 0); // KeyDeletingService and DirectoryDeletingService will not // clean up because the paths are part of a snapshot. - // As a result on 1 deleted dir and 3 deleted files will + // As a result 5 deleted dirs and 3 deleted files will // remain in dirTable and keyTable respectively. long prevDDSRunCount = dirDeletingService.getRunCount().get(); long prevKDSRunCount = keyDeletingService.getRunCount().get(); - assertTableRowCount(deletedDirTable, 1); + assertTableRowCount(deletedDirTable, 5); assertTableRowCount(deletedKeyTable, 3); GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() > prevDDSRunCount, 100, 10000); @@ -772,7 +770,7 @@ public void testDirDeletedTableCleanUpForSnapshot() throws Exception { prevKDSRunCount, 100, 10000); assertSubPathsCount(dirDeletingService::getMovedFilesCount, 0); - assertSubPathsCount(dirDeletingService::getMovedDirsCount, 0); + assertSubPathsCount(dirDeletingService::getMovedDirsCount, 4); assertSubPathsCount(dirDeletingService::getDeletedDirsCount, 0); // Manual cleanup deletedDirTable for next tests diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 
3c7b35dd23ed..805f3f474747 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -127,8 +127,7 @@ public void setup() throws Exception { 500, TimeUnit.MILLISECONDS); conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT, - 10000, TimeUnit.MILLISECONDS); - conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, 500); + 500, TimeUnit.MILLISECONDS); conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 500); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 500, TimeUnit.MILLISECONDS); @@ -251,14 +250,18 @@ public void testSnapshotWithFSO() throws Exception { om.getMetadataManager().getDeletedDirTable(); Table renamedTable = om.getMetadataManager().getSnapshotRenamedTable(); - BucketArgs bucketArgs = new BucketArgs.Builder() .setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED) .build(); - OzoneBucket bucket2 = TestDataUtil.createBucket( client, VOLUME_NAME, bucketArgs, BUCKET_NAME_FSO); + assertTableRowCount(snapshotInfoTable, 0); + assertTableRowCount(deletedDirTable, 0); + assertTableRowCount(deletedTable, 0); + + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); // Create 10 keys for (int i = 1; i <= 10; i++) { TestDataUtil.createKey(bucket2, "key" + i, CONTENT.array()); @@ -382,8 +385,35 @@ public void testSnapshotWithFSO() throws Exception { SnapshotInfo deletedSnap = om.getMetadataManager() .getSnapshotInfoTable().get("/vol1/bucketfso/snap2"); + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + for (int i = 1; i <= 3; i++) { + String snapshotName = "snap" + i; + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snap = 
om.getMetadataManager().getSnapshotInfo(VOLUME_NAME, BUCKET_NAME_FSO, snapshotName); + LOG.info("SnapshotInfo for {} is {}", snapshotName, snap.getSnapshotId()); + return snap.isDeepCleaned() && snap.isDeepCleanedDeletedDir(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 2000, 100000); + } + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); + + OmSnapshot snap2 = om.getOmSnapshotManager() + .getSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap2").get(); + // Child directories should have been moved to the deleted directory table of snap2 + assertTableRowCount(dirTable, 0); + assertTableRowCount(keyTable, 11); + assertTableRowCount(snap2.getMetadataManager().getDeletedDirTable(), 12); + assertTableRowCount(snap2.getMetadataManager().getDeletedTable(), 11); + client.getObjectStore().deleteSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap2"); + + assertTableRowCount(snapshotInfoTable, 2); // Delete 2 overwritten keys @@ -407,7 +437,28 @@ public void testSnapshotWithFSO() throws Exception { snap3.getMetadataManager().getDeletedTable(); assertTableRowCount(snapRenamedTable, 4); - assertTableRowCount(snapDeletedDirTable, 3); + assertTableRowCount(snapDeletedDirTable, 12); + // All the keys deleted before snapshot2 are moved to snap3 + assertTableRowCount(snapDeletedTable, 18); + + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); + for (int snapshotIndex : new int[] {1, 3}) { + String snapshotName = "snap" + snapshotIndex; + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snap = om.getMetadataManager().getSnapshotInfo(VOLUME_NAME, BUCKET_NAME_FSO, snapshotName); + return snap.isDeepCleaned() && snap.isDeepCleanedDeletedDir(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 2000, 100000); + } + om.getKeyManager().getDirDeletingService().suspend(); + om.getKeyManager().getDeletingService().suspend(); + 
+ assertTableRowCount(snapRenamedTable, 4); + assertTableRowCount(snapDeletedDirTable, 12); // All the keys deleted before snapshot2 is moved to snap3 assertTableRowCount(snapDeletedTable, 15); @@ -418,11 +469,13 @@ public void testSnapshotWithFSO() throws Exception { // Delete Snapshot3 and check entries moved to active DB client.getObjectStore().deleteSnapshot(VOLUME_NAME, BUCKET_NAME_FSO, "snap3"); - + om.getKeyManager().getDirDeletingService().resume(); + om.getKeyManager().getDeletingService().resume(); // Check entries moved to active DB assertTableRowCount(snapshotInfoTable, 1); assertTableRowCount(renamedTable, 4); - assertTableRowCount(deletedDirTable, 3); + assertTableRowCount(deletedDirTable, 12); + assertTableRowCount(deletedTable, 15); UncheckedAutoCloseableSupplier rcSnap1 = om.getOmSnapshotManager().getSnapshot( @@ -469,10 +522,12 @@ private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean throws InterruptedException, TimeoutException, IOException { OzoneManager ozoneManager = Mockito.spy(om); om.getKeyManager().getDirDeletingService().shutdown(); + KeyManager keyManager = Mockito.spy(om.getKeyManager()); + when(ozoneManager.getKeyManager()).thenReturn(keyManager); GenericTestUtils.waitFor(() -> om.getKeyManager().getDirDeletingService().getThreadCount() == 0, 1000, 100000); DirectoryDeletingService directoryDeletingService = Mockito.spy(new DirectoryDeletingService(10000, - TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1)); + TimeUnit.MILLISECONDS, 100000, ozoneManager, cluster.getConf(), 1, false)); directoryDeletingService.shutdown(); GenericTestUtils.waitFor(() -> directoryDeletingService.getThreadCount() == 0, 1000, 100000); @@ -481,7 +536,7 @@ private DirectoryDeletingService getMockedDirectoryDeletingService(AtomicBoolean GenericTestUtils.waitFor(dirDeletionWaitStarted::get, 1000, 100000); dirDeletionStarted.set(true); return i.callRealMethod(); - 
}).when(directoryDeletingService).getPendingDeletedDirInfo(); + }).when(keyManager).getDeletedDirEntries(); return directoryDeletingService; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index 8591c6d1e88b..80a88871590b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -44,12 +45,14 @@ import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; +import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.ozone.test.GenericTestUtils; import org.apache.ozone.test.tag.Flaky; import org.junit.jupiter.api.AfterAll; @@ -76,7 +79,7 @@ public class TestSnapshotDirectoryCleaningService { @BeforeAll public static void init() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - 
conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, 2500); + conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2500); conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 2500, TimeUnit.MILLISECONDS); @@ -140,8 +143,10 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { cluster.getOzoneManager().getMetadataManager().getDeletedTable(); Table snapshotInfoTable = cluster.getOzoneManager().getMetadataManager().getSnapshotInfoTable(); - SnapshotDirectoryCleaningService snapshotDirectoryCleaningService = - cluster.getOzoneManager().getKeyManager().getSnapshotDirectoryService(); + DirectoryDeletingService directoryDeletingService = + cluster.getOzoneManager().getKeyManager().getDirDeletingService(); + SnapshotChainManager snapshotChainManager = ((OmMetadataManagerImpl)cluster.getOzoneManager().getMetadataManager()) + .getSnapshotChainManager(); /* DirTable /v/b/snapDir @@ -220,11 +225,9 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { fs.delete(root, true); assertTableRowCount(deletedKeyTable, 10); client.getObjectStore().createSnapshot(volumeName, bucketName, "snap3"); - long prevRunCount = snapshotDirectoryCleaningService.getRunCount().get(); - GenericTestUtils.waitFor(() -> snapshotDirectoryCleaningService.getRunCount().get() + long prevRunCount = directoryDeletingService.getRunCount().get(); + GenericTestUtils.waitFor(() -> directoryDeletingService.getRunCount().get() > prevRunCount + 1, 100, 10000); - - Thread.sleep(2000); Map expectedSize = new HashMap() {{ // /v/b/snapDir/appRoot0/parentDir0-2/childFile contribute // exclusive size, /v/b/snapDir/appRoot0/parentDir0-2/childFile0-4 @@ -234,11 +237,22 @@ public void testExclusiveSizeWithDirectoryDeepClean() throws Exception { put("snap2", 5L); put("snap3", 0L); }}; + try (TableIterator> iterator = snapshotInfoTable.iterator()) { while (iterator.hasNext()) { Table.KeyValue 
snapshotEntry = iterator.next(); String snapshotName = snapshotEntry.getValue().getName(); + + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo nextSnapshot = SnapshotUtils.getNextSnapshot(cluster.getOzoneManager(), snapshotChainManager, + snapshotEntry.getValue()); + return nextSnapshot == null || (nextSnapshot.isDeepCleanedDeletedDir() && nextSnapshot.isDeepCleaned()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 10000); SnapshotInfo snapshotInfo = snapshotInfoTable.get(snapshotEntry.getKey()); assertEquals(expectedSize.get(snapshotName), snapshotInfo.getExclusiveSize() + snapshotInfo.getExclusiveSizeDeltaFromDirDeepCleaning()); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java index 0af075035704..7e76885c49bd 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManager.java @@ -41,7 +41,6 @@ import org.apache.hadoop.ozone.om.service.DirectoryDeletingService; import org.apache.hadoop.ozone.om.service.KeyDeletingService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket; import org.apache.ratis.util.function.CheckedFunction; @@ -274,7 +273,14 @@ OmMultipartUploadListParts listParts(String volumeName, String bucketName, void refresh(OmKeyInfo key) throws IOException; /** - * Returns an iterator for pending deleted directories. + * Returns an iterator for pending deleted directories across all buckets. 
+ */ + default TableIterator> getDeletedDirEntries() throws IOException { + return getDeletedDirEntries(null, null); + } + + /** + * Returns an iterator for pending deleted directories for volume and bucket. * @throws IOException */ TableIterator> getDeletedDirEntries( @@ -301,7 +307,8 @@ default List> getDeletedDirEntries(String volu * @throws IOException */ DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, - OmKeyInfo parentInfo, long remainingBufLimit) throws IOException; + OmKeyInfo parentInfo, CheckedFunction, Boolean, IOException> filter, + long remainingBufLimit) throws IOException; /** * Returns all sub files under the given parent directory. @@ -311,7 +318,8 @@ DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, * @throws IOException */ DeleteKeysResult getPendingDeletionSubFiles(long volumeId, - long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) + long bucketId, OmKeyInfo parentInfo, + CheckedFunction, Boolean, IOException> filter, long remainingBufLimit) throws IOException; /** @@ -344,12 +352,6 @@ DeleteKeysResult getPendingDeletionSubFiles(long volumeId, */ SnapshotDeletingService getSnapshotDeletingService(); - /** - * Returns the instance of Snapshot Directory service. - * @return Background service. - */ - SnapshotDirectoryCleaningService getSnapshotDirectoryService(); - /** * Returns the instance of CompactionService. 
* @return BackgroundService diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index da080be68cac..578afc630a1c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -58,10 +58,6 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_THREAD_NUMBER_DIR_DELETION; @@ -171,7 +167,6 @@ import org.apache.hadoop.ozone.om.service.MultipartUploadCleanupService; import org.apache.hadoop.ozone.om.service.OpenKeyCleanupService; import org.apache.hadoop.ozone.om.service.SnapshotDeletingService; -import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PartKeyInfo; import 
org.apache.hadoop.ozone.security.acl.IAccessAuthorizer; @@ -214,7 +209,6 @@ public class KeyManagerImpl implements KeyManager { private BackgroundService openKeyCleanupService; private BackgroundService multipartUploadCleanupService; - private SnapshotDirectoryCleaningService snapshotDirectoryCleaningService; private DNSToSwitchMapping dnsToSwitchMapping; private CompactionService compactionService; @@ -292,7 +286,7 @@ public void start(OzoneConfiguration configuration) { dirDeletingService = new DirectoryDeletingService(dirDeleteInterval, TimeUnit.MILLISECONDS, serviceTimeout, ozoneManager, configuration, - dirDeletingServiceCorePoolSize); + dirDeletingServiceCorePoolSize, isSnapshotDeepCleaningEnabled); dirDeletingService.start(); } @@ -350,22 +344,6 @@ public void start(OzoneConfiguration configuration) { } } - if (isSnapshotDeepCleaningEnabled && snapshotDirectoryCleaningService == null && - ozoneManager.isFilesystemSnapshotEnabled()) { - long dirDeleteInterval = configuration.getTimeDuration( - OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, - OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT, - TimeUnit.MILLISECONDS); - long serviceTimeout = configuration.getTimeDuration( - OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT, - OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT_DEFAULT, - TimeUnit.MILLISECONDS); - snapshotDirectoryCleaningService = new SnapshotDirectoryCleaningService( - dirDeleteInterval, TimeUnit.MILLISECONDS, serviceTimeout, - ozoneManager, scmClient.getBlockClient()); - snapshotDirectoryCleaningService.start(); - } - if (multipartUploadCleanupService == null) { long serviceInterval = configuration.getTimeDuration( OZONE_OM_MPU_CLEANUP_SERVICE_INTERVAL, @@ -443,10 +421,6 @@ public void stop() throws IOException { multipartUploadCleanupService.shutdown(); multipartUploadCleanupService = null; } - if (snapshotDirectoryCleaningService != null) { - snapshotDirectoryCleaningService.shutdown(); - snapshotDirectoryCleaningService = null; - } if (compactionService != null) 
{ compactionService.shutdown(); compactionService = null; @@ -955,11 +929,6 @@ public SnapshotDeletingService getSnapshotDeletingService() { return snapshotDeletingService; } - @Override - public SnapshotDirectoryCleaningService getSnapshotDirectoryService() { - return snapshotDirectoryCleaningService; - } - @Override public CompactionService getCompactionService() { return compactionService; @@ -2197,14 +2166,19 @@ private void slimLocationVersion(OmKeyInfo... keyInfos) { @Override public DeleteKeysResult getPendingDeletionSubDirs(long volumeId, long bucketId, - OmKeyInfo parentInfo, long remainingBufLimit) throws IOException { + OmKeyInfo parentInfo, CheckedFunction, Boolean, IOException> filter, + long remainingBufLimit) throws IOException { return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getDirectoryTable(), - omDirectoryInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, omDirectoryInfo), remainingBufLimit); + kv -> Table.newKeyValue(metadataManager.getOzoneDeletePathKey(kv.getValue().getObjectID(), kv.getKey()), + OMFileRequest.getKeyInfoWithFullPath(parentInfo, kv.getValue())), + filter, remainingBufLimit); } private DeleteKeysResult gatherSubPathsWithIterator( long volumeId, long bucketId, OmKeyInfo parentInfo, - Table table, Function deleteKeyTransformer, + Table table, + CheckedFunction, KeyValue, IOException> deleteKeyTransformer, + CheckedFunction, Boolean, IOException> deleteKeyFilter, long remainingBufLimit) throws IOException { List keyInfos = new ArrayList<>(); String seekFileInDB = metadataManager.getOzonePathKey(volumeId, bucketId, @@ -2227,10 +2201,12 @@ private DeleteKeysResult gatherSubPathsWithIterat if (remainingBufLimit - objectSerializedSize < 0) { break; } - OmKeyInfo keyInfo = deleteKeyTransformer.apply(withParentObjectId); - keyInfos.add(keyInfo); - remainingBufLimit -= objectSerializedSize; - consumedSize += objectSerializedSize; + KeyValue keyInfo = deleteKeyTransformer.apply(entry); + if 
(deleteKeyFilter.apply(keyInfo)) { + keyInfos.add(keyInfo.getValue()); + remainingBufLimit -= objectSerializedSize; + consumedSize += objectSerializedSize; + } } processedSubPaths = processedSubPaths || (!iterator.hasNext()); return new DeleteKeysResult(keyInfos, consumedSize, processedSubPaths); @@ -2239,11 +2215,17 @@ private DeleteKeysResult gatherSubPathsWithIterat @Override public DeleteKeysResult getPendingDeletionSubFiles(long volumeId, - long bucketId, OmKeyInfo parentInfo, long remainingBufLimit) + long bucketId, OmKeyInfo parentInfo, + CheckedFunction, Boolean, IOException> filter, long remainingBufLimit) throws IOException { - return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getFileTable(), - keyInfo -> OMFileRequest.getKeyInfoWithFullPath(parentInfo, keyInfo), - remainingBufLimit); + CheckedFunction, KeyValue, IOException> tranformer = kv -> { + OmKeyInfo keyInfo = OMFileRequest.getKeyInfoWithFullPath(parentInfo, kv.getValue()); + String deleteKey = metadataManager.getOzoneDeletePathKey(keyInfo.getObjectID(), + metadataManager.getOzoneKey(keyInfo.getVolumeName(), keyInfo.getBucketName(), keyInfo.getKeyName())); + return Table.newKeyValue(deleteKey, keyInfo); + }; + return gatherSubPathsWithIterator(volumeId, bucketId, parentInfo, metadataManager.getFileTable(), tranformer, + filter, remainingBufLimit); } public boolean isBucketFSOptimized(String volName, String buckName) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 6117a7e373bf..bcff75fd0399 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -704,9 +704,9 @@ static class Lock extends BootstrapStateHandler.Lock { locks = Stream.of( 
om.getKeyManager().getDeletingService(), + om.getKeyManager().getDirDeletingService(), om.getKeyManager().getSnapshotSstFilteringService(), om.getKeyManager().getSnapshotDeletingService(), - om.getKeyManager().getSnapshotDirectoryService(), om.getMetadataManager().getStore().getRocksDBCheckpointDiffer() ) .filter(Objects::nonNull) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 536406111a96..ee699e16c31d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -28,11 +28,13 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -58,12 +60,14 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgeKeysRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos; import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.util.function.CheckedFunction; /** * Abstracts common code from KeyDeletingService and DirectoryDeletingService @@ -103,7 +107,7 @@ public AbstractKeyDeletingService(String serviceName, long interval, protected Pair processKeyDeletes(List keyBlocksList, Map keysToModify, List renameEntries, - String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException, InterruptedException { + String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { long startTime = Time.monotonicNow(); Pair purgeResult = Pair.of(0, false); @@ -143,7 +147,7 @@ protected Pair processKeyDeletes(List keyBlocksLis */ private Pair submitPurgeKeysRequest(List results, Map keysToModify, List renameEntriesToBeDeleted, - String snapTableKey, UUID expectedPreviousSnapshotId) throws InterruptedException { + String snapTableKey, UUID expectedPreviousSnapshotId) { List purgeKeys = new ArrayList<>(); // Put all keys to be purged in a list @@ -224,13 +228,13 @@ private Pair submitPurgeKeysRequest(List submitPurgeKeysRequest(List requests, - String snapTableKey, - UUID expectedPreviousSnapshotId) { + protected OMResponse submitPurgePaths(List requests, + String snapTableKey, UUID expectedPreviousSnapshotId) { OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); @@ -267,12 +270,13 @@ protected void submitPurgePaths(List requests, .setClientId(clientId.toString()) .build(); - // Submit Purge paths request to OM - try { - submitRequest(omRequest); - } catch (ServiceException e) { + // Submit Purge paths request to OM. Acquire bootstrap lock when processing deletes for snapshots. + try (BootstrapStateHandler.Lock lock = snapTableKey != null ? 
getBootstrapStateLock().lock() : null) { + return submitRequest(omRequest); + } catch (ServiceException | InterruptedException e) { LOG.error("PurgePaths request failed. Will retry at next run.", e); } + return null; } private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( @@ -305,10 +309,12 @@ private OzoneManagerProtocolProtos.PurgePathRequest wrapPurgeRequest( return purgePathsRequest.build(); } - protected PurgePathRequest prepareDeleteDirRequest( - OmKeyInfo pendingDeletedDirInfo, String delDirName, + protected Optional prepareDeleteDirRequest( + OmKeyInfo pendingDeletedDirInfo, String delDirName, boolean purgeDir, List> subDirList, - KeyManager keyManager, long remainingBufLimit) throws IOException { + KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableFileFilter, + long remainingBufLimit) throws IOException { // step-0: Get one pending deleted directory if (LOG.isDebugEnabled()) { LOG.debug("Pending deleted dir name: {}", @@ -322,7 +328,7 @@ protected PurgePathRequest prepareDeleteDirRequest( // step-1: get all sub directories under the deletedDir DeleteKeysResult subDirDeleteResult = keyManager.getPendingDeletionSubDirs(volumeId, bucketId, - pendingDeletedDirInfo, remainingBufLimit); + pendingDeletedDirInfo, keyInfo -> true, remainingBufLimit); List subDirs = subDirDeleteResult.getKeysToDelete(); remainingBufLimit -= subDirDeleteResult.getConsumedSize(); @@ -337,9 +343,10 @@ protected PurgePathRequest prepareDeleteDirRequest( } // step-2: get all sub files under the deletedDir + // Only remove sub files if the parent directory is going to be deleted or can be reclaimed. 
DeleteKeysResult subFileDeleteResult = keyManager.getPendingDeletionSubFiles(volumeId, bucketId, - pendingDeletedDirInfo, remainingBufLimit); + pendingDeletedDirInfo, keyInfo -> purgeDir || reclaimableFileFilter.apply(keyInfo), remainingBufLimit); List subFiles = subFileDeleteResult.getKeysToDelete(); if (LOG.isDebugEnabled()) { @@ -350,10 +357,13 @@ protected PurgePathRequest prepareDeleteDirRequest( // step-3: If both sub-dirs and sub-files are exhausted under a parent // directory, only then delete the parent. - String purgeDeletedDir = subDirDeleteResult.isProcessedKeys() && + String purgeDeletedDir = purgeDir && subDirDeleteResult.isProcessedKeys() && subFileDeleteResult.isProcessedKeys() ? delDirName : null; - return wrapPurgeRequest(volumeId, bucketId, - purgeDeletedDir, subFiles, subDirs); + if (purgeDeletedDir == null && subFiles.isEmpty() && subDirs.isEmpty()) { + return Optional.empty(); + } + return Optional.of(wrapPurgeRequest(volumeId, bucketId, + purgeDeletedDir, subFiles, subDirs)); } @SuppressWarnings("checkstyle:ParameterNumber") @@ -363,6 +373,8 @@ public void optimizeDirDeletesAndSubmitRequest( List purgePathRequestList, String snapTableKey, long startTime, long remainingBufLimit, KeyManager keyManager, + CheckedFunction, Boolean, IOException> reclaimableDirChecker, + CheckedFunction, Boolean, IOException> reclaimableFileChecker, UUID expectedPreviousSnapshotId, long rnCnt) { // Optimization to handle delete sub-dir and keys to remove quickly @@ -372,30 +384,31 @@ public void optimizeDirDeletesAndSubmitRequest( int consumedSize = 0; while (subDirRecursiveCnt < allSubDirList.size() && remainingBufLimit > 0) { try { - Pair stringOmKeyInfoPair - = allSubDirList.get(subDirRecursiveCnt); - PurgePathRequest request = prepareDeleteDirRequest( - stringOmKeyInfoPair.getValue(), - stringOmKeyInfoPair.getKey(), allSubDirList, keyManager, - remainingBufLimit); - consumedSize += request.getSerializedSize(); + Pair stringOmKeyInfoPair = 
allSubDirList.get(subDirRecursiveCnt++); + Boolean subDirectoryReclaimable = reclaimableDirChecker.apply(Table.newKeyValue(stringOmKeyInfoPair.getKey(), + stringOmKeyInfoPair.getValue())); + Optional request = prepareDeleteDirRequest( + stringOmKeyInfoPair.getValue(), stringOmKeyInfoPair.getKey(), subDirectoryReclaimable, allSubDirList, + keyManager, reclaimableFileChecker, remainingBufLimit); + if (!request.isPresent()) { + continue; + } + PurgePathRequest requestVal = request.get(); + consumedSize += requestVal.getSerializedSize(); remainingBufLimit -= consumedSize; - purgePathRequestList.add(request); + purgePathRequestList.add(requestVal); // Count up the purgeDeletedDir, subDirs and subFiles - if (request.getDeletedDir() != null - && !request.getDeletedDir().isEmpty()) { + if (requestVal.hasDeletedDir() && !StringUtils.isBlank(requestVal.getDeletedDir())) { subdirDelNum++; } - subDirNum += request.getMarkDeletedSubDirsCount(); - subFileNum += request.getDeletedSubFilesCount(); - subDirRecursiveCnt++; + subDirNum += requestVal.getMarkDeletedSubDirsCount(); + subFileNum += requestVal.getDeletedSubFilesCount(); } catch (IOException e) { LOG.error("Error while running delete directories and files " + "background task. 
Will retry at next run for subset.", e); break; } } - if (!purgePathRequestList.isEmpty()) { submitPurgePaths(purgePathRequestList, snapTableKey, expectedPreviousSnapshotId); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index ad90490101c4..7edbe7761175 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -21,15 +21,26 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL_DEFAULT; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; +import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; import java.util.List; -import java.util.Objects; +import java.util.Map; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.ReconfigurationHandler; @@ -41,15 +52,20 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.Table.KeyValue; import org.apache.hadoop.hdds.utils.db.TableIterator; +import 
org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.lock.IOzoneManagerLock; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter; +import org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.util.Time; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; @@ -57,21 +73,64 @@ import org.slf4j.LoggerFactory; /** - * This is a background service to delete orphan directories and its - * sub paths(sub-dirs and sub-files). + Background service responsible for purging deleted directories and files + * in the Ozone Manager (OM) and associated snapshots. * *
<p>
- * This will scan the metadata of om periodically to get the orphan dirs from
- * DeletedDirectoryTable and find its sub paths. It will fetch all sub-files
- * from FileTable and move those to DeletedTable so that OM's
- * KeyDeletingService will cleanup those files later. It will fetch all
- * sub-directories from the DirectoryTable and move those to
- * DeletedDirectoryTable so that these will be visited in next iterations.
+ * This service periodically scans the deleted directory table and submits
+ * purge requests for directories and their sub-entries (subdirectories and files).
+ * It operates in both the active object store (AOS) and across all deep-clean enabled
+ * snapshots. The service supports parallel processing using a thread pool and
+ * coordinates exclusive size calculations and cleanup status updates for
+ * snapshots.
+ * </p>
 *
- * <p>
- * After moving all sub-files and sub-dirs the parent orphan directory will be
- * deleted by this service. It will continue traversing until all the leaf path
- * components of an orphan directory is visited.
+ * <h2>Key Features</h2>
+ * <ul>
+ *   <li>Processes deleted directories in both the active OM and all snapshots
+ *       with deep cleaning enabled.</li>
+ *   <li>Uses a thread pool to parallelize deletion tasks within each store or snapshot.</li>
+ *   <li>Employs filters to determine reclaimability of directories and files,
+ *       ensuring safety with respect to snapshot chains.</li>
+ *   <li>Tracks and updates exclusive size and replicated exclusive size for each
+ *       snapshot as directories and files are reclaimed.</li>
+ *   <li>Updates the "deep cleaned" flag for snapshots after a successful run.</li>
+ *   <li>Handles error and race conditions gracefully, deferring work if necessary.</li>
+ * </ul>
+ *
+ * <h2>Constructor Parameters</h2>
+ * <ul>
+ *   <li>interval - How often the service runs.</li>
+ *   <li>unit - Time unit for the interval.</li>
+ *   <li>serviceTimeout - Service timeout in the given time unit.</li>
+ *   <li>ozoneManager - The OzoneManager instance.</li>
+ *   <li>configuration - Ozone configuration object.</li>
+ *   <li>dirDeletingServiceCorePoolSize - Number of parallel threads for deletion per store or snapshot.</li>
+ *   <li>deepCleanSnapshots - Whether to enable deep cleaning for snapshots.</li>
+ * </ul>
+ *
+ * <h2>Threading and Parallelism</h2>
+ * <ul>
+ *   <li>Uses a configurable thread pool for parallel deletion tasks within each store/snapshot.</li>
+ *   <li>Each snapshot and AOS get a separate background task for deletion.</li>
+ * </ul>
+ *
+ * <h2>Snapshot Integration</h2>
+ * <ul>
+ *   <li>Iterates all snapshots in the chain if deep cleaning is enabled.</li>
+ *   <li>Skips snapshots that are already deep-cleaned or not yet flushed to disk.</li>
+ *   <li>Updates snapshot metadata to reflect size changes and cleaning status.</li>
+ * </ul>
+ *
+ * <h2>Usage</h2>
+ * <ul>
+ *   <li>Should be scheduled as a background service in OM.</li>
+ *   <li>Intended to be run only on the OM leader node.</li>
+ * </ul>
+ * + * @see org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableDirFilter + * @see org.apache.hadoop.ozone.om.snapshot.filter.ReclaimableKeyFilter + * @see org.apache.hadoop.ozone.om.SnapshotChainManager */ public class DirectoryDeletingService extends AbstractKeyDeletingService { private static final Logger LOG = @@ -80,32 +139,34 @@ public class DirectoryDeletingService extends AbstractKeyDeletingService { // Using multi thread for DirDeletion. Multiple threads would read // from parent directory info from deleted directory table concurrently // and send deletion requests. - private final int dirDeletingCorePoolSize; private int ratisByteLimit; private final AtomicBoolean suspended; - private AtomicBoolean isRunningOnAOS; - - private final DeletedDirSupplier deletedDirSupplier; - - private AtomicInteger taskCount = new AtomicInteger(0); + private final AtomicBoolean isRunningOnAOS; + private final SnapshotChainManager snapshotChainManager; + private final boolean deepCleanSnapshots; + private final ExecutorService deletionThreadPool; + private final int numberOfParallelThreadsPerStore; public DirectoryDeletingService(long interval, TimeUnit unit, long serviceTimeout, OzoneManager ozoneManager, - OzoneConfiguration configuration, int dirDeletingServiceCorePoolSize) { + OzoneConfiguration configuration, int dirDeletingServiceCorePoolSize, boolean deepCleanSnapshots) { super(DirectoryDeletingService.class.getSimpleName(), interval, unit, dirDeletingServiceCorePoolSize, serviceTimeout, ozoneManager, null); int limit = (int) configuration.getStorageSize( OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT, OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT, StorageUnit.BYTES); + this.numberOfParallelThreadsPerStore = dirDeletingServiceCorePoolSize; + this.deletionThreadPool = new ThreadPoolExecutor(0, numberOfParallelThreadsPerStore, interval, unit, + new LinkedBlockingDeque<>(Integer.MAX_VALUE)); + // always go to 90% of max limit for request 
as other header will be added this.ratisByteLimit = (int) (limit * 0.9); this.suspended = new AtomicBoolean(false); this.isRunningOnAOS = new AtomicBoolean(false); - this.dirDeletingCorePoolSize = dirDeletingServiceCorePoolSize; - deletedDirSupplier = new DeletedDirSupplier(); registerReconfigCallbacks(ozoneManager.getReconfigurationHandler(), configuration); - taskCount.set(0); + this.snapshotChainManager = ((OmMetadataManagerImpl)ozoneManager.getMetadataManager()).getSnapshotChainManager(); + this.deepCleanSnapshots = deepCleanSnapshots; } public void registerReconfigCallbacks(ReconfigurationHandler handler, OzoneConfiguration conf) { @@ -138,10 +199,6 @@ public boolean isRunningOnAOS() { return isRunningOnAOS.get(); } - public AtomicInteger getTaskCount() { - return taskCount; - } - /** * Suspend the service. */ @@ -165,20 +222,19 @@ public void setRatisByteLimit(int ratisByteLimit) { @Override public BackgroundTaskQueue getTasks() { BackgroundTaskQueue queue = new BackgroundTaskQueue(); - if (taskCount.get() > 0) { - LOG.info("{} Directory deleting task(s) already in progress.", - taskCount.get()); - return queue; - } - try { - deletedDirSupplier.reInitItr(); - } catch (IOException ex) { - LOG.error("Unable to get the iterator.", ex); - return queue; - } - taskCount.set(dirDeletingCorePoolSize); - for (int i = 0; i < dirDeletingCorePoolSize; i++) { - queue.add(new DirectoryDeletingService.DirDeletingTask(this)); + queue.add(new DirDeletingTask(this, null)); + if (deepCleanSnapshots) { + Iterator iterator = null; + try { + iterator = snapshotChainManager.iterator(true); + } catch (IOException e) { + LOG.error("Error while initializing snapshot chain iterator."); + return queue; + } + while (iterator.hasNext()) { + UUID snapshotId = iterator.next(); + queue.add(new DirDeletingTask(this, snapshotId)); + } } return queue; } @@ -186,39 +242,36 @@ public BackgroundTaskQueue getTasks() { @Override public void shutdown() { super.shutdown(); - 
deletedDirSupplier.closeItr(); } - private final class DeletedDirSupplier { + private static final class DeletedDirSupplier implements Closeable { private TableIterator> deleteTableIterator; - private synchronized Table.KeyValue get() - throws IOException { + private DeletedDirSupplier(TableIterator> deleteTableIterator) { + this.deleteTableIterator = deleteTableIterator; + } + + private synchronized Table.KeyValue get() { if (deleteTableIterator.hasNext()) { return deleteTableIterator.next(); } return null; } - private synchronized void closeItr() { + @Override + public void close() { IOUtils.closeQuietly(deleteTableIterator); - deleteTableIterator = null; - } - - private synchronized void reInitItr() throws IOException { - closeItr(); - deleteTableIterator = - getOzoneManager().getMetadataManager().getDeletedDirTable() - .iterator(); } } private final class DirDeletingTask implements BackgroundTask { private final DirectoryDeletingService directoryDeletingService; + private final UUID snapshotId; - private DirDeletingTask(DirectoryDeletingService service) { + private DirDeletingTask(DirectoryDeletingService service, UUID snapshotId) { this.directoryDeletingService = service; + this.snapshotId = snapshotId; } @Override @@ -226,147 +279,217 @@ public int getPriority() { return 0; } - @Override - public BackgroundTaskResult call() { - try { - if (shouldRun()) { - isRunningOnAOS.set(true); - long rnCnt = getRunCount().incrementAndGet(); - if (LOG.isDebugEnabled()) { - LOG.debug("Running DirectoryDeletingService. {}", rnCnt); - } - long dirNum = 0L; - long subDirNum = 0L; - long subFileNum = 0L; - long remainingBufLimit = ratisByteLimit; - int consumedSize = 0; - List purgePathRequestList = new ArrayList<>(); - List> allSubDirList = - new ArrayList<>(); - - Table.KeyValue pendingDeletedDirInfo; - // This is to avoid race condition b/w purge request and snapshot chain updation. 
For AOS taking the global - // snapshotId since AOS could process multiple buckets in one iteration. - try { - UUID expectedPreviousSnapshotId = - ((OmMetadataManagerImpl) getOzoneManager().getMetadataManager()).getSnapshotChainManager() - .getLatestGlobalSnapshotId(); - - long startTime = Time.monotonicNow(); - while (remainingBufLimit > 0) { - pendingDeletedDirInfo = getPendingDeletedDirInfo(); - if (pendingDeletedDirInfo == null) { - break; - } - // Do not reclaim if the directory is still being referenced by - // the previous snapshot. - if (previousSnapshotHasDir(pendingDeletedDirInfo)) { - continue; - } - - PurgePathRequest request = prepareDeleteDirRequest( - pendingDeletedDirInfo.getValue(), - pendingDeletedDirInfo.getKey(), allSubDirList, - getOzoneManager().getKeyManager(), remainingBufLimit); - - consumedSize += request.getSerializedSize(); - remainingBufLimit -= consumedSize; - purgePathRequestList.add(request); - // Count up the purgeDeletedDir, subDirs and subFiles - if (request.getDeletedDir() != null && !request.getDeletedDir() - .isEmpty()) { - dirNum++; - } - subDirNum += request.getMarkDeletedSubDirsCount(); - subFileNum += request.getDeletedSubFilesCount(); - } + private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequestUpdatingExclusiveSize( + long exclusiveSize, long exclusiveReplicatedSize, UUID snapshotID) { + OzoneManagerProtocolProtos.SnapshotSize snapshotSize = OzoneManagerProtocolProtos.SnapshotSize.newBuilder() + .setExclusiveSize(exclusiveSize) + .setExclusiveReplicatedSize(exclusiveReplicatedSize) + .build(); + return OzoneManagerProtocolProtos.SetSnapshotPropertyRequest.newBuilder() + .setSnapshotKey(snapshotChainManager.getTableKey(snapshotID)) + .setSnapshotSizeDeltaFromDirDeepCleaning(snapshotSize) + .build(); + } - optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, - subFileNum, allSubDirList, purgePathRequestList, null, - startTime, remainingBufLimit, - getOzoneManager().getKeyManager(), 
expectedPreviousSnapshotId, - rnCnt); + /** + * + * @param currentSnapshotInfo if null, deleted directories in AOS should be processed. + * @param keyManager KeyManager of the underlying store. + */ + private void processDeletedDirsForStore(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, + long remainingBufLimit, long rnCnt) throws IOException, ExecutionException, InterruptedException { + String volume, bucket; String snapshotTableKey; + if (currentSnapshotInfo != null) { + volume = currentSnapshotInfo.getVolumeName(); + bucket = currentSnapshotInfo.getBucketName(); + snapshotTableKey = currentSnapshotInfo.getTableKey(); + } else { + volume = null; bucket = null; snapshotTableKey = null; + } - } catch (IOException e) { - LOG.error( - "Error while running delete directories and files " + "background task. Will retry at next run.", - e); + try (DeletedDirSupplier dirSupplier = new DeletedDirSupplier(currentSnapshotInfo == null ? + keyManager.getDeletedDirEntries() : keyManager.getDeletedDirEntries(volume, bucket))) { + // This is to avoid race condition b/w purge request and snapshot chain update. For AOS taking the global + // snapshotId since AOS could process multiple buckets in one iteration. While using path + // previous snapshotId for a snapshot since it would process only one bucket. + UUID expectedPreviousSnapshotId = currentSnapshotInfo == null ? 
+ snapshotChainManager.getLatestGlobalSnapshotId() : + SnapshotUtils.getPreviousSnapshotId(currentSnapshotInfo, snapshotChainManager); + Map> exclusiveSizeMap = Maps.newConcurrentMap(); + + CompletableFuture processedAllDeletedDirs = CompletableFuture.completedFuture(true); + for (int i = 0; i < numberOfParallelThreadsPerStore; i++) { + CompletableFuture future = CompletableFuture.supplyAsync(() -> { + try { + return processDeletedDirectories(currentSnapshotInfo, keyManager, dirSupplier, remainingBufLimit, + expectedPreviousSnapshotId, exclusiveSizeMap, rnCnt); + } catch (Throwable e) { + return false; + } + }, deletionThreadPool); + processedAllDeletedDirs = processedAllDeletedDirs.thenCombine(future, (a, b) -> a && b); + } + // If AOS or all directories have been processed for snapshot, update snapshot size delta and deep clean flag + // if it is a snapshot. + if (processedAllDeletedDirs.get()) { + List setSnapshotPropertyRequests = new ArrayList<>(); + + for (Map.Entry> entry : exclusiveSizeMap.entrySet()) { + UUID snapshotID = entry.getKey(); + long exclusiveSize = entry.getValue().getLeft(); + long exclusiveReplicatedSize = entry.getValue().getRight(); + setSnapshotPropertyRequests.add(getSetSnapshotRequestUpdatingExclusiveSize( + exclusiveSize, exclusiveReplicatedSize, snapshotID)); } - isRunningOnAOS.set(false); - synchronized (directoryDeletingService) { - this.directoryDeletingService.notify(); + + // Updating directory deep clean flag of snapshot. + if (currentSnapshotInfo != null) { + setSnapshotPropertyRequests.add(OzoneManagerProtocolProtos.SetSnapshotPropertyRequest.newBuilder() + .setSnapshotKey(snapshotTableKey) + .setDeepCleanedDeletedDir(true) + .build()); } + submitSetSnapshotRequests(setSnapshotPropertyRequests); } - } finally { - taskCount.getAndDecrement(); } - // place holder by returning empty results of this call back. 
- return BackgroundTaskResult.EmptyTaskResult.newResult(); } - private boolean previousSnapshotHasDir( - KeyValue pendingDeletedDirInfo) throws IOException { - String key = pendingDeletedDirInfo.getKey(); - OmKeyInfo deletedDirInfo = pendingDeletedDirInfo.getValue(); - OmSnapshotManager omSnapshotManager = - getOzoneManager().getOmSnapshotManager(); - OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) - getOzoneManager().getMetadataManager(); - SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); - if (previousSnapshotInfo == null) { + /** + * Processes deleted directories for snapshot management, determining whether + * directories and files can be purged, and calculates exclusive size mappings + * for snapshots. + * + * @param currentSnapshotInfo Information about the current snapshot whose deleted directories are being processed. + * @param keyManager Key manager of the underlying storage system to handle key operations. + * @param dirSupplier Supplier for fetching pending deleted directories to be processed. + * @param remainingBufLimit Remaining buffer limit for processing directories and files. + * @param expectedPreviousSnapshotId The UUID of the previous snapshot expected in the chain. + * @param totalExclusiveSizeMap A map for storing total exclusive size and exclusive replicated size + * for each snapshot. + * @param runCount The number of times the processing task has been executed. + * @return A boolean indicating whether the processed directory list is empty. 
+ */ + private boolean processDeletedDirectories(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, + DeletedDirSupplier dirSupplier, long remainingBufLimit, UUID expectedPreviousSnapshotId, + Map> totalExclusiveSizeMap, long runCount) { + OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); + IOzoneManagerLock lock = getOzoneManager().getMetadataManager().getLock(); + String snapshotTableKey = currentSnapshotInfo == null ? null : currentSnapshotInfo.getTableKey(); + try (ReclaimableDirFilter reclaimableDirFilter = new ReclaimableDirFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock); + ReclaimableKeyFilter reclaimableFileFilter = new ReclaimableKeyFilter(getOzoneManager(), + omSnapshotManager, snapshotChainManager, currentSnapshotInfo, keyManager, lock)) { + long startTime = Time.monotonicNow(); + long dirNum = 0L; + long subDirNum = 0L; + long subFileNum = 0L; + int consumedSize = 0; + List purgePathRequestList = new ArrayList<>(); + List> allSubDirList = new ArrayList<>(); + while (remainingBufLimit > 0) { + KeyValue pendingDeletedDirInfo = dirSupplier.get(); + if (pendingDeletedDirInfo == null) { + break; + } + boolean isDirReclaimable = reclaimableDirFilter.apply(pendingDeletedDirInfo); + Optional request = prepareDeleteDirRequest( + pendingDeletedDirInfo.getValue(), + pendingDeletedDirInfo.getKey(), isDirReclaimable, allSubDirList, + getOzoneManager().getKeyManager(), reclaimableFileFilter, remainingBufLimit); + if (!request.isPresent()) { + continue; + } + PurgePathRequest purgePathRequest = request.get(); + consumedSize += purgePathRequest.getSerializedSize(); + remainingBufLimit -= consumedSize; + purgePathRequestList.add(purgePathRequest); + // Count up the purgeDeletedDir, subDirs and subFiles + if (purgePathRequest.hasDeletedDir() && !StringUtils.isBlank(purgePathRequest.getDeletedDir())) { + dirNum++; + } + subDirNum += 
purgePathRequest.getMarkDeletedSubDirsCount(); + subFileNum += purgePathRequest.getDeletedSubFilesCount(); + } + + optimizeDirDeletesAndSubmitRequest(dirNum, subDirNum, + subFileNum, allSubDirList, purgePathRequestList, snapshotTableKey, + startTime, remainingBufLimit, getOzoneManager().getKeyManager(), + reclaimableDirFilter, reclaimableFileFilter, expectedPreviousSnapshotId, + runCount); + Map exclusiveReplicatedSizeMap = reclaimableFileFilter.getExclusiveReplicatedSizeMap(); + Map exclusiveSizeMap = reclaimableFileFilter.getExclusiveSizeMap(); + List previousPathSnapshotsInChain = + Stream.of(exclusiveSizeMap.keySet(), exclusiveReplicatedSizeMap.keySet()) + .flatMap(Collection::stream).distinct().collect(Collectors.toList()); + for (UUID snapshot : previousPathSnapshotsInChain) { + totalExclusiveSizeMap.compute(snapshot, (k, v) -> { + long exclusiveSize = exclusiveSizeMap.getOrDefault(snapshot, 0L); + long exclusiveReplicatedSize = exclusiveReplicatedSizeMap.getOrDefault(snapshot, 0L); + if (v == null) { + return Pair.of(exclusiveSize, exclusiveReplicatedSize); + } + return Pair.of(v.getLeft() + exclusiveSize, v.getRight() + exclusiveReplicatedSize); + }); + } + + return purgePathRequestList.isEmpty(); + } catch (IOException e) { + LOG.error("Error while running delete directories for store : {} and files background task. " + + "Will retry at next run. ", snapshotTableKey, e); return false; } - // previous snapshot is not active or it has not been flushed to disk then don't process the key in this - // iteration. 
- if (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || - !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), - previousSnapshotInfo)) { - return true; - } - try (UncheckedAutoCloseableSupplier rcLatestSnapshot = - omSnapshotManager.getSnapshot( - deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), - previousSnapshotInfo.getName())) { - - if (rcLatestSnapshot != null) { - String dbRenameKey = metadataManager - .getRenameKey(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), deletedDirInfo.getObjectID()); - Table prevDirTable = - rcLatestSnapshot.get().getMetadataManager().getDirectoryTable(); - Table prevDeletedDirTable = - rcLatestSnapshot.get().getMetadataManager().getDeletedDirTable(); - OmKeyInfo prevDeletedDirInfo = prevDeletedDirTable.get(key); - if (prevDeletedDirInfo != null) { - return true; + } + + @Override + public BackgroundTaskResult call() { + // Check if this is the Leader OM. If not leader, no need to execute this + // task. + if (shouldRun()) { + final long run = getRunCount().incrementAndGet(); + if (snapshotId == null) { + LOG.debug("Running DirectoryDeletingService for active object store, {}", run); + isRunningOnAOS.set(true); + } else { + LOG.debug("Running DirectoryDeletingService for snapshot : {}, {}", snapshotId, run); + } + OmSnapshotManager omSnapshotManager = getOzoneManager().getOmSnapshotManager(); + SnapshotInfo snapInfo = null; + try { + snapInfo = snapshotId == null ? null : + SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); + if (snapInfo != null) { + if (snapInfo.isDeepCleanedDeletedDir()) { + LOG.info("Snapshot {} has already been deep cleaned directory. 
Skipping the snapshot in this iteration.", + snapInfo.getSnapshotId()); + return BackgroundTaskResult.EmptyTaskResult.newResult(); + } + if (!OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), snapInfo)) { + LOG.info("Skipping snapshot processing since changes to snapshot {} have not been flushed to disk", + snapInfo); + return BackgroundTaskResult.EmptyTaskResult.newResult(); + } + } + try (UncheckedAutoCloseableSupplier omSnapshot = snapInfo == null ? null : + omSnapshotManager.getActiveSnapshot(snapInfo.getVolumeName(), snapInfo.getBucketName(), + snapInfo.getName())) { + KeyManager keyManager = snapInfo == null ? getOzoneManager().getKeyManager() + : omSnapshot.get().getKeyManager(); + processDeletedDirsForStore(snapInfo, keyManager, ratisByteLimit, run); + } + } catch (IOException | ExecutionException | InterruptedException e) { + LOG.error("Error while running delete files background task for store {}. Will retry at next run.", + snapInfo, e); + } finally { + if (snapshotId == null) { + isRunningOnAOS.set(false); + synchronized (directoryDeletingService) { + this.directoryDeletingService.notify(); + } } - String prevDirTableDBKey = metadataManager.getSnapshotRenamedTable() - .get(dbRenameKey); - // In OMKeyDeleteResponseWithFSO OzonePathKey is converted to - // OzoneDeletePathKey. Changing it back to check the previous DirTable - String prevDbKey = prevDirTableDBKey == null ? - metadataManager.getOzoneDeletePathDirKey(key) : prevDirTableDBKey; - OmDirectoryInfo prevDirInfo = prevDirTable.get(prevDbKey); - //Checking if the previous snapshot in the chain hasn't changed while checking if the deleted directory is - // present in the previous snapshot. If the chain has changed, the deleted directory could have been moved - // to the newly created snapshot. 
- SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), - deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); - return (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), - Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId))) || (prevDirInfo != null && - prevDirInfo.getObjectID() == deletedDirInfo.getObjectID()); } } - - return false; + // By design, no one cares about the results of this call back. + return BackgroundTaskResult.EmptyTaskResult.newResult(); } } - - public KeyValue getPendingDeletedDirInfo() - throws IOException { - return deletedDirSupplier.get(); - } - } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index d89726fd35ef..5e34c1ff741a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -122,7 +122,7 @@ public BackgroundTaskQueue getTasks() { try { iterator = snapshotChainManager.iterator(true); } catch (IOException e) { - LOG.error("Error while initializing snapshot chain iterator."); + LOG.error("Error while initializing snapshot chain iterator. DirDeletingTask will only process AOS this run."); return queue; } while (iterator.hasNext()) { @@ -204,7 +204,7 @@ private OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequ * @param keyManager KeyManager of the underlying store. 
*/ private void processDeletedKeysForStore(SnapshotInfo currentSnapshotInfo, KeyManager keyManager, - int remainNum) throws IOException, InterruptedException { + int remainNum) throws IOException { String volume = null, bucket = null, snapshotTableKey = null; if (currentSnapshotInfo != null) { volume = currentSnapshotInfo.getVolumeName(); @@ -323,8 +323,8 @@ public BackgroundTaskResult call() { SnapshotUtils.getSnapshotInfo(getOzoneManager(), snapshotChainManager, snapshotId); if (snapInfo != null) { if (snapInfo.isDeepCleaned()) { - LOG.info("Snapshot {} has already been deep cleaned. Skipping the snapshot in this iteration.", - snapInfo.getSnapshotId()); + LOG.info("Snapshot '{}' ({}) has already been deep cleaned. Skipping the snapshot in this iteration.", + snapInfo.getTableKey(), snapInfo.getSnapshotId()); return EmptyTaskResult.newResult(); } if (!OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), snapInfo)) { @@ -345,7 +345,7 @@ public BackgroundTaskResult call() { : omSnapshot.get().getKeyManager(); processDeletedKeysForStore(snapInfo, keyManager, remainNum); } - } catch (IOException | InterruptedException e) { + } catch (IOException e) { LOG.error("Error while running delete files background task for store {}. 
Will retry at next run.", snapInfo, e); } finally { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 68d9306584ae..42e76377e14d 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -21,8 +21,8 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; -import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -140,7 +140,7 @@ class TestKeyDeletingService extends OzoneTestBase { private KeyManager keyManager; private OMMetadataManager metadataManager; private KeyDeletingService keyDeletingService; - private SnapshotDirectoryCleaningService snapshotDirectoryCleaningService; + private DirectoryDeletingService directoryDeletingService; private ScmBlockLocationTestingClient scmBlockTestingClient; @BeforeAll @@ -156,7 +156,7 @@ private void createConfig(File testDir) { 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); - conf.setTimeDuration(OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, + 
conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL, 1, TimeUnit.SECONDS); @@ -170,7 +170,7 @@ private void createSubject() throws Exception { OmTestManagers omTestManagers = new OmTestManagers(conf, scmBlockTestingClient, null); keyManager = omTestManagers.getKeyManager(); keyDeletingService = keyManager.getDeletingService(); - snapshotDirectoryCleaningService = keyManager.getSnapshotDirectoryService(); + directoryDeletingService = keyManager.getDirDeletingService(); writeClient = omTestManagers.getWriteClient(); om = omTestManagers.getOzoneManager(); metadataManager = omTestManagers.getMetadataManager(); @@ -524,6 +524,7 @@ void testSnapshotDeepClean() throws Exception { // Suspend KeyDeletingService keyDeletingService.suspend(); + directoryDeletingService.suspend(); final long initialSnapshotCount = metadataManager.countRowsInTable(snapshotInfoTable); final long initialKeyCount = metadataManager.countRowsInTable(keyTable); @@ -571,6 +572,7 @@ void testSnapshotDeepClean() throws Exception { checkSnapDeepCleanStatus(snapshotInfoTable, volumeName, false); keyDeletingService.resume(); + directoryDeletingService.resume(); try (UncheckedAutoCloseableSupplier rcOmSnapshot = om.getOmSnapshotManager().getSnapshot(volumeName, bucketName, snap3)) { @@ -640,6 +642,7 @@ void testSnapshotExclusiveSize() throws Exception { // Supspend KDS keyDeletingService.suspend(); + directoryDeletingService.suspend(); final long initialSnapshotCount = metadataManager.countRowsInTable(snapshotInfoTable); final long initialKeyCount = metadataManager.countRowsInTable(keyTable); @@ -711,10 +714,11 @@ void testSnapshotExclusiveSize() throws Exception { createAndCommitKey(testVolumeName, testBucketName, uniqueObjectName("key"), 3); long prevKdsRunCount = getRunCount(); - long prevSnapshotDirectorServiceCnt = snapshotDirectoryCleaningService.getRunCount().get(); + long 
prevSnapshotDirectorServiceCnt = directoryDeletingService.getRunCount().get(); + directoryDeletingService.resume(); // Let SnapshotDirectoryCleaningService to run for some iterations GenericTestUtils.waitFor( - () -> (snapshotDirectoryCleaningService.getRunCount().get() > prevSnapshotDirectorServiceCnt + 20), + () -> (directoryDeletingService.getRunCount().get() > prevSnapshotDirectorServiceCnt + 100), 100, 100000); keyDeletingService.resume(); @@ -779,7 +783,7 @@ void cleanup() { @Test @DisplayName("Should not update keys when purge request times out during key deletion") - public void testFailingModifiedKeyPurge() throws IOException, InterruptedException { + public void testFailingModifiedKeyPurge() throws IOException { try (MockedStatic mocked = mockStatic(OzoneManagerRatisUtils.class, CALLS_REAL_METHODS)) {