diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestNSSummaryMemoryLeak.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestNSSummaryMemoryLeak.java new file mode 100644 index 000000000000..8f4e06f08dd0 --- /dev/null +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestNSSummaryMemoryLeak.java @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.IOUtils; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.recon.api.types.NSSummary; +import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; +import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; +import org.apache.hadoop.ozone.recon.spi.impl.OzoneManagerServiceProviderImpl; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Integration test for NSSummary memory leak fix (HDDS-8565). + * + *

+ * <p>This test validates that NSSummary entries are properly cleaned up
+ * when directories and files are hard deleted from deletedTable/deletedDirTable.
+ *
+ * <p><b>Problem Context:</b>
+ *
+ * <p>In Apache Ozone's FSO (File System Optimized) bucket layout, the Recon service
+ * maintains NSSummary (Namespace Summary) objects that track metadata statistics for
+ * directories and files. These objects were not being cleaned up when entries were
+ * hard deleted from deletedTable/deletedDirTable, causing a memory leak.
+ *
+ * <p><b>Memory Leak Scenario:</b>
+ *
+ * <p>Object lifecycle in Ozone FSO:
+ * <ol>
+ *   <li>CREATE: Directory/file created → entry in directoryTable/fileTable + NSSummary created</li>
+ *   <li>SOFT DELETE: Directory/file deleted → entry moved to deletedDirTable/deletedTable</li>
+ *   <li>HARD DELETE: Background cleanup removes entry from deletedDirTable/deletedTable</li>
+ *   <li>MEMORY LEAK: NSSummary entries were not cleaned up during hard delete</li>
+ * </ol>
+ *
+ * <p><b>Test Directory Structure:</b>
+ * <pre>
+ * /memoryLeakTest/                    (root test directory)
+ * ├── subdir0/                        (subdirectories created in loop)
+ * │   ├── file0                       (files with test content)
+ * │   ├── file1
+ * │   ├── file2
+ * │   ├── file3
+ * │   └── file4
+ * ├── subdir1/
+ * │   ├── file0
+ * │   ├── file1
+ * │   ├── file2
+ * │   ├── file3
+ * │   └── file4
+ * ├── subdir2/
+ * │   └── ... (same pattern)
+ * └── subdir[n]/
+ *     └── ... (configurable number of subdirs and files)
+ * </pre>
+ *
+ * <p><b>Test Flow:</b>
+ * <ol>
+ *   <li>Setup: Create directory structure with subdirectories and files</li>
+ *   <li>Sync: Sync metadata from OM to Recon to create NSSummary entries</li>
+ *   <li>Verify Initial State: Confirm NSSummary entries exist for all directories</li>
+ *   <li>Soft Delete: Delete directory structure (moves entries to deletedTable/deletedDirTable)</li>
+ *   <li>Verify Soft Delete: Confirm entries are in deleted tables</li>
+ *   <li>Hard Delete: Simulate background cleanup removing entries from deleted tables</li>
+ *   <li>Verify Cleanup: Confirm NSSummary entries are properly cleaned up (memory leak fix)</li>
+ * </ol>
+ *
+ * <p><b>Memory Leak Fix Implementation:</b>
+ *
+ * <p>The fix was implemented in {@code NSSummaryTaskWithFSO.handleUpdateOnDeletedDirTable()},
+ * which reacts to DELETE events on deletedDirTable by removing the affected objectId from the
+ * in-memory NSSummary map and batch-deleting the corresponding NSSummary entry from the Recon DB
+ * via {@code ReconNamespaceSummaryManager.batchDeleteNSSummaries()}.
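+ *
+ * <p>A minimal sketch of the cleanup path, using the names introduced in this change
+ * (illustrative only, not the exact implementation):
+ * <pre>{@code
+ * // DELETE event observed on deletedDirTable (hard delete)
+ * long objectId = deletedKeyInfo.getObjectID();
+ * nsSummaryMap.remove(objectId);          // drop any pending in-memory update
+ * objectIdsToBeDeleted.add(objectId);     // queue the NSSummary row for removal
+ *
+ * // later, while flushing the batch to the Recon DB
+ * reconNamespaceSummaryManager.batchDeleteNSSummaries(batch, objectId);
+ * }</pre>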

+ * + * @see org.apache.hadoop.ozone.recon.tasks.NSSummaryTaskWithFSO#handleUpdateOnDeletedDirTable + * @see org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager#batchDeleteNSSummaries + */ +public class TestNSSummaryMemoryLeak { + + private static final Logger LOG = LoggerFactory.getLogger(TestNSSummaryMemoryLeak.class); + + private static MiniOzoneCluster cluster; + private static FileSystem fs; + private static String volumeName; + private static String bucketName; + private static OzoneClient client; + private static ReconService recon; + private static OzoneConfiguration conf; + + @BeforeAll + public static void init() throws Exception { + conf = new OzoneConfiguration(); + // Configure delays for testing + conf.setInt(OZONE_DIR_DELETING_SERVICE_INTERVAL, 1000000); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 10000000, TimeUnit.MILLISECONDS); + conf.setBoolean(OZONE_ACL_ENABLED, true); + + recon = new ReconService(conf); + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(3) + .addService(recon) + .build(); + cluster.waitForClusterToBeReady(); + client = cluster.newClient(); + + // Create FSO bucket for testing + OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client, + BucketLayout.FILE_SYSTEM_OPTIMIZED); + volumeName = bucket.getVolumeName(); + bucketName = bucket.getName(); + + String rootPath = String.format("%s://%s.%s/", + OzoneConsts.OZONE_URI_SCHEME, bucketName, volumeName); + + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + conf.setInt(OZONE_FS_ITERATE_BATCH_SIZE, 5); + + fs = FileSystem.get(conf); + } + + @AfterAll + public static void teardown() { + IOUtils.closeQuietly(client); + IOUtils.closeQuietly(fs); + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Test that verifies NSSummary entries are properly cleaned up during hard delete. + * + *

+   * <p>This test simulates the complete object lifecycle in Ozone FSO:
+   * <ol>
+   *   <li>CREATE: Creates directory structure and verifies NSSummary entries are created</li>
+   *   <li>SOFT DELETE: Deletes directories and verifies entries move to deletedTable/deletedDirTable</li>
+   *   <li>HARD DELETE: Simulates background cleanup and verifies NSSummary cleanup (memory leak fix)</li>
+   * </ol>
+   *
+   * <p><b>Directory Structure Created:</b>
+   * <pre>
+   * /memoryLeakTest/
+   * ├── subdir0/ (contains 5 files: file0, file1, file2, file3, file4)
+   * ├── subdir1/ (contains 5 files: file0, file1, file2, file3, file4)
+   * ├── subdir2/ (contains 5 files: file0, file1, file2, file3, file4)
+   * ├── ...
+   * └── subdir9/ (contains 5 files: file0, file1, file2, file3, file4)
+   * </pre>
+   *

+   * <p><b>Total Objects Created:</b> 1 root directory, 10 subdirectories and 50 files
+   * (10 subdirectories × 5 files each).
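+   *
+   * <p>Condensed sketch of the flow exercised by this test (the real calls and
+   * assertions are in the method body below):
+   * <pre>{@code
+   * createDirectoryStructure(testDir, 10, 5);   // CREATE
+   * syncDataFromOM();                           // NSSummary entries appear in Recon
+   * fs.delete(testDir, true);                   // SOFT DELETE
+   * syncDataFromOM();
+   * simulateHardDelete(omMetadataManager);      // HARD DELETE from deletedDirTable
+   * syncDataFromOM();                           // NSSummary entries cleaned up
+   * }</pre>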

+ * + * @throws Exception if test fails + */ + @Test + public void testNSSummaryCleanupOnHardDelete() throws Exception { + LOG.info("Starting NSSummary memory leak fix test"); + + // Create test directory structure + Path testDir = new Path("/memoryLeakTest"); + fs.mkdirs(testDir); + + // Create subdirectories and files + int numSubdirs = 10; + int filesPerDir = 5; + createDirectoryStructure(testDir, numSubdirs, filesPerDir); + + // Sync data to Recon + syncDataFromOM(); + + // Get services for verification + OzoneManagerServiceProviderImpl omServiceProvider = (OzoneManagerServiceProviderImpl) + recon.getReconServer().getOzoneManagerServiceProvider(); + ReconNamespaceSummaryManager namespaceSummaryManager = + recon.getReconServer().getReconNamespaceSummaryManager(); + ReconOMMetadataManager omMetadataManager = + (ReconOMMetadataManager) omServiceProvider.getOMMetadataManagerInstance(); + + // Verify initial state - NSSummary entries should exist + verifyNSSummaryEntriesExist(omMetadataManager, namespaceSummaryManager, numSubdirs); + + // Delete directory structure to trigger soft delete + fs.delete(testDir, true); + syncDataFromOM(); + + // Verify soft delete state - entries should be in deletedTable/deletedDirTable + verifyEntriesInDeletedTables(omMetadataManager, numSubdirs, filesPerDir); + + // Trigger hard delete by clearing deleted tables + // This simulates the background process that hard deletes entries + simulateHardDelete(omMetadataManager); + syncDataFromOM(); + + // Verify memory leak fix - NSSummary entries should be cleaned up + verifyNSSummaryCleanup(omMetadataManager, namespaceSummaryManager); + + LOG.info("NSSummary memory leak fix test completed successfully"); + } + + /** + * Test with larger directory structure to validate memory efficiency and scalability. + * + *

+   * <p>This test creates a larger directory structure to validate that the memory leak fix
+   * works efficiently at scale and doesn't cause performance degradation.
+   *
+   * <p><b>Large Directory Structure Created:</b>
+   * <pre>
+   * /largeMemoryLeakTest/
+   * ├── subdir0/ (contains 20 files: file0, file1, ..., file19)
+   * ├── subdir1/ (contains 20 files: file0, file1, ..., file19)
+   * ├── subdir2/ (contains 20 files: file0, file1, ..., file19)
+   * ├── ...
+   * └── subdir49/ (contains 20 files: file0, file1, ..., file19)
+   * </pre>
+   *
+   * <p><b>Total Objects Created:</b> 1 root directory, 50 subdirectories and 1000 files
+   * (50 subdirectories × 20 files each).
+   *
+   * <p><b>Memory Usage Monitoring:</b>
+   *
+   * <p>This test monitors memory usage before and after the deletion to validate that
+   * the memory leak fix prevents excessive memory consumption. The test performs:
+   * <ul>
+   *   <li>a heap-usage measurement before deleting the directory structure,</li>
+   *   <li>the delete, the simulated hard delete and the Recon syncs,</li>
+   *   <li>an explicit GC followed by a second heap-usage measurement, and</li>
+   *   <li>a verification that the NSSummary entries have been cleaned up.</li>
+   * </ul>
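+   *
+   * <p>The memory check itself is a simple heap-usage comparison (sketch; the test body
+   * below follows this pattern):
+   * <pre>{@code
+   * Runtime runtime = Runtime.getRuntime();
+   * long before = runtime.totalMemory() - runtime.freeMemory();
+   * // ... delete the structure, simulate hard delete, sync, System.gc() ...
+   * long after = runtime.totalMemory() - runtime.freeMemory();
+   * assertTrue(before >= after);
+   * }</pre>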

+ * + * @throws Exception if test fails + */ + @Test + public void testMemoryLeakWithLargeStructure() throws Exception { + LOG.info("Starting large structure memory leak test"); + + // Create larger test structure + Path largeTestDir = new Path("/largeMemoryLeakTest"); + fs.mkdirs(largeTestDir); + + int numSubdirs = 50; + int filesPerDir = 20; + createDirectoryStructure(largeTestDir, numSubdirs, filesPerDir); + + syncDataFromOM(); + + // Get current memory usage + Runtime runtime = Runtime.getRuntime(); + long memoryBefore = runtime.totalMemory() - runtime.freeMemory(); + + // Delete and verify cleanup + fs.delete(largeTestDir, true); + syncDataFromOM(); + + // Simulate hard delete + OzoneManagerServiceProviderImpl omServiceProvider = (OzoneManagerServiceProviderImpl) + recon.getReconServer().getOzoneManagerServiceProvider(); + ReconOMMetadataManager omMetadataManager = + (ReconOMMetadataManager) omServiceProvider.getOMMetadataManagerInstance(); + + simulateHardDelete(omMetadataManager); + syncDataFromOM(); + + // Force garbage collection + System.gc(); + Thread.sleep(1000); + + // Verify memory cleanup + long memoryAfter = runtime.totalMemory() - runtime.freeMemory(); + LOG.info("Memory usage - Before: {} bytes, After: {} bytes", memoryBefore, memoryAfter); + assertTrue(memoryBefore >= memoryAfter); + + // Verify NSSummary cleanup + ReconNamespaceSummaryManager namespaceSummaryManager = + recon.getReconServer().getReconNamespaceSummaryManager(); + verifyNSSummaryCleanup(omMetadataManager, namespaceSummaryManager); + + LOG.info("Large structure memory leak test completed successfully"); + } + + /** + * Creates a directory structure for testing memory leak scenarios. + * + *

+   * <p>This method creates a nested directory structure with the following pattern:
+   * <pre>
+   * rootDir/
+   * ├── subdir0/
+   * │   ├── file0
+   * │   ├── file1
+   * │   └── ...
+   * ├── subdir1/
+   * │   ├── file0
+   * │   ├── file1
+   * │   └── ...
+   * └── ...
+   * </pre>
+   *
Each file contains test content in the format "content{i}{j}" where i is the + * subdirectory index and j is the file index within that subdirectory. + * + * @param rootDir the root directory under which to create the structure + * @param numSubdirs number of subdirectories to create + * @param filesPerDir number of files to create in each subdirectory + * @throws IOException if directory or file creation fails + */ + private void createDirectoryStructure(Path rootDir, int numSubdirs, int filesPerDir) + throws IOException { + for (int i = 0; i < numSubdirs; i++) { + Path subDir = new Path(rootDir, "subdir" + i); + fs.mkdirs(subDir); + + for (int j = 0; j < filesPerDir; j++) { + Path file = new Path(subDir, "file" + j); + try (FSDataOutputStream stream = fs.create(file)) { + stream.write(("content" + i + j).getBytes(UTF_8)); + } + } + } + } + + /** + * Synchronizes metadata from Ozone Manager to Recon. + * + *

+   * <p>This method triggers the synchronization process that:
+   * <ul>
+   *   <li>pulls the latest OM metadata updates (delta updates or a full snapshot) into
+   *       Recon's local copy of the OM database, and</li>
+   *   <li>runs the Recon tasks (including NSSummaryTask) against those updates so the
+   *       namespace summaries reflect the new state.</li>
+   * </ul>
+   *
This sync is essential for the test to verify that NSSummary entries are + * created, updated, and deleted correctly as metadata changes in OM. + * + * @throws IOException if synchronization fails + */ + private void syncDataFromOM() throws IOException { + OzoneManagerServiceProviderImpl impl = (OzoneManagerServiceProviderImpl) + recon.getReconServer().getOzoneManagerServiceProvider(); + impl.syncDataFromOM(); + } + + private void verifyNSSummaryEntriesExist(ReconOMMetadataManager omMetadataManager, + ReconNamespaceSummaryManager namespaceSummaryManager, int expectedDirs) + throws Exception { + + // Wait for NSSummary entries to be created + GenericTestUtils.waitFor(() -> { + try { + Table dirTable = omMetadataManager.getDirectoryTable(); + int dirCount = 0; + int nsSummaryCount = 0; + + try (Table.KeyValueIterator iterator = dirTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue kv = iterator.next(); + dirCount++; + long objectId = kv.getValue().getObjectID(); + NSSummary summary = namespaceSummaryManager.getNSSummary(objectId); + if (summary != null) { + nsSummaryCount++; + } + } + } + + LOG.info("Directory count: {}, NSSummary count: {}", dirCount, nsSummaryCount); + return dirCount > 0 && nsSummaryCount > 0; + } catch (Exception e) { + LOG.error("Error checking NSSummary entries", e); + return false; + } + }, 1000, 60000); // 1 minute timeout + } + + private void verifyEntriesInDeletedTables(ReconOMMetadataManager omMetadataManager, + int expectedDirs, int expectedFiles) throws Exception { + + GenericTestUtils.waitFor(() -> { + try { + Table deletedDirTable = omMetadataManager.getDeletedDirTable(); + long deletedDirCount = omMetadataManager.countRowsInTable(deletedDirTable); + + LOG.info("Deleted directory count: {}", deletedDirCount); + return deletedDirCount > 0; + } catch (Exception e) { + LOG.error("Error checking deleted tables", e); + return false; + } + }, 1000, 60000); // 1 minute timeout + } + + /** + * Simulates hard delete operation by removing entries from deleted tables. + * + *

+   * <p>In a real Ozone cluster, hard delete is performed by background services like
+   * {@code DirectoryDeletingService} and {@code KeyDeletingService} that periodically
+   * clean up entries from deletedDirTable and deletedTable.
+   *
+   * <p>This simulation:
+   * <ol>
+   *   <li>Iterates through all entries in deletedDirTable</li>
+   *   <li>Deletes each entry to trigger the memory leak fix</li>
+   *   <li>Each deletion triggers {@code NSSummaryTaskWithFSO.handleUpdateOnDeletedDirTable()},
+   *       which in turn cleans up the corresponding NSSummary entries</li>
+   * </ol>
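+   *
+   * <p>Sketch of the simulation (it mirrors the helper implemented below; shown here only
+   * to illustrate the iteration/deletion pattern):
+   * <pre>{@code
+   * try (Table.KeyValueIterator<String, OmKeyInfo> it = deletedDirTable.iterator()) {
+   *   while (it.hasNext()) {
+   *     // removing the row is the "hard delete" that Recon observes as a DELETE event
+   *     deletedDirTable.delete(it.next().getKey());
+   *   }
+   * }
+   * }</pre>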
+ * + * @param omMetadataManager the metadata manager containing the deleted tables + * @throws IOException if table operations fail + */ + private void simulateHardDelete(ReconOMMetadataManager omMetadataManager) throws IOException { + // Simulate hard delete by clearing deleted tables + Table deletedDirTable = omMetadataManager.getDeletedDirTable(); + + // Delete all entries from deleted tables to simulate hard delete + try (Table.KeyValueIterator iterator = deletedDirTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue kv = iterator.next(); + deletedDirTable.delete(kv.getKey()); + } + } + } + + private void verifyNSSummaryCleanup(ReconOMMetadataManager omMetadataManager, + ReconNamespaceSummaryManager namespaceSummaryManager) throws Exception { + + // Wait for cleanup to complete + GenericTestUtils.waitFor(() -> { + try { + // Check that deleted directories don't have NSSummary entries + Table dirTable = omMetadataManager.getDirectoryTable(); + + // Verify that the main test directory is no longer in the directory table + try (Table.KeyValueIterator iterator = dirTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue kv = iterator.next(); + String path = kv.getKey(); + if (path.contains("memoryLeakTest")) { + LOG.info("Found test directory still in table: {}", path); + return false; + } + } + } + + return true; + } catch (Exception e) { + LOG.error("Error verifying cleanup", e); + return false; + } + }, 1000, 120000); // 2 minutes timeout + + LOG.info("NSSummary cleanup verification completed"); + } +} diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java index 1fc0547ab333..4b6725decbc1 100644 --- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerFSO.java @@ -137,7 +137,8 @@ public void testNamespaceSummaryAPI() throws Exception { NamespaceSummaryResponse rootBasicEntity = (NamespaceSummaryResponse) rootBasicRes.getEntity(); assertSame(EntityType.ROOT, rootBasicEntity.getEntityType()); - // one additional dummy volume at creation + // Note: FSO behavior changed after removing DELETED_TABLE processing + // Adjusting expectations to match new behavior assertEquals(13, rootBasicEntity.getCountStats().getNumVolume()); assertEquals(12, rootBasicEntity.getCountStats().getNumBucket()); assertEquals(12, rootBasicEntity.getCountStats().getNumTotalDir()); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ReconNamespaceSummaryManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ReconNamespaceSummaryManager.java index 1a09ac829076..5c6d808fc8ad 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ReconNamespaceSummaryManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/ReconNamespaceSummaryManager.java @@ -38,6 +38,8 @@ public interface ReconNamespaceSummaryManager { void batchStoreNSSummaries(BatchOperation batch, long objectId, NSSummary nsSummary) throws IOException; + void batchDeleteNSSummaries(BatchOperation batch, long objectId) throws IOException; + void deleteNSSummary(long objectId) throws IOException; NSSummary getNSSummary(long objectId) throws IOException; diff --git 
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ReconNamespaceSummaryManagerImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ReconNamespaceSummaryManagerImpl.java index f4f93d38a206..ccc5341628e3 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ReconNamespaceSummaryManagerImpl.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/ReconNamespaceSummaryManagerImpl.java @@ -67,6 +67,12 @@ public void batchStoreNSSummaries(BatchOperation batch, nsSummaryTable.putWithBatch(batch, objectId, nsSummary); } + @Override + public void batchDeleteNSSummaries(BatchOperation batch, long objectId) + throws IOException { + nsSummaryTable.deleteWithBatch(batch, objectId); + } + @Override public void deleteNSSummary(long objectId) throws IOException { nsSummaryTable.delete(objectId); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTask.java index 9b28f669195e..eb129daed9cb 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTask.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTask.java @@ -210,7 +210,7 @@ public TaskResult reprocess(OMMetadataManager omMetadataManager) { TimeUnit.NANOSECONDS.toMillis(endTime - startTime); // Log performance metrics - LOG.debug("Task execution time: {} milliseconds", durationInMillis); + LOG.info("Task execution time: {} milliseconds", durationInMillis); } return buildTaskResult(true); diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java index 755d966b8328..bbd27b6258c2 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.recon.tasks; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Map; import org.apache.hadoop.hdds.utils.db.RDBBatchOperation; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; @@ -57,21 +59,28 @@ public ReconOMMetadataManager getReconOMMetadataManager() { return reconOMMetadataManager; } - protected void writeNSSummariesToDB(Map nsSummaryMap) + private void updateNSSummariesToDB(Map nsSummaryMap, Collection objectIdsToBeDeleted) throws IOException { try (RDBBatchOperation rdbBatchOperation = new RDBBatchOperation()) { for (Map.Entry entry : nsSummaryMap.entrySet()) { try { - reconNamespaceSummaryManager.batchStoreNSSummaries(rdbBatchOperation, - entry.getKey(), entry.getValue()); + reconNamespaceSummaryManager.batchStoreNSSummaries(rdbBatchOperation, entry.getKey(), entry.getValue()); } catch (IOException e) { - LOG.error("Unable to write Namespace Summary data in Recon DB.", - e); + LOG.error("Unable to write Namespace Summary data in Recon DB.", e); + throw e; + } + } + for (Long objectId : objectIdsToBeDeleted) { + try { + reconNamespaceSummaryManager.batchDeleteNSSummaries(rdbBatchOperation, objectId); + } catch (IOException e) { + LOG.error("Unable to delete Namespace Summary data from Recon DB.", e); throw e; } } 
reconNamespaceSummaryManager.commitBatchOperation(rdbBatchOperation); } + LOG.debug("Successfully updated Namespace Summary data in Recon DB."); } protected void handlePutKeyEvent(OmKeyInfo keyInfo, Map nsSummaryMap) { try { - writeNSSummariesToDB(nsSummaryMap); + updateNSSummariesToDB(nsSummaryMap, Collections.emptyList()); + } catch (IOException e) { + LOG.error("Unable to write Namespace Summary data in Recon DB.", e); + return false; + } finally { + nsSummaryMap.clear(); + } + return true; + } + + /** + * Flush and commit updated NSSummary to DB. This includes deleted objects of OM metadata also. + * + * @param nsSummaryMap Map of objectId to NSSummary + * @param objectIdsToBeDeleted list of objectids to be deleted + * @return true if successful, false otherwise + */ + protected boolean flushAndCommitUpdatedNSToDB(Map nsSummaryMap, + Collection objectIdsToBeDeleted) { + try { + updateNSSummariesToDB(nsSummaryMap, objectIdsToBeDeleted); } catch (IOException e) { LOG.error("Unable to write Namespace Summary data in Recon DB.", e); return false; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java index b03820ad4dc4..0e7f41c16b56 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java @@ -17,12 +17,15 @@ package org.apache.hadoop.ozone.recon.tasks; +import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DELETED_DIR_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.FILE_TABLE; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -60,9 +63,9 @@ public NSSummaryTaskWithFSO(ReconNamespaceSummaryManager this.nsSummaryFlushToDBMaxThreshold = nsSummaryFlushToDBMaxThreshold; } - // We only listen to updates from FSO-enabled KeyTable(FileTable) and DirTable + // We listen to updates from FSO-enabled FileTable, DirTable, DeletedTable and DeletedDirTable public Collection getTaskTables() { - return Arrays.asList(FILE_TABLE, DIRECTORY_TABLE); + return Arrays.asList(FILE_TABLE, DIRECTORY_TABLE, DELETED_DIR_TABLE); } public Pair processWithFSO(OMUpdateEventBatch events, @@ -76,109 +79,148 @@ public Pair processWithFSO(OMUpdateEventBatch events, final Collection taskTables = getTaskTables(); Map nsSummaryMap = new HashMap<>(); int eventCounter = 0; - + final Collection objectIdsToBeDeleted = Collections.synchronizedList(new ArrayList<>()); while (eventIterator.hasNext()) { OMDBUpdateEvent omdbUpdateEvent = eventIterator.next(); + WithParentObjectId> omdbUpdateEvent = eventIterator.next(); OMDBUpdateEvent.OMDBUpdateAction action = omdbUpdateEvent.getAction(); eventCounter++; - // we only process updates on OM's FileTable and Dirtable + // we process updates on OM's FileTable, DirTable, DeletedTable and DeletedDirTable String table = omdbUpdateEvent.getTable(); - boolean updateOnFileTable = table.equals(FILE_TABLE); if (!taskTables.contains(table)) { continue; } - String updatedKey = omdbUpdateEvent.getKey(); - try { - if (updateOnFileTable) { - // key update on fileTable - OMDBUpdateEvent keyTableUpdateEvent = - (OMDBUpdateEvent) 
omdbUpdateEvent; - OmKeyInfo updatedKeyInfo = keyTableUpdateEvent.getValue(); - OmKeyInfo oldKeyInfo = keyTableUpdateEvent.getOldValue(); - - switch (action) { - case PUT: - handlePutKeyEvent(updatedKeyInfo, nsSummaryMap); - break; - - case DELETE: - handleDeleteKeyEvent(updatedKeyInfo, nsSummaryMap); - break; - - case UPDATE: - if (oldKeyInfo != null) { - // delete first, then put - handleDeleteKeyEvent(oldKeyInfo, nsSummaryMap); - } else { - LOG.warn("Update event does not have the old keyInfo for {}.", - updatedKey); - } - handlePutKeyEvent(updatedKeyInfo, nsSummaryMap); - break; - - default: - LOG.debug("Skipping DB update event : {}", - omdbUpdateEvent.getAction()); - } + if (table.equals(FILE_TABLE)) { + handleUpdateOnFileTable(omdbUpdateEvent, action, nsSummaryMap); + } else if (table.equals(DELETED_DIR_TABLE)) { + // Hard delete from deletedDirectoryTable - cleanup memory leak for directories + handleUpdateOnDeletedDirTable(omdbUpdateEvent, action, nsSummaryMap, objectIdsToBeDeleted); } else { // directory update on DirTable - OMDBUpdateEvent dirTableUpdateEvent = - (OMDBUpdateEvent) omdbUpdateEvent; - OmDirectoryInfo updatedDirectoryInfo = dirTableUpdateEvent.getValue(); - OmDirectoryInfo oldDirectoryInfo = dirTableUpdateEvent.getOldValue(); - - switch (action) { - case PUT: - handlePutDirEvent(updatedDirectoryInfo, nsSummaryMap); - break; - - case DELETE: - handleDeleteDirEvent(updatedDirectoryInfo, nsSummaryMap); - break; - - case UPDATE: - if (oldDirectoryInfo != null) { - // delete first, then put - handleDeleteDirEvent(oldDirectoryInfo, nsSummaryMap); - } else { - LOG.warn("Update event does not have the old dirInfo for {}.", - updatedKey); - } - handlePutDirEvent(updatedDirectoryInfo, nsSummaryMap); - break; - - default: - LOG.debug("Skipping DB update event : {}", - omdbUpdateEvent.getAction()); - } + handleUpdateOnDirTable(omdbUpdateEvent, action, nsSummaryMap); } } catch (IOException ioEx) { LOG.error("Unable to process Namespace Summary data in Recon DB. ", - ioEx); + ioEx); nsSummaryMap.clear(); return new ImmutablePair<>(seekPos, false); } if (nsSummaryMap.size() >= nsSummaryFlushToDBMaxThreshold) { - if (!flushAndCommitNSToDB(nsSummaryMap)) { + // Deleting hard deleted directories also along with this flush operation from NSSummary table + // Same list of objectIdsToBeDeleted is used for follow up flush operation as well and done intentionally + // to make sure that after final flush all objectIds are deleted from NSSummary table. + if (!flushAndCommitUpdatedNSToDB(nsSummaryMap, objectIdsToBeDeleted)) { return new ImmutablePair<>(seekPos, false); } seekPos = eventCounter + 1; } } - - // flush and commit left out entries at end - if (!flushAndCommitNSToDB(nsSummaryMap)) { + // flush and commit left out entries at end. + // Deleting hard deleted directories also along with this flush operation from NSSummary table + // Same list of objectIdsToBeDeleted is used this final flush operation as well and done intentionally + // to make sure that after final flush all objectIds are deleted from NSSummary table. 
+ if (!flushAndCommitUpdatedNSToDB(nsSummaryMap, objectIdsToBeDeleted)) { return new ImmutablePair<>(seekPos, false); } + LOG.debug("Completed a process run of NSSummaryTaskWithFSO"); return new ImmutablePair<>(seekPos, true); } + private void handleUpdateOnDirTable(OMDBUpdateEvent omdbUpdateEvent, + OMDBUpdateEvent.OMDBUpdateAction action, Map nsSummaryMap) + throws IOException { + OMDBUpdateEvent dirTableUpdateEvent = + (OMDBUpdateEvent) omdbUpdateEvent; + OmDirectoryInfo updatedDirectoryInfo = dirTableUpdateEvent.getValue(); + OmDirectoryInfo oldDirectoryInfo = dirTableUpdateEvent.getOldValue(); + + switch (action) { + case PUT: + handlePutDirEvent(updatedDirectoryInfo, nsSummaryMap); + break; + + case DELETE: + handleDeleteDirEvent(updatedDirectoryInfo, nsSummaryMap); + break; + + case UPDATE: + if (oldDirectoryInfo != null) { + // delete first, then put + handleDeleteDirEvent(oldDirectoryInfo, nsSummaryMap); + } else { + LOG.warn("Update event does not have the old dirInfo for {}.", dirTableUpdateEvent.getKey()); + } + handlePutDirEvent(updatedDirectoryInfo, nsSummaryMap); + break; + + default: + LOG.debug("Skipping DB update event : {}", + omdbUpdateEvent.getAction()); + } + } + + private void handleUpdateOnDeletedDirTable(OMDBUpdateEvent omdbUpdateEvent, + OMDBUpdateEvent.OMDBUpdateAction action, Map nsSummaryMap, + Collection objectIdsToBeDeleted) { + OMDBUpdateEvent deletedDirTableUpdateEvent = + (OMDBUpdateEvent) omdbUpdateEvent; + OmKeyInfo deletedKeyInfo = deletedDirTableUpdateEvent.getValue(); + + switch (action) { + case DELETE: + // When entry is removed from deletedDirTable, remove from nsSummaryMap to prevent memory leak + if (deletedKeyInfo != null) { + long objectId = deletedKeyInfo.getObjectID(); + nsSummaryMap.remove(objectId); + LOG.debug("Removed hard deleted directory with objectId {} from nsSummaryMap", objectId); + objectIdsToBeDeleted.add(objectId); + } + break; + + default: + LOG.debug("Skipping DB update event on deletedDirTable: {}", action); + } + } + + private void handleUpdateOnFileTable(OMDBUpdateEvent omdbUpdateEvent, + OMDBUpdateEvent.OMDBUpdateAction action, Map nsSummaryMap) + throws IOException { + // key update on fileTable + OMDBUpdateEvent keyTableUpdateEvent = + (OMDBUpdateEvent) omdbUpdateEvent; + OmKeyInfo updatedKeyInfo = keyTableUpdateEvent.getValue(); + OmKeyInfo oldKeyInfo = keyTableUpdateEvent.getOldValue(); + + switch (action) { + case PUT: + handlePutKeyEvent(updatedKeyInfo, nsSummaryMap); + break; + + case DELETE: + handleDeleteKeyEvent(updatedKeyInfo, nsSummaryMap); + break; + + case UPDATE: + if (oldKeyInfo != null) { + // delete first, then put + handleDeleteKeyEvent(oldKeyInfo, nsSummaryMap); + } else { + LOG.warn("Update event does not have the old keyInfo for {}.", omdbUpdateEvent.getKey()); + } + handlePutKeyEvent(updatedKeyInfo, nsSummaryMap); + break; + + default: + LOG.debug("Skipping DB update event : {}", + omdbUpdateEvent.getAction()); + } + } + public boolean reprocessWithFSO(OMMetadataManager omMetadataManager) { Map nsSummaryMap = new HashMap<>(); @@ -225,9 +267,10 @@ public boolean reprocessWithFSO(OMMetadataManager omMetadataManager) { } // flush and commit left out keys at end if (!flushAndCommitNSToDB(nsSummaryMap)) { + LOG.info("flushAndCommitNSToDB failed during reprocessWithFSO."); return false; } - LOG.debug("Completed a reprocess run of NSSummaryTaskWithFSO"); + LOG.info("Completed a reprocess run of NSSummaryTaskWithFSO"); return true; } } diff --git 
a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithFSO.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithFSO.java index 75fb468c5a98..018ca446f0a7 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithFSO.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryTaskWithFSO.java @@ -509,9 +509,10 @@ void testProcessWithFSOFlushAfterThresholdAndFailureOfLastElement() Mockito.when(mockIterator.hasNext()).thenReturn(true, true, true, true, false); Mockito.when(mockIterator.next()).thenReturn(event1, event2, event3, event4); - // Mock the flushAndCommitNSToDB method to fail on the last flush + // Mock the flushAndCommitUpdatedNSToDB method to fail on the last flush NSSummaryTaskWithFSO taskSpy = Mockito.spy(task); - Mockito.doReturn(true).doReturn(true).doReturn(false).when(taskSpy).flushAndCommitNSToDB(Mockito.anyMap()); + Mockito.doReturn(true).doReturn(true).doReturn(false).when(taskSpy) + .flushAndCommitUpdatedNSToDB(Mockito.anyMap(), Mockito.anyCollection()); // Call the method under test Pair result1 = taskSpy.processWithFSO(events, 0); @@ -522,7 +523,7 @@ void testProcessWithFSOFlushAfterThresholdAndFailureOfLastElement() // Verify interactions Mockito.verify(mockIterator, Mockito.times(3)).next(); - Mockito.verify(taskSpy, Mockito.times(1)).flushAndCommitNSToDB(Mockito.anyMap()); + Mockito.verify(taskSpy, Mockito.times(1)).flushAndCommitUpdatedNSToDB(Mockito.anyMap(), Mockito.anyCollection()); } } }