Merged
9 changes: 9 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1561,6 +1561,15 @@
</description>
</property>

<property>
<name>ozone.om.snapshot.compact.non.snapshot.diff.tables</name>
<value>false</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>
Enable or disable compaction of tables that are not tracked by snapshot diff when their snapshot is evicted from the snapshot cache.
</description>
</property>
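For reference, a minimal sketch of enabling the flag programmatically (assuming org.apache.hadoop.hdds.conf.OzoneConfiguration and the OMConfigKeys constant added later in this change; the same value can also be set in ozone-site.xml):

    OzoneConfiguration conf = new OzoneConfiguration();
    // Default is false; true compacts non-snapshot-diff tables when a snapshot is evicted from the cache.
    conf.setBoolean(OMConfigKeys.OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES, true);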

<property>
<name>ozone.om.snapshot.rocksdb.metrics.enabled</name>
<value>false</value>
@@ -25,6 +25,7 @@
import org.apache.hadoop.hdds.annotation.InterfaceStability;
import org.apache.hadoop.hdds.utils.db.cache.TableCache;
import org.apache.hadoop.hdds.utils.db.cache.TableCache.CacheType;
import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions;
import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer;

/**
@@ -98,6 +99,23 @@ <KEY, VALUE> TypedTable<KEY, VALUE> getTable(
*/
void compactDB() throws IOException;

/**
* Compact the specified table.
*
* @param tableName - Name of the table to compact.
* @throws IOException on failure
*/
void compactTable(String tableName) throws IOException;

/**
* Compact the specified table using the given options.
*
* @param tableName - Name of the table to compact.
* @param options - Options for the compact operation.
* @throws IOException on failure
*/
void compactTable(String tableName, ManagedCompactRangeOptions options) throws IOException;

/**
* Moves a key from the Source Table to the destination Table.
*
@@ -222,6 +222,22 @@ public void compactDB() throws IOException {
}
}

@Override
public void compactTable(String tableName) throws IOException {
try (ManagedCompactRangeOptions options = new ManagedCompactRangeOptions()) {
compactTable(tableName, options);
Review comment (Contributor): This is not synchronous. Even if we call this function from the snapshot cache to compact the DB, it wouldn't be helpful, since the compaction would be cancelled. We would have to wait for all the compactions to finish before we close it.

Review comment (Contributor): Consider ManagedCompactRangeOptions.setExclusiveManualCompaction(true).
https://github.com/facebook/rocksdb/wiki/Manual-Compaction

Reply (Member, author): This seems to default to true, per https://github.com/facebook/rocksdb/wiki/Manual-Compaction#compactrange: "CompactRangeOptions::exclusive_manual_compaction: When set to true, no other compaction will run when this manual compaction is running. Default value is true."
}
}
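Related to the thread above, a minimal caller-side sketch of passing explicit options (not part of this change; it assumes a DBStore handle named store and that ManagedCompactRangeOptions exposes RocksDB's CompactRangeOptions setters):

    try (ManagedCompactRangeOptions options = new ManagedCompactRangeOptions()) {
      // Per the RocksDB wiki, exclusive_manual_compaction already defaults to true;
      // setting it explicitly just documents the intent.
      options.setExclusiveManualCompaction(true);
      store.compactTable("deletedTable", options);
    }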

@Override
public void compactTable(String tableName, ManagedCompactRangeOptions options) throws IOException {
RocksDatabase.ColumnFamily columnFamily = db.getColumnFamily(tableName);
if (columnFamily == null) {
throw new IOException("Table not found: " + tableName);
}
db.compactRange(columnFamily, null, null, options);
Review comment (Contributor): We should set options.setMaxCompactionBytes() when we open the snapshot RocksDB; this could be useful so that we ensure one sub-compaction doesn't take up a lot of memory. Also look into making ManagedCompactRangeOptions.setMaxSubCompactions() configurable so that we don't use a lot of CPU for this operation. It is OK if the compactions take time.
}
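A hedged sketch of the reviewer's suggestion above (not implemented in this change; it assumes the wrapper exposes the underlying RocksDB setMaxSubcompactions setter, and reuses the hypothetical store handle and table name from the earlier sketch):

    try (ManagedCompactRangeOptions options = new ManagedCompactRangeOptions()) {
      // Cap parallelism so a manual compaction does not monopolize CPU;
      // it is acceptable for the compaction to take longer.
      options.setMaxSubcompactions(1);
      store.compactTable("deletedTable", options);
    }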

@Override
public void close() throws IOException {
if (metrics != null) {
@@ -151,6 +151,23 @@ public void compactDB() throws Exception {

}

@Test
public void compactTable() throws Exception {
assertNotNull(rdbStore, "DBStore cannot be null");

for (int j = 0; j <= 20; j++) {
Review comment (Contributor): I have an interesting observation here. When I play with the number of insertions, I noticed that metaSizeBeforeCompact always fluctuates between 2 and 5. It looks like once it is about to reach 6, it does some compaction before the new insertion.

insertRandomData(rdbStore, 0);
rdbStore.flushDB();
}

int metaSizeBeforeCompact = rdbStore.getDb().getLiveFilesMetaDataSize();
rdbStore.compactTable(StringUtils.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY));
int metaSizeAfterCompact = rdbStore.getDb().getLiveFilesMetaDataSize();

assertThat(metaSizeAfterCompact).isLessThan(metaSizeBeforeCompact);
assertThat(metaSizeAfterCompact).isEqualTo(1);
}
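On the observation in the review comment above: the fluctuation is consistent with RocksDB's automatic L0 compaction trigger (level0_file_num_compaction_trigger, default 4) firing during the flush loop. A hedged sketch of pinning the trigger higher in test setup, assuming Ozone's ManagedColumnFamilyOptions wrapper exposes the underlying RocksDB setter:

    // Hypothetical test-setup tweak, not part of this change: raise the L0 trigger
    // so automatic compaction does not run between flushes and skew the live-file count.
    try (ManagedColumnFamilyOptions cfOptions = new ManagedColumnFamilyOptions()) {
      cfOptions.setLevel0FileNumCompactionTrigger(64);
      // ... use cfOptions for the column families when building the test RDBStore ...
    }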

@Test
public void close() throws Exception {
assertNotNull(rdbStore, "DBStore cannot be null");
@@ -637,6 +637,13 @@ public final class OMConfigKeys {
public static final String OZONE_OM_COMPACTION_SERVICE_COLUMNFAMILIES_DEFAULT =
"keyTable,fileTable,directoryTable,deletedTable,deletedDirectoryTable,multipartInfoTable";

/**
* Configuration to enable/disable compaction of non-snapshot-diff tables when snapshots are evicted from the cache.
*/
public static final String OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES =
"ozone.om.snapshot.compact.non.snapshot.diff.tables";
public static final boolean OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT = false;

/**
* Never constructed.
*/
@@ -30,6 +30,8 @@
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL_DEFAULT;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE_DEFAULT;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DB_MAX_OPEN_FILES;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DB_MAX_OPEN_FILES_DEFAULT;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DIFF_CLEANUP_SERVICE_RUN_INTERVAL;
@@ -289,8 +291,11 @@ public OmSnapshotManager(OzoneManager ozoneManager) {
.getTimeDuration(OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL,
OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS);
boolean compactNonSnapshotDiffTables = ozoneManager.getConfiguration()
.getBoolean(OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES,
OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT);
this.snapshotCache = new SnapshotCache(loader, softCacheSize, ozoneManager.getMetrics(),
cacheCleanupServiceInterval);
cacheCleanupServiceInterval, compactNonSnapshotDiffTables);

this.snapshotDiffManager = new SnapshotDiffManager(snapshotDiffDb, differ,
ozoneManager, snapDiffJobCf, snapDiffReportCf,
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.om.snapshot;

import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
import static org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.COLUMN_FAMILIES_TO_TRACK_IN_DAG;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheLoader;
@@ -28,6 +29,8 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hdds.utils.Scheduler;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmSnapshot;
import org.apache.hadoop.ozone.om.exceptions.OMException;
@@ -57,16 +60,47 @@ public class SnapshotCache implements ReferenceCountedCallback, AutoCloseable {
private final Scheduler scheduler;
private static final String SNAPSHOT_CACHE_CLEANUP_SERVICE =
"SnapshotCacheCleanupService";
private final boolean compactNonSnapshotDiffTables;

private final OMMetrics omMetrics;

private boolean shouldCompactTable(String tableName) {
return !COLUMN_FAMILIES_TO_TRACK_IN_DAG.contains(tableName);
}

/**
* Compacts the RocksDB tables in the given snapshot that are not part of the snapshot diff DAG.
* This operation is performed outside of the main snapshot operations to avoid blocking reads.
* Only tables that are not tracked in the DAG (determined by {@link #shouldCompactTable}) will be compacted.
*
* @param snapshot The OmSnapshot instance whose tables need to be compacted
* @throws IOException if there is an error accessing the metadata manager
*/
private void compactSnapshotDB(OmSnapshot snapshot) throws IOException {
if (!compactNonSnapshotDiffTables) {
return;
}
OMMetadataManager metadataManager = snapshot.getMetadataManager();
for (Table<?, ?> table : metadataManager.getStore().listTables()) {
if (shouldCompactTable(table.getName())) {
try {
metadataManager.getStore().compactTable(table.getName());
} catch (IOException e) {
LOG.warn("Failed to compact table {} in snapshot {}: {}",
table.getName(), snapshot.getSnapshotID(), e.getMessage());
}
}
}
}

public SnapshotCache(CacheLoader<UUID, OmSnapshot> cacheLoader, int cacheSizeLimit, OMMetrics omMetrics,
long cleanupInterval) {
long cleanupInterval, boolean compactNonSnapshotDiffTables) {
this.dbMap = new ConcurrentHashMap<>();
this.cacheLoader = cacheLoader;
this.cacheSizeLimit = cacheSizeLimit;
this.omMetrics = omMetrics;
this.pendingEvictionQueue = ConcurrentHashMap.newKeySet();
this.compactNonSnapshotDiffTables = compactNonSnapshotDiffTables;
if (cleanupInterval > 0) {
this.scheduler = new Scheduler(SNAPSHOT_CACHE_CLEANUP_SERVICE,
true, 1);
@@ -224,6 +258,16 @@ public void release(UUID key) {
void cleanup() {
if (dbMap.size() > cacheSizeLimit) {
for (UUID evictionKey : pendingEvictionQueue) {
ReferenceCounted<OmSnapshot> snapshot = dbMap.get(evictionKey);
if (snapshot != null && snapshot.getTotalRefCount() == 0) {
try {
compactSnapshotDB(snapshot.get());
} catch (IOException e) {
LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}",
evictionKey, e.getMessage());
}
}

dbMap.compute(evictionKey, (k, v) -> {
pendingEvictionQueue.remove(k);
if (v == null) {
@@ -18,18 +18,27 @@
package org.apache.hadoop.ozone.om.snapshot;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Mockito.any;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.google.common.cache.CacheLoader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.UUID;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmSnapshot;
import org.apache.ozone.test.GenericTestUtils;
@@ -68,6 +77,23 @@ static void beforeAll() throws Exception {
when(omSnapshot.getSnapshotTableKey()).thenReturn(snapshotID.toString());
when(omSnapshot.getSnapshotID()).thenReturn(snapshotID);

OMMetadataManager metadataManager = mock(OMMetadataManager.class);
org.apache.hadoop.hdds.utils.db.DBStore store = mock(org.apache.hadoop.hdds.utils.db.DBStore.class);
when(omSnapshot.getMetadataManager()).thenReturn(metadataManager);
when(metadataManager.getStore()).thenReturn(store);

Table<?, ?> table1 = mock(Table.class);
Table<?, ?> table2 = mock(Table.class);
Table<?, ?> keyTable = mock(Table.class);
when(table1.getName()).thenReturn("table1");
when(table2.getName()).thenReturn("table2");
when(keyTable.getName()).thenReturn("keyTable"); // This is in COLUMN_FAMILIES_TO_TRACK_IN_DAG
ArrayList tables = new ArrayList();
tables.add(table1);
tables.add(table2);
tables.add(keyTable);
when(store.listTables()).thenReturn(tables);

return omSnapshot;
}
);
@@ -80,7 +106,7 @@ static void beforeAll() throws Exception {
void setUp() {
// Reset cache for each test case
omMetrics = OMMetrics.create();
snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics, 50);
snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics, 50, true);
}

@AfterEach
@@ -209,7 +235,7 @@ private void assertEntryExistence(UUID key, boolean shouldExist) {
void testEviction1() throws IOException, InterruptedException, TimeoutException {

final UUID dbKey1 = UUID.randomUUID();
snapshotCache.get(dbKey1);
UncheckedAutoCloseableSupplier<OmSnapshot> snapshot1 = snapshotCache.get(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey1);
@@ -240,6 +266,13 @@ void testEviction1() throws IOException, InterruptedException, TimeoutException
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
assertEntryExistence(dbKey1, false);

// Verify compaction was called on the tables
org.apache.hadoop.hdds.utils.db.DBStore store1 = snapshot1.get().getMetadataManager().getStore();
verify(store1, times(1)).compactTable("table1");
verify(store1, times(1)).compactTable("table2");
// Verify compaction was NOT called on the reserved table
verify(store1, times(0)).compactTable("keyTable");
}

@Test
@@ -333,4 +366,54 @@ void testEviction3WithClose() throws IOException, InterruptedException, TimeoutE
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
}

@Test
@DisplayName("Snapshot operations not blocked during compaction")
void testSnapshotOperationsNotBlockedDuringCompaction() throws IOException, InterruptedException, TimeoutException {
omMetrics = OMMetrics.create();
snapshotCache = new SnapshotCache(cacheLoader, 1, omMetrics, 50, true);
final UUID dbKey1 = UUID.randomUUID();
UncheckedAutoCloseableSupplier<OmSnapshot> snapshot1 = snapshotCache.get(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey1);
assertEquals(1, snapshotCache.size());
assertEquals(1, omMetrics.getNumSnapshotCacheSize());

// Simulate compaction blocking
final Semaphore compactionLock = new Semaphore(1);
final AtomicBoolean table1Compacting = new AtomicBoolean(false);
final AtomicBoolean table1CompactedFinish = new AtomicBoolean(false);
org.apache.hadoop.hdds.utils.db.DBStore store1 = snapshot1.get().getMetadataManager().getStore();
doAnswer(invocation -> {
table1Compacting.set(true);
// Simulate compaction lock
compactionLock.acquire();
table1CompactedFinish.set(true);
return null;
}).when(store1).compactTable("table1");
compactionLock.acquire();

final UUID dbKey2 = UUID.randomUUID();
snapshotCache.get(dbKey2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());
snapshotCache.release(dbKey2);
assertEquals(2, snapshotCache.size());
assertEquals(2, omMetrics.getNumSnapshotCacheSize());

// wait for compaction to start
GenericTestUtils.waitFor(() -> table1Compacting.get(), 50, 3000);

snapshotCache.get(dbKey1); // this should not be blocked

// wait for compaction to finish
assertFalse(table1CompactedFinish.get());
compactionLock.release();
GenericTestUtils.waitFor(() -> table1CompactedFinish.get(), 50, 3000);

verify(store1, times(1)).compactTable("table1");
verify(store1, times(1)).compactTable("table2");
verify(store1, times(0)).compactTable("keyTable");
}
}
@@ -370,7 +370,7 @@ public void init() throws RocksDBException, IOException, ExecutionException {

omSnapshotManager = mock(OmSnapshotManager.class);
when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager);
SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics, 0);
SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics, 0, true);

when(omSnapshotManager.getActiveSnapshot(anyString(), anyString(), anyString()))
.thenAnswer(invocationOnMock -> {