diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index ae81dd2cc2ac..a20ae7391d97 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1561,6 +1561,15 @@
     </description>
   </property>

+  <property>
+    <name>ozone.om.snapshot.compact.non.snapshot.diff.tables</name>
+    <value>false</value>
+    <tag>OZONE, OM, PERFORMANCE</tag>
+    <description>
+      Enable or disable compaction of tables that are not tracked by snapshot diff when their snapshots are evicted from cache.
+    </description>
+  </property>
+
   <property>
     <name>ozone.om.snapshot.rocksdb.metrics.enabled</name>
     <value>false</value>
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
index 0b61822fb519..e33ef15898ff 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hdds.annotation.InterfaceStability;
 import org.apache.hadoop.hdds.utils.db.cache.TableCache;
 import org.apache.hadoop.hdds.utils.db.cache.TableCache.CacheType;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions;
 import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer;

 /**
@@ -98,6 +99,23 @@ TypedTable getTable(
    */
   void compactDB() throws IOException;

+  /**
+   * Compact the specific table.
+   *
+   * @param tableName - Name of the table to compact.
+   * @throws IOException on Failure
+   */
+  void compactTable(String tableName) throws IOException;
+
+  /**
+   * Compact the specific table.
+   *
+   * @param tableName - Name of the table to compact.
+   * @param options - Options for the compact operation.
+   * @throws IOException on Failure
+   */
+  void compactTable(String tableName, ManagedCompactRangeOptions options) throws IOException;
+
   /**
    * Moves a key from the Source Table to the destination Table.
* diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java index b0096730d01e..3539a906117d 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java @@ -222,6 +222,22 @@ public void compactDB() throws IOException { } } + @Override + public void compactTable(String tableName) throws IOException { + try (ManagedCompactRangeOptions options = new ManagedCompactRangeOptions()) { + compactTable(tableName, options); + } + } + + @Override + public void compactTable(String tableName, ManagedCompactRangeOptions options) throws IOException { + RocksDatabase.ColumnFamily columnFamily = db.getColumnFamily(tableName); + if (columnFamily == null) { + throw new IOException("Table not found: " + tableName); + } + db.compactRange(columnFamily, null, null, options); + } + @Override public void close() throws IOException { if (metrics != null) { diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java index 567c2a13c011..01be3a73f8e2 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java @@ -151,6 +151,23 @@ public void compactDB() throws Exception { } + @Test + public void compactTable() throws Exception { + assertNotNull(rdbStore, "DBStore cannot be null"); + + for (int j = 0; j <= 20; j++) { + insertRandomData(rdbStore, 0); + rdbStore.flushDB(); + } + + int metaSizeBeforeCompact = rdbStore.getDb().getLiveFilesMetaDataSize(); + rdbStore.compactTable(StringUtils.bytes2String(RocksDB.DEFAULT_COLUMN_FAMILY)); + int metaSizeAfterCompact = rdbStore.getDb().getLiveFilesMetaDataSize(); 
+ + assertThat(metaSizeAfterCompact).isLessThan(metaSizeBeforeCompact); + assertThat(metaSizeAfterCompact).isEqualTo(1); + } + @Test public void close() throws Exception { assertNotNull(rdbStore, "DBStore cannot be null"); diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index d9b08494f013..174c1a887c51 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -637,6 +637,13 @@ public final class OMConfigKeys { public static final String OZONE_OM_COMPACTION_SERVICE_COLUMNFAMILIES_DEFAULT = "keyTable,fileTable,directoryTable,deletedTable,deletedDirectoryTable,multipartInfoTable"; + /** + * Configuration to enable/disable non-snapshot diff table compaction when snapshots are evicted from cache. + */ + public static final String OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES = + "ozone.om.snapshot.compact.non.snapshot.diff.tables"; + public static final boolean OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT = false; + /** * Never constructed. 
*/ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index e6f893ee7210..262750e5c2f2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -30,6 +30,8 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_MAX_SIZE_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DB_MAX_OPEN_FILES; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DB_MAX_OPEN_FILES_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_SNAPSHOT_DIFF_CLEANUP_SERVICE_RUN_INTERVAL; @@ -289,8 +291,11 @@ public OmSnapshotManager(OzoneManager ozoneManager) { .getTimeDuration(OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL, OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + boolean compactNonSnapshotDiffTables = ozoneManager.getConfiguration() + .getBoolean(OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES, + OZONE_OM_SNAPSHOT_COMPACT_NON_SNAPSHOT_DIFF_TABLES_DEFAULT); this.snapshotCache = new SnapshotCache(loader, softCacheSize, ozoneManager.getMetrics(), - cacheCleanupServiceInterval); + cacheCleanupServiceInterval, compactNonSnapshotDiffTables); this.snapshotDiffManager = new SnapshotDiffManager(snapshotDiffDb, differ, ozoneManager, snapDiffJobCf, 
snapDiffReportCf, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index c6003af3c32b..4bdca5d04ffe 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.snapshot; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND; +import static org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.COLUMN_FAMILIES_TO_TRACK_IN_DAG; import com.google.common.annotations.VisibleForTesting; import com.google.common.cache.CacheLoader; @@ -28,6 +29,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.hdds.utils.Scheduler; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -57,16 +60,47 @@ public class SnapshotCache implements ReferenceCountedCallback, AutoCloseable { private final Scheduler scheduler; private static final String SNAPSHOT_CACHE_CLEANUP_SERVICE = "SnapshotCacheCleanupService"; + private final boolean compactNonSnapshotDiffTables; private final OMMetrics omMetrics; + private boolean shouldCompactTable(String tableName) { + return !COLUMN_FAMILIES_TO_TRACK_IN_DAG.contains(tableName); + } + + /** + * Compacts the RocksDB tables in the given snapshot that are not part of the snapshot diff DAG. + * This operation is performed outside of the main snapshot operations to avoid blocking reads. + * Only tables that are not tracked in the DAG (determined by {@link #shouldCompactTable}) will be compacted. 
+ * + * @param snapshot The OmSnapshot instance whose tables need to be compacted + * @throws IOException if there is an error accessing the metadata manager + */ + private void compactSnapshotDB(OmSnapshot snapshot) throws IOException { + if (!compactNonSnapshotDiffTables) { + return; + } + OMMetadataManager metadataManager = snapshot.getMetadataManager(); + for (Table table : metadataManager.getStore().listTables()) { + if (shouldCompactTable(table.getName())) { + try { + metadataManager.getStore().compactTable(table.getName()); + } catch (IOException e) { + LOG.warn("Failed to compact table {} in snapshot {}: {}", + table.getName(), snapshot.getSnapshotID(), e.getMessage()); + } + } + } + } + public SnapshotCache(CacheLoader cacheLoader, int cacheSizeLimit, OMMetrics omMetrics, - long cleanupInterval) { + long cleanupInterval, boolean compactNonSnapshotDiffTables) { this.dbMap = new ConcurrentHashMap<>(); this.cacheLoader = cacheLoader; this.cacheSizeLimit = cacheSizeLimit; this.omMetrics = omMetrics; this.pendingEvictionQueue = ConcurrentHashMap.newKeySet(); + this.compactNonSnapshotDiffTables = compactNonSnapshotDiffTables; if (cleanupInterval > 0) { this.scheduler = new Scheduler(SNAPSHOT_CACHE_CLEANUP_SERVICE, true, 1); @@ -224,6 +258,16 @@ public void release(UUID key) { void cleanup() { if (dbMap.size() > cacheSizeLimit) { for (UUID evictionKey : pendingEvictionQueue) { + ReferenceCounted snapshot = dbMap.get(evictionKey); + if (snapshot != null && snapshot.getTotalRefCount() == 0) { + try { + compactSnapshotDB(snapshot.get()); + } catch (IOException e) { + LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}", + evictionKey, e.getMessage()); + } + } + dbMap.compute(evictionKey, (k, v) -> { pendingEvictionQueue.remove(k); if (v == null) { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index 4a9bc0f8577e..195bcd105a92 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -18,18 +18,27 @@ package org.apache.hadoop.ozone.om.snapshot; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.fail; -import static org.mockito.Mockito.any; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.google.common.cache.CacheLoader; import java.io.IOException; +import java.util.ArrayList; import java.util.UUID; +import java.util.concurrent.Semaphore; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.ozone.test.GenericTestUtils; @@ -68,6 +77,23 @@ static void beforeAll() throws Exception { when(omSnapshot.getSnapshotTableKey()).thenReturn(snapshotID.toString()); when(omSnapshot.getSnapshotID()).thenReturn(snapshotID); + OMMetadataManager metadataManager = mock(OMMetadataManager.class); + org.apache.hadoop.hdds.utils.db.DBStore store = mock(org.apache.hadoop.hdds.utils.db.DBStore.class); + 
when(omSnapshot.getMetadataManager()).thenReturn(metadataManager); + when(metadataManager.getStore()).thenReturn(store); + + Table table1 = mock(Table.class); + Table table2 = mock(Table.class); + Table keyTable = mock(Table.class); + when(table1.getName()).thenReturn("table1"); + when(table2.getName()).thenReturn("table2"); + when(keyTable.getName()).thenReturn("keyTable"); // This is in COLUMN_FAMILIES_TO_TRACK_IN_DAG + ArrayList tables = new ArrayList(); + tables.add(table1); + tables.add(table2); + tables.add(keyTable); + when(store.listTables()).thenReturn(tables); + return omSnapshot; } ); @@ -80,7 +106,7 @@ static void beforeAll() throws Exception { void setUp() { // Reset cache for each test case omMetrics = OMMetrics.create(); - snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics, 50); + snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics, 50, true); } @AfterEach @@ -209,7 +235,7 @@ private void assertEntryExistence(UUID key, boolean shouldExist) { void testEviction1() throws IOException, InterruptedException, TimeoutException { final UUID dbKey1 = UUID.randomUUID(); - snapshotCache.get(dbKey1); + UncheckedAutoCloseableSupplier snapshot1 = snapshotCache.get(dbKey1); assertEquals(1, snapshotCache.size()); assertEquals(1, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey1); @@ -240,6 +266,13 @@ void testEviction1() throws IOException, InterruptedException, TimeoutException assertEquals(1, snapshotCache.size()); assertEquals(1, omMetrics.getNumSnapshotCacheSize()); assertEntryExistence(dbKey1, false); + + // Verify compaction was called on the tables + org.apache.hadoop.hdds.utils.db.DBStore store1 = snapshot1.get().getMetadataManager().getStore(); + verify(store1, times(1)).compactTable("table1"); + verify(store1, times(1)).compactTable("table2"); + // Verify compaction was NOT called on the reserved table + verify(store1, times(0)).compactTable("keyTable"); } @Test @@ -333,4 +366,54 @@ void 
testEviction3WithClose() throws IOException, InterruptedException, TimeoutE assertEquals(1, snapshotCache.size()); assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } + + @Test + @DisplayName("Snapshot operations not blocked during compaction") + void testSnapshotOperationsNotBlockedDuringCompaction() throws IOException, InterruptedException, TimeoutException { + omMetrics = OMMetrics.create(); + snapshotCache = new SnapshotCache(cacheLoader, 1, omMetrics, 50, true); + final UUID dbKey1 = UUID.randomUUID(); + UncheckedAutoCloseableSupplier snapshot1 = snapshotCache.get(dbKey1); + assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); + snapshotCache.release(dbKey1); + assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); + + // Simulate compaction blocking + final Semaphore compactionLock = new Semaphore(1); + final AtomicBoolean table1Compacting = new AtomicBoolean(false); + final AtomicBoolean table1CompactedFinish = new AtomicBoolean(false); + org.apache.hadoop.hdds.utils.db.DBStore store1 = snapshot1.get().getMetadataManager().getStore(); + doAnswer(invocation -> { + table1Compacting.set(true); + // Simulate compaction lock + compactionLock.acquire(); + table1CompactedFinish.set(true); + return null; + }).when(store1).compactTable("table1"); + compactionLock.acquire(); + + final UUID dbKey2 = UUID.randomUUID(); + snapshotCache.get(dbKey2); + assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); + snapshotCache.release(dbKey2); + assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); + + // wait for compaction to start + GenericTestUtils.waitFor(() -> table1Compacting.get(), 50, 3000); + + snapshotCache.get(dbKey1); // this should not be blocked + + // wait for compaction to finish + assertFalse(table1CompactedFinish.get()); + compactionLock.release(); + GenericTestUtils.waitFor(() -> 
table1CompactedFinish.get(), 50, 3000); + + verify(store1, times(1)).compactTable("table1"); + verify(store1, times(1)).compactTable("table2"); + verify(store1, times(0)).compactTable("keyTable"); + } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index 71ce98c3876c..57666d3665e2 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -370,7 +370,7 @@ public void init() throws RocksDBException, IOException, ExecutionException { omSnapshotManager = mock(OmSnapshotManager.class); when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); - SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics, 0); + SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics, 0, true); when(omSnapshotManager.getActiveSnapshot(anyString(), anyString(), anyString())) .thenAnswer(invocationOnMock -> {