From 5d138bd20c8a3bfae944ec73f7bb7b62538fb0bc Mon Sep 17 00:00:00 2001 From: George Jahad Date: Sun, 21 Jan 2024 22:20:14 -0800 Subject: [PATCH 1/7] broken --- .../org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 71c6bc1c9754..c8c15f0f1643 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -198,13 +198,13 @@ public void shutdown() { } } - @ParameterizedTest - @ValueSource(ints = {100}) + @Test + // tried up to 1000 snapshots and this test works, but some of the // timeouts have to be increased. - @Unhealthy("HDDS-10059") - void testInstallSnapshot(int numSnapshotsToCreate, @TempDir Path tempDir) throws Exception { + public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { // Get the leader OM + int numSnapshotsToCreate = 100; String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) .getCurrentProxyOMNodeId(); From d0b1937beac690e2815ab3d734c03d41ef3b40bb Mon Sep 17 00:00:00 2001 From: George Jahad Date: Sun, 21 Jan 2024 22:51:41 -0800 Subject: [PATCH 2/7] fix break --- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index c8c15f0f1643..eef6ce6b6466 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; import org.apache.hadoop.hdds.utils.db.RDBCheckpointUtils; +import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; import org.apache.hadoop.ozone.client.BucketArgs; @@ -62,6 +63,8 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -326,7 +329,7 @@ public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, String snapshotName, List keys, SnapshotInfo snapshotInfo) - throws IOException { + throws IOException, RocksDBException { // Read back data from snapshot. OmKeyArgs omKeyArgs = new OmKeyArgs.Builder() .setVolumeName(volumeName) @@ -349,6 +352,13 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo)); // Get the list of hardlinks from the leader. Then confirm those links // are on the follower + RocksDB activeRocksDB = ((RDBStore)leaderOM.getMetadataManager().getStore()).getDb().getManagedRocksDb() + .get(); + List liveSstFiles = new ArrayList<>(); + liveSstFiles.addAll(activeRocksDB.getLiveFiles().files.stream().map(s -> s.substring(1)).collect( + Collectors.toList())); + + int hardLinkCount = 0; try (Streamlist = Files.list(leaderSnapshotDir)) { for (Path leaderSnapshotSST: list.collect(Collectors.toList())) { @@ -358,7 +368,8 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Path leaderActiveSST = Paths.get(leaderActiveDir.toString(), fileName); // Skip if not hard link on the leader - if (!leaderActiveSST.toFile().exists()) { + // First confirm it is live + if (!liveSstFiles.stream().anyMatch(s -> s.equals(fileName))) { continue; } // If it is a hard link on the leader, it should be a hard From 55217861e3bd196333f11688929b8dfbbabc018b Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 Jan 2024 09:30:58 -0800 Subject: [PATCH 3/7] fixed --- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index eef6ce6b6466..852325c60ca8 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -52,7 +52,6 @@ import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.apache.ratis.server.protocol.TermIndex; import org.assertj.core.api.Fail; import org.junit.jupiter.api.AfterEach; @@ -61,8 +60,6 @@ import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.slf4j.Logger; @@ -202,12 +199,12 @@ public void shutdown() { } @Test - // tried up to 1000 snapshots and this test works, but some of the // timeouts have to be increased. + static int numSnapshotsToCreate = 100; + public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { // Get the leader OM - int numSnapshotsToCreate = 100; String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) .getCurrentProxyOMNodeId(); @@ -350,15 +347,17 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Path leaderActiveDir = Paths.get(leaderMetaDir.toString(), OM_DB_NAME); Path leaderSnapshotDir = Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo)); - // Get the list of hardlinks from the leader. Then confirm those links - // are on the follower + + // Get list of live files on the leader. RocksDB activeRocksDB = ((RDBStore)leaderOM.getMetadataManager().getStore()).getDb().getManagedRocksDb() .get(); List liveSstFiles = new ArrayList<>(); + // strip the leading "/". liveSstFiles.addAll(activeRocksDB.getLiveFiles().files.stream().map(s -> s.substring(1)).collect( Collectors.toList())); - + // Get the list of hardlinks from the leader. Then confirm those links + // are on the follower int hardLinkCount = 0; try (Streamlist = Files.list(leaderSnapshotDir)) { for (Path leaderSnapshotSST: list.collect(Collectors.toList())) { From 897830d73cf81bc89b93328bd33ba994d7925f7c Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 Jan 2024 10:17:00 -0800 Subject: [PATCH 4/7] fix checkstyle --- .../java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 852325c60ca8..37844aca6448 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -198,11 +198,11 @@ public void shutdown() { } } - @Test // tried up to 1000 snapshots and this test works, but some of the // timeouts have to be increased. - static int numSnapshotsToCreate = 100; + private static int numSnapshotsToCreate = 100; + @Test public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { // Get the leader OM String leaderOMNodeId = OmFailoverProxyUtil From df58ab3412829abd0a8bbe4354cea4bbe30805ff Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 Jan 2024 13:53:58 -0800 Subject: [PATCH 5/7] disabling compaction on loading trn # --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index 342a0400cbd6..a9be4947f99c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -316,6 +316,7 @@ public static DBStore loadDB(OzoneConfiguration configuration, File metaDir, DBStoreBuilder dbStoreBuilder = DBStoreBuilder.newBuilder(configuration, rocksDBConfiguration) .setName(dbName) + .disableDefaultCFAutoCompaction(true) .setPath(Paths.get(metaDir.getPath())); // Add column family names and codecs. for (DBColumnFamilyDefinition columnFamily : definition From 9c14aeff1b0b947fde6ad96c766740abd0b73fff Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 Jan 2024 16:31:12 -0800 Subject: [PATCH 6/7] remove compaction disable --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index a9be4947f99c..342a0400cbd6 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -316,7 +316,6 @@ public static DBStore loadDB(OzoneConfiguration configuration, File metaDir, DBStoreBuilder dbStoreBuilder = DBStoreBuilder.newBuilder(configuration, rocksDBConfiguration) .setName(dbName) - .disableDefaultCFAutoCompaction(true) .setPath(Paths.get(metaDir.getPath())); // Add column family names and codecs. for (DBColumnFamilyDefinition columnFamily : definition From 34ea379a2a82599ea431d8ccded06bf3ae15bbed Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 29 Jan 2024 17:58:26 +0100 Subject: [PATCH 7/7] address review comments: use contains(), use Set, convert to constant --- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 37844aca6448..3780363a1b34 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -200,7 +200,7 @@ public void shutdown() { // tried up to 1000 snapshots and this test works, but some of the // timeouts have to be increased. - private static int numSnapshotsToCreate = 100; + private static final int SNAPSHOTS_TO_CREATE = 100; @Test public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { @@ -231,8 +231,7 @@ public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { String snapshotName = ""; List keys = new ArrayList<>(); SnapshotInfo snapshotInfo = null; - for (int snapshotCount = 0; snapshotCount < numSnapshotsToCreate; - snapshotCount++) { + for (int snapshotCount = 0; snapshotCount < SNAPSHOTS_TO_CREATE; snapshotCount++) { snapshotName = snapshotNamePrefix + snapshotCount; keys = writeKeys(keyIncrement); snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName); @@ -349,17 +348,17 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo)); // Get list of live files on the leader. - RocksDB activeRocksDB = ((RDBStore)leaderOM.getMetadataManager().getStore()).getDb().getManagedRocksDb() - .get(); - List liveSstFiles = new ArrayList<>(); + RocksDB activeRocksDB = ((RDBStore) leaderOM.getMetadataManager().getStore()) + .getDb().getManagedRocksDb().get(); // strip the leading "/". - liveSstFiles.addAll(activeRocksDB.getLiveFiles().files.stream().map(s -> s.substring(1)).collect( - Collectors.toList())); + Set liveSstFiles = activeRocksDB.getLiveFiles().files.stream() + .map(s -> s.substring(1)) + .collect(Collectors.toSet()); // Get the list of hardlinks from the leader. Then confirm those links // are on the follower int hardLinkCount = 0; - try (Streamlist = Files.list(leaderSnapshotDir)) { + try (Stream list = Files.list(leaderSnapshotDir)) { for (Path leaderSnapshotSST: list.collect(Collectors.toList())) { String fileName = leaderSnapshotSST.getFileName().toString(); if (fileName.toLowerCase().endsWith(".sst")) { @@ -368,7 +367,7 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Paths.get(leaderActiveDir.toString(), fileName); // Skip if not hard link on the leader // First confirm it is live - if (!liveSstFiles.stream().anyMatch(s -> s.equals(fileName))) { + if (!liveSstFiles.contains(fileName)) { continue; } // If it is a hard link on the leader, it should be a hard