diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 71c6bc1c9754..3780363a1b34 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.DBCheckpoint; import org.apache.hadoop.hdds.utils.db.RDBCheckpointUtils; +import org.apache.hadoop.hdds.utils.db.RDBStore; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; import org.apache.hadoop.ozone.client.BucketArgs; @@ -51,7 +52,6 @@ import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.tag.Unhealthy; import org.apache.ratis.server.protocol.TermIndex; import org.assertj.core.api.Fail; import org.junit.jupiter.api.AfterEach; @@ -60,8 +60,8 @@ import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -198,12 +198,12 @@ public void shutdown() { } } - @ParameterizedTest - @ValueSource(ints = {100}) // tried up to 1000 snapshots and this test works, but some of the // timeouts have to be increased. 
- @Unhealthy("HDDS-10059") - void testInstallSnapshot(int numSnapshotsToCreate, @TempDir Path tempDir) throws Exception { + private static final int SNAPSHOTS_TO_CREATE = 100; + + @Test + public void testInstallSnapshot(@TempDir Path tempDir) throws Exception { // Get the leader OM String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) @@ -231,8 +231,7 @@ void testInstallSnapshot(int numSnapshotsToCreate, @TempDir Path tempDir) throws String snapshotName = ""; List keys = new ArrayList<>(); SnapshotInfo snapshotInfo = null; - for (int snapshotCount = 0; snapshotCount < numSnapshotsToCreate; - snapshotCount++) { + for (int snapshotCount = 0; snapshotCount < SNAPSHOTS_TO_CREATE; snapshotCount++) { snapshotName = snapshotNamePrefix + snapshotCount; keys = writeKeys(keyIncrement); snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName); @@ -326,7 +325,7 @@ void testInstallSnapshot(int numSnapshotsToCreate, @TempDir Path tempDir) throws private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, String snapshotName, List keys, SnapshotInfo snapshotInfo) - throws IOException { + throws IOException, RocksDBException { // Read back data from snapshot. OmKeyArgs omKeyArgs = new OmKeyArgs.Builder() .setVolumeName(volumeName) @@ -347,10 +346,19 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Path leaderActiveDir = Paths.get(leaderMetaDir.toString(), OM_DB_NAME); Path leaderSnapshotDir = Paths.get(getSnapshotPath(leaderOM.getConfiguration(), snapshotInfo)); + + // Get list of live files on the leader. + RocksDB activeRocksDB = ((RDBStore) leaderOM.getMetadataManager().getStore()) + .getDb().getManagedRocksDb().get(); + // strip the leading "/". + Set<String> liveSstFiles = activeRocksDB.getLiveFiles().files.stream() + .map(s -> s.substring(1)) + .collect(Collectors.toSet()); + // Get the list of hardlinks from the leader. 
Then confirm those links // are on the follower int hardLinkCount = 0; - try (Stream<Path>list = Files.list(leaderSnapshotDir)) { + try (Stream<Path> list = Files.list(leaderSnapshotDir)) { for (Path leaderSnapshotSST: list.collect(Collectors.toList())) { String fileName = leaderSnapshotSST.getFileName().toString(); if (fileName.toLowerCase().endsWith(".sst")) { @@ -358,7 +366,8 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Path leaderActiveSST = Paths.get(leaderActiveDir.toString(), fileName); // Skip if not hard link on the leader - if (!leaderActiveSST.toFile().exists()) { + // First confirm it is live + if (!liveSstFiles.contains(fileName)) { continue; } // If it is a hard link on the leader, it should be a hard