From 70ba72619166844a3da254763db397370e3f4ffb Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 May 2023 14:51:21 -0700 Subject: [PATCH 01/20] moved to lastest master --- .../org/apache/hadoop/hdds/utils/HAUtils.java | 24 +- .../hdds/utils/RDBSnapshotProvider.java | 16 +- .../hdds/utils/TestRDBSnapshotProvider.java | 19 ++ .../apache/hadoop/ozone/om/OMConfigKeys.java | 2 +- .../ozone/om/TestOMDbCheckpointServlet.java | 17 +- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 182 ++++++++++++-- .../ozone/om/OMDBCheckpointServlet.java | 133 +++++++--- .../apache/hadoop/ozone/om/OzoneManager.java | 15 +- .../ozone/om/snapshot/OmSnapshotUtils.java | 45 ++-- .../ozone/om/TestOmSnapshotManager.java | 231 +++++++++++++++--- .../om/snapshot/TestOmSnapshotUtils.java | 68 ++++++ 11 files changed, 606 insertions(+), 146 deletions(-) create mode 100644 hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index 4009cbec8c1b..ad2b44a8034f 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -66,6 +66,8 @@ import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CA_LIST_RETRY_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CA_LIST_RETRY_INTERVAL_DEFAULT; @@ -357,26 +359,26 @@ public static File getMetaDir(DBDefinition definition, } /** - * Scan the DB dir and return the existing SST files. + * Scan the DB dir and return the existing SST files, + * including omSnapshot sst files. * SSTs could be used for avoiding repeated download. * * @param db the file representing the DB to be scanned * @return the list of SST file name. If db not exist, will return empty list */ - public static List getExistingSstFiles(File db) { + public static List getExistingSstFiles(File db) throws IOException { List sstList = new ArrayList<>(); if (!db.exists()) { return sstList; } - FilenameFilter filter = new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.endsWith(ROCKSDB_SST_SUFFIX); - } - }; - String[] tempArray = db.list(filter); - if (tempArray != null) { - sstList = Arrays.asList(tempArray); + + int truncateLength = db.toString().length() + 1; + // Walk the db dir and get all sst files including omSnapshot files. + try(Stream files = Files.walk(db.toPath())) { + sstList = + files.filter(path -> path.toString().endsWith(ROCKSDB_SST_SUFFIX)). + map(p -> p.toString().substring(truncateLength)). + collect(Collectors.toList()); if (LOG.isDebugEnabled()) { LOG.debug("Scanned SST files {} in {}.", sstList, db.getAbsolutePath()); } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java index c934e7d2b999..3391ffc33599 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java @@ -60,6 +60,7 @@ public abstract class RDBSnapshotProvider implements Closeable { private final AtomicReference lastLeaderRef; private final AtomicLong numDownloaded; private FaultInjector injector; + private final AtomicLong initCount; public RDBSnapshotProvider(File snapshotDir, String dbName) { this.snapshotDir = snapshotDir; @@ -68,6 +69,7 @@ public RDBSnapshotProvider(File snapshotDir, String dbName) { this.injector = null; this.lastLeaderRef = new AtomicReference<>(null); this.numDownloaded = new AtomicLong(); + this.initCount = new AtomicLong(); init(); } @@ -91,6 +93,7 @@ public synchronized void init() { // reset leader info lastLeaderRef.set(null); + initCount.incrementAndGet(); } /** @@ -112,13 +115,14 @@ public DBCheckpoint downloadDBSnapshotFromLeader(String leaderNodeID) LOG.info("Successfully download the latest snapshot {} from leader OM: {}", targetFile, leaderNodeID); + numDownloaded.incrementAndGet(); + injectPause(); + RocksDBCheckpoint checkpoint = getCheckpointFromSnapshotFile(targetFile, candidateDir, true); LOG.info("Successfully untar the downloaded snapshot {} at {}.", targetFile, checkpoint.getCheckpointLocation()); - numDownloaded.incrementAndGet(); - injectPause(); return checkpoint; } @@ -131,7 +135,8 @@ public DBCheckpoint downloadDBSnapshotFromLeader(String leaderNodeID) * * @param currentLeader the ID of leader node */ - private void checkLeaderConsistent(String currentLeader) { + @VisibleForTesting + void checkLeaderConsistent(String currentLeader) throws IOException { String lastLeader = lastLeaderRef.get(); if (lastLeader != null) { if (!lastLeader.equals(currentLeader)) { @@ -230,4 +235,9 @@ public void setInjector(FaultInjector injector) { public long getNumDownloaded() { return numDownloaded.get(); } + + @VisibleForTesting + public long getInitCount() { + return initCount.get(); + } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index 54fb72645ac6..a757085fc8d6 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -46,6 +46,8 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -56,6 +58,7 @@ import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream; import static org.apache.hadoop.hdds.utils.db.TestRDBStore.newRDBStore; +import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -236,4 +239,20 @@ public void insertRandomData(RDBStore dbStore, int familyIndex) throw new IOException(e); } } + + @Test + public void testCheckLeaderConsistent() throws IOException { + assertTrue(rdbSnapshotProvider.getInitCount() == 1); + File dummyFile = new File(rdbSnapshotProvider.getCandidateDir(), + "file1.sst"); + Files.write(dummyFile.toPath(), + "dummyData".getBytes(StandardCharsets.UTF_8)); + rdbSnapshotProvider.checkLeaderConsistent("node1"); + assertTrue(rdbSnapshotProvider.getInitCount() == 2); + assertFalse(dummyFile.exists()); + rdbSnapshotProvider.checkLeaderConsistent("node1"); + assertTrue(rdbSnapshotProvider.getInitCount() == 2); + rdbSnapshotProvider.checkLeaderConsistent("node2"); + assertTrue(rdbSnapshotProvider.getInitCount() == 3); + } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 5c6a39d1984b..7204ef66b26a 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -233,7 +233,7 @@ private OMConfigKeys() { "ozone.om.snapshot.provider.request.timeout"; public static final TimeDuration OZONE_OM_SNAPSHOT_PROVIDER_REQUEST_TIMEOUT_DEFAULT = - TimeDuration.valueOf(5000, TimeUnit.MILLISECONDS); + TimeDuration.valueOf(300000, TimeUnit.MILLISECONDS); public static final String OZONE_OM_FS_SNAPSHOT_MAX_LIMIT = "ozone.om.fs.snapshot.max.limit"; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java index 498a446be9cc..e3ab2ef30697 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java @@ -121,6 +121,7 @@ public class TestOMDbCheckpointServlet { private String snapshotDirName2; private Path compactionDirPath; private DBCheckpoint dbCheckpoint; + private static final String FABRICATED_FILE_NAME = "fabricatedFile.sst"; @Rule public Timeout timeout = Timeout.seconds(240); @@ -361,7 +362,7 @@ public void testWriteDbDataToStream() throws Exception { for (String line : lines.collect(Collectors.toList())) { Assert.assertFalse("CURRENT file is not a hard link", line.contains("CURRENT")); - if (line.contains("fabricatedFile")) { + if (line.contains(FABRICATED_FILE_NAME)) { fabricatedLinkLines.add(line); } else { checkLine(shortSnapshotLocation, shortSnapshotLocation2, line); @@ -478,14 +479,14 @@ private void prepSnapshotData() throws Exception { new File(snapshotDirName).getParent(), "fabricatedSnapshot"); fabricatedSnapshot.toFile().mkdirs(); - Assert.assertTrue(Paths.get(fabricatedSnapshot.toString(), "fabricatedFile") + Assert.assertTrue(Paths.get(fabricatedSnapshot.toString(), FABRICATED_FILE_NAME) .toFile().createNewFile()); // Create fabricated links to snapshot dirs // to confirm that links are recognized even if // they are don't point to the checkpoint directory. - Path fabricatedFile = Paths.get(snapshotDirName, "fabricatedFile"); - Path fabricatedLink = Paths.get(snapshotDirName2, "fabricatedFile"); + Path fabricatedFile = Paths.get(snapshotDirName, FABRICATED_FILE_NAME); + Path fabricatedLink = Paths.get(snapshotDirName2, FABRICATED_FILE_NAME); Files.write(fabricatedFile, "fabricatedData".getBytes(StandardCharsets.UTF_8)); @@ -495,7 +496,7 @@ private void prepSnapshotData() throws Exception { compactionDirPath = Paths.get(metaDir.toString(), OM_SNAPSHOT_DIFF_DIR, DB_COMPACTION_SST_BACKUP_DIR); Path fabricatedLink2 = Paths.get(compactionDirPath.toString(), - "fabricatedFile"); + FABRICATED_FILE_NAME); Files.createLink(fabricatedLink2, fabricatedFile); Path currentFile = Paths.get(metaDir.toString(), OM_DB_NAME, "CURRENT"); @@ -565,7 +566,7 @@ private void checkFabricatedLines(Set directories, List lines, // find the real file String realDir = null; for (String dir: directories) { - if (Paths.get(testDirName, dir, "fabricatedFile").toFile().exists()) { + if (Paths.get(testDirName, dir, FABRICATED_FILE_NAME).toFile().exists()) { Assert.assertNull( "Exactly one copy of the fabricated file exists in the tarball", realDir); @@ -589,8 +590,8 @@ private void checkFabricatedLines(Set directories, List lines, Path path0 = Paths.get(files[0]); Path path1 = Paths.get(files[1]); Assert.assertTrue("fabricated entries contains correct file name", - path0.getFileName().toString().equals("fabricatedFile") && - path1.getFileName().toString().equals("fabricatedFile")); + path0.getFileName().toString().equals(FABRICATED_FILE_NAME) && + path1.getFileName().toString().equals(FABRICATED_FILE_NAME)); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 1850ba0b5a95..84b638100d96 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.om; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.ExitManager; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; @@ -55,6 +56,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -64,8 +66,10 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; @@ -76,9 +80,11 @@ import java.util.stream.Stream; import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; +import static org.apache.hadoop.ozone.om.OmSnapshotManager.OM_HARDLINK_FILE; import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPath; import static org.apache.hadoop.ozone.om.TestOzoneManagerHAWithData.createKey; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -166,15 +172,17 @@ public void shutdown() { } } + // @ParameterizedTest + // @ValueSource(ints = {1000}) @Test public void testInstallSnapshot() throws Exception { + int numSnapshotsToCreate = 100; // Get the leader OM String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) .getCurrentProxyOMNodeId(); OzoneManager leaderOM = cluster.getOzoneManager(leaderOMNodeId); - OzoneManagerRatisServer leaderRatisServer = leaderOM.getOmRatisServer(); // Find the inactive OM String followerNodeId = leaderOM.getPeerNodes().get(0).getNodeId(); @@ -184,9 +192,17 @@ public void testInstallSnapshot() throws Exception { OzoneManager followerOM = cluster.getOzoneManager(followerNodeId); // Do some transactions so that the log index increases - List keys = writeKeysToIncreaseLogIndex(leaderRatisServer, 200); + int keyIncrement = 10; + String snapshotNamePrefix = "snapshot"; + String snapshotName = ""; + List keys = new ArrayList<>(); + SnapshotInfo snapshotInfo = null; + for (int snapshotCount = 0; snapshotCount < numSnapshotsToCreate; snapshotCount++) { + snapshotName = snapshotNamePrefix + snapshotCount; + keys = writeKeys(keyIncrement); + snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName); + } - SnapshotInfo snapshotInfo = createOzoneSnapshot(leaderOM); // Get the latest db checkpoint from the leader OM. TransactionInfo transactionInfo = @@ -204,11 +220,11 @@ public void testInstallSnapshot() throws Exception { // The recently started OM should be lagging behind the leader OM. // Wait & for follower to update transactions to leader snapshot index. - // Timeout error if follower does not load update within 3s + // Timeout error if follower does not load update within 10s GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 100, 3000); + }, 100, 600000); long followerOMLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex(); @@ -231,12 +247,12 @@ public void testInstallSnapshot() throws Exception { // Verify that the follower OM's DB contains the transactions which were // made while it was inactive. OMMetadataManager followerOMMetaMngr = followerOM.getMetadataManager(); - Assert.assertNotNull(followerOMMetaMngr.getVolumeTable().get( + assertNotNull(followerOMMetaMngr.getVolumeTable().get( followerOMMetaMngr.getVolumeKey(volumeName))); - Assert.assertNotNull(followerOMMetaMngr.getBucketTable().get( + assertNotNull(followerOMMetaMngr.getBucketTable().get( followerOMMetaMngr.getBucketKey(volumeName, bucketName))); for (String key : keys) { - Assert.assertNotNull(followerOMMetaMngr.getKeyTable( + assertNotNull(followerOMMetaMngr.getKeyTable( TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } @@ -259,11 +275,17 @@ public void testInstallSnapshot() throws Exception { volumeName, bucketName, newKeys.get(0)))); */ - // Read back data from the OM snapshot. + checkSnapshot(leaderOM, followerOM, snapshotName, keys, snapshotInfo); + } + + private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, String snapshotName, + List keys, SnapshotInfo snapshotInfo) + throws IOException { + // Read back data from snapshot. OmKeyArgs omKeyArgs = new OmKeyArgs.Builder() .setVolumeName(volumeName) .setBucketName(bucketName) - .setKeyName(".snapshot/snap1/" + keys.get(0)).build(); + .setKeyName(".snapshot/" + snapshotName + "/" + keys.get(keys.size() - 1)).build(); OmKeyInfo omKeyInfo; omKeyInfo = followerOM.lookupKey(omKeyArgs); Assertions.assertNotNull(omKeyInfo); @@ -313,8 +335,10 @@ public void testInstallSnapshot() throws Exception { } @Test + //gbjfix @Timeout(300) - public void testInstallIncrementalSnapshot() throws Exception { + public void testInstallIncrementalSnapshot(@TempDir Path tempDir) + throws Exception { // Get the leader OM String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) @@ -338,20 +362,21 @@ public void testInstallIncrementalSnapshot() throws Exception { List firstKeys = writeKeysToIncreaseLogIndex(leaderRatisServer, 80); + SnapshotInfo snapshotInfo2 = createOzoneSnapshot(leaderOM, "snap80"); + // Start the inactive OM. Checkpoint installation will happen spontaneously. cluster.startInactiveOM(followerNodeId); // Wait the follower download the snapshot,but get stuck by injector + //gbjfix GenericTestUtils.waitFor(() -> { return followerOM.getOmSnapshotProvider().getNumDownloaded() == 1; }, 1000, 10000); - // Do some transactions, let leader OM take a new snapshot and purge the - // old logs, so that follower must download the new snapshot again. - List secondKeys = writeKeysToIncreaseLogIndex(leaderRatisServer, - 160); - - // Resume the follower thread, it would download the incremental snapshot. + TestResults firstIncrement = getNextIncrementalTarball(160, 2, leaderOM, + leaderRatisServer, faultInjector, followerOM, tempDir); + TestResults secondIncrement = getNextIncrementalTarball(240, 3, leaderOM, + leaderRatisServer, faultInjector, followerOM, tempDir); faultInjector.resume(); // Get the latest db checkpoint from the leader OM. @@ -361,16 +386,16 @@ public void testInstallIncrementalSnapshot() throws Exception { TermIndex.valueOf(transactionInfo.getTerm(), transactionInfo.getTransactionIndex()); long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex(); - + //gbjfix // The recently started OM should be lagging behind the leader OM. // Wait & for follower to update transactions to leader snapshot index. // Timeout error if follower does not load update within 10s GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 1000, 10000); + }, 1000, 30000); - assertEquals(2, followerOM.getOmSnapshotProvider().getNumDownloaded()); + assertEquals(3, followerOM.getOmSnapshotProvider().getNumDownloaded()); // Verify that the follower OM's DB contains the transactions which were // made while it was inactive. @@ -384,7 +409,12 @@ public void testInstallIncrementalSnapshot() throws Exception { assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } - for (String key : secondKeys) { + for (String key : firstIncrement.keys) { + assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) + .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); + } + + for (String key : secondIncrement.keys) { assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } @@ -394,7 +424,7 @@ public void testInstallIncrementalSnapshot() throws Exception { getDBCheckpointMetrics(); Assertions.assertTrue( dbMetrics.getLastCheckpointStreamingNumSSTExcluded() > 0); - assertEquals(1, dbMetrics.getNumIncrementalCheckpoints()); + assertEquals(2, dbMetrics.getNumIncrementalCheckpoints()); // Verify RPC server is running GenericTestUtils.waitFor(() -> { @@ -413,6 +443,89 @@ public void testInstallIncrementalSnapshot() throws Exception { getCandidateDir().list(); assertNotNull(filesInCandidate); assertEquals(0, filesInCandidate.length); + + checkSnapshot(leaderOM, followerOM, "snap80", firstKeys, snapshotInfo2); + checkSnapshot(leaderOM, followerOM, "snap160", firstIncrement.keys, firstIncrement.snapshotInfo); + checkSnapshot(leaderOM, followerOM, "snap240", secondIncrement.keys, secondIncrement.snapshotInfo); + Assertions.assertEquals(followerOM.getOmSnapshotProvider().getInitCount(), 2, + "Only initialized twice"); + } + + private static class TestResults { + SnapshotInfo snapshotInfo; + List keys; + } + + private TestResults getNextIncrementalTarball(int numKeys, + int expectedNumDownloads, + OzoneManager leaderOM, + OzoneManagerRatisServer leaderRatisServer, + FaultInjector faultInjector, + OzoneManager followerOM, + Path tempDir) + throws IOException, InterruptedException, TimeoutException { + TestResults tr = new TestResults(); + + // Get the latest db checkpoint from the leader OM. + TransactionInfo transactionInfo = + TransactionInfo.readTransactionInfo(leaderOM.getMetadataManager()); + TermIndex leaderOMTermIndex = + TermIndex.valueOf(transactionInfo.getTerm(), + transactionInfo.getTransactionIndex()); + long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex(); + // Do some transactions, let leader OM take a new snapshot and purge the + // old logs, so that follower must download the new increment. + tr.keys = writeKeysToIncreaseLogIndex(leaderRatisServer, + numKeys); + + tr.snapshotInfo = createOzoneSnapshot(leaderOM, "snap" + numKeys); + // Resume the follower thread, it would download the incremental snapshot. + faultInjector.resume(); + + // Pause the follower thread again to block the next install + faultInjector.reset(); + + //gbjfix + // Wait the follower download the incremental snapshot, but get stuck + // by injector + GenericTestUtils.waitFor(() -> + followerOM.getOmSnapshotProvider().getNumDownloaded() == + expectedNumDownloads, 1000, 10000); + + assertTrue(followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() + >= leaderOMSnapshotIndex - 1); + Path increment = Paths.get(tempDir.toString(), "increment" + numKeys); + increment.toFile().mkdirs(); + unTarLatestTarBall(followerOM, increment); + List sstFiles = HAUtils.getExistingSstFiles(increment.toFile()); + Path followerCandidatePath = followerOM.getOmSnapshotProvider(). + getCandidateDir().toPath(); + + // Confirm that none of the files in the tarball match one in the candidate dir. + assertTrue(sstFiles.size() > 0); + for (String s: sstFiles) { + File sstFile = Paths.get(followerCandidatePath.toString(), s).toFile(); + assertFalse(sstFile.exists(), + sstFile + " should not duplicate existing files"); + } + + // Confirm that none of the links in the tarballs hardLinkFile + // match the existing files + // gbjnote, is the path right here? + Path hardLinkFile = Paths.get(increment.toString(), OM_HARDLINK_FILE); + try (Stream lines = Files.lines(hardLinkFile)) { + int lineCount = 0; + for (String line: lines.collect(Collectors.toList())) { + lineCount++; + String link = line.split("\t")[0]; + File linkFile = Paths.get(followerCandidatePath.toString(), link).toFile(); + assertFalse(linkFile.exists(), + "Incremental checkpoint should not " + + "duplicate existing links"); + } + assertTrue(lineCount > 0); + } + return tr; } @Test @@ -457,7 +570,7 @@ public void testInstallIncrementalSnapshotWithFailure() throws Exception { // Resume the follower thread, it would download the incremental snapshot. faultInjector.resume(); - // Pause the follower thread again to block the second-time install + // Pause the follower thread again to block the tarball install faultInjector.reset(); // Wait the follower download the incremental snapshot, but get stuck @@ -845,13 +958,13 @@ public void testInstallCorruptedCheckpointFailure() throws Exception { assertLogCapture(logCapture, msg); } - private SnapshotInfo createOzoneSnapshot(OzoneManager leaderOM) + private SnapshotInfo createOzoneSnapshot(OzoneManager leaderOM, String name) throws IOException { - objectStore.createSnapshot(volumeName, bucketName, "snap1"); + objectStore.createSnapshot(volumeName, bucketName, name); String tableKey = SnapshotInfo.getTableKey(volumeName, bucketName, - "snap1"); + name); SnapshotInfo snapshotInfo = leaderOM.getMetadataManager() .getSnapshotInfoTable() .get(tableKey); @@ -916,6 +1029,17 @@ private void assertLogCapture(GenericTestUtils.LogCapturer logCapture, }, 100, 5000); } + // Returns temp dir where tarball was untarred. + private void unTarLatestTarBall(OzoneManager followerOm, Path tempDir) + throws IOException { + File snapshotDir = followerOm.getOmSnapshotProvider().getSnapshotDir(); + // Find the latest snapshot. + String tarBall = Arrays.stream(Objects.requireNonNull(snapshotDir.list())). + filter(s -> s.toLowerCase().endsWith(".tar")). + reduce("", (s1, s2) -> s1.compareToIgnoreCase(s2) > 0 ? s1 : s2); + FileUtil.unTar(new File(snapshotDir, tarBall), tempDir.toFile()); + } + private static class DummyExitManager extends ExitManager { @Override public void exitSystem(int status, String message, Throwable throwable, @@ -950,6 +1074,12 @@ public void pause() throws IOException { @Override public void resume() throws IOException { + try { + ready.await(); + } catch (InterruptedException e) { + e.printStackTrace(); + Fail.fail("resume interrupted"); + } wait.countDown(); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index bf66528ffb5a..9542f9aca3be 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.compress.archivers.ArchiveOutputStream; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -31,7 +32,6 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.lock.BootstrapStateHandler; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; -import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,6 +60,7 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA; +import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX; import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.createHardLinkList; import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPath; import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.truncateFileName; @@ -129,8 +130,8 @@ public void writeDbDataToStream(DBCheckpoint checkpoint, Objects.requireNonNull(toExcludeList); Objects.requireNonNull(excludedList); - // Map of inodes to path. - Map copyFiles = new HashMap<>(); + // Files to be added to tarball + Set copyFiles = new HashSet<>(); // Map of link to path. Map hardLinkFiles = new HashMap<>(); @@ -141,32 +142,47 @@ public void writeDbDataToStream(DBCheckpoint checkpoint, .setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX); archiveOutputStream .setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); - getFilesForArchive(checkpoint, copyFiles, hardLinkFiles, - includeSnapshotData(request)); - - // Exclude file - Map finalCopyFiles = new HashMap<>(); - copyFiles.forEach((o, path) -> { - String fName = path.getFileName().toString(); - if (!toExcludeList.contains(fName)) { - finalCopyFiles.put(o, path); - } else { - excludedList.add(fName); - } - }); - writeFilesToArchive(finalCopyFiles, hardLinkFiles, archiveOutputStream); + // Files to be excluded from tarball + Set toExcludeFiles = normalizeExcludeList(toExcludeList, + checkpoint.getCheckpointLocation().toString(), + ServerUtils.getOzoneMetaDirPath(getConf()).toString()); + getFilesForArchive(checkpoint, copyFiles, hardLinkFiles, toExcludeFiles, + includeSnapshotData(request), excludedList); + writeFilesToArchive(copyFiles, hardLinkFiles, archiveOutputStream); + } catch (Exception e) { + LOG.error("got exception writing to archive " + e); + throw e; } } + // format list from follower to match data on leader + @VisibleForTesting + public static Set normalizeExcludeList(List toExcludeList, + String checkpointLocation, String metaDirPath) { + Set paths = new HashSet<>(); + for (String s: toExcludeList) { + if (!s.startsWith(OM_SNAPSHOT_DIR)) { + Path fixedPath = Paths.get(checkpointLocation, s); + paths.add(fixedPath); + } else { + paths.add(Paths.get(metaDirPath, s)); + } + } + return paths; + } + private void getFilesForArchive(DBCheckpoint checkpoint, - Map copyFiles, + Set copyFiles, Map hardLinkFiles, - boolean includeSnapshotData) + Set toExcludeFiles, + boolean includeSnapshotData, + List excluded) throws IOException { // Get the active fs files. Path dir = checkpoint.getCheckpointLocation(); - processDir(dir, copyFiles, hardLinkFiles, new HashSet<>()); + processDir(dir, copyFiles, hardLinkFiles, toExcludeFiles, new HashSet<>(), excluded); + if (!includeSnapshotData) { return; @@ -176,7 +192,7 @@ private void getFilesForArchive(DBCheckpoint checkpoint, Set snapshotPaths = waitForSnapshotDirs(checkpoint); Path snapshotDir = Paths.get(OMStorage.getOmDbDir(getConf()).toString(), OM_SNAPSHOT_DIR); - processDir(snapshotDir, copyFiles, hardLinkFiles, snapshotPaths); + processDir(snapshotDir, copyFiles, hardLinkFiles, toExcludeFiles, snapshotPaths, excluded); } /** @@ -219,9 +235,11 @@ private void waitForDirToExist(Path dir) throws IOException { } } - private void processDir(Path dir, Map copyFiles, + private void processDir(Path dir, Set copyFiles, Map hardLinkFiles, - Set snapshotPaths) + Set toExcludeFiles, + Set snapshotPaths, + List excluded) throws IOException { try (Stream files = Files.list(dir)) { for (Path file : files.collect(Collectors.toList())) { @@ -234,24 +252,65 @@ private void processDir(Path dir, Map copyFiles, LOG.debug("Skipping unneeded file: " + file); continue; } - processDir(file, copyFiles, hardLinkFiles, snapshotPaths); + processDir(file, copyFiles, hardLinkFiles, toExcludeFiles, + snapshotPaths, excluded); } else { - processFile(file, copyFiles, hardLinkFiles); + processFile(file, copyFiles, hardLinkFiles, toExcludeFiles, excluded); } } } } - private void processFile(Path file, Map copyFiles, - Map hardLinkFiles) throws IOException { - // Get the inode. - Object key = OmSnapshotUtils.getINode(file); - // If we already have the inode, store as hard link. - if (copyFiles.containsKey(key)) { - hardLinkFiles.put(file, copyFiles.get(key)); + /** + * Takes a db file and determines whether it should be included in + * the tarball, or added as a link, or excluded altogether. + * Uses the toExcludeFiles list to know what already + * exists on the follower. + * @param file The db file to be processed. + * @param copyFiles The db files to be added to tarball. + * @param hardLinkFiles The db files to be added as hard links. + * @param toExcludeFiles The db files to be excluded from tarball. + * @param excluded The list of db files that actually were excluded. + */ + @VisibleForTesting + public static void processFile(Path file, Set copyFiles, + Map hardLinkFiles, + Set toExcludeFiles, + List excluded) { + if (toExcludeFiles.contains(file)) { + excluded.add(file.toString()); } else { - copyFiles.put(key, file); + String fileName = file.getFileName().toString(); + if (fileName.endsWith(ROCKSDB_SST_SUFFIX)) { + // If same as existing excluded file, add a link for it. + Path linkPath = findLinkPath(toExcludeFiles, fileName); + if (linkPath != null) { + hardLinkFiles.put(file, linkPath); + } else { + // If already in tarball add a link for it. + linkPath = findLinkPath(copyFiles, fileName); + if (linkPath != null) { + hardLinkFiles.put(file, linkPath); + } else { + // Add to tarball. + copyFiles.add(file); + } + } + } else {// Not sst file. + copyFiles.add(file); + } + } + } + + // If fileName exists in "files" parameter, + // it should be linked to path in files. + private static Path findLinkPath(Set files, String fileName) { + for (Path p: files) { + if (p.toString().endsWith(fileName)) { + return p; + } } + return null; } // Returns value of http request parameter. @@ -261,16 +320,16 @@ private boolean includeSnapshotData(HttpServletRequest request) { return Boolean.parseBoolean(includeParam); } - private void writeFilesToArchive(Map copyFiles, - Map hardLinkFiles, - ArchiveOutputStream archiveOutputStream) + private void writeFilesToArchive(Set copyFiles, + Map hardLinkFiles, + ArchiveOutputStream archiveOutputStream) throws IOException { File metaDirPath = ServerUtils.getOzoneMetaDirPath(getConf()); int truncateLength = metaDirPath.toString().length() + 1; // Go through each of the files to be copied and add to archive. - for (Path file : copyFiles.values()) { + for (Path file : copyFiles) { String fixedFile = truncateFileName(truncateLength, file); if (fixedFile.startsWith(OM_CHECKPOINT_DIR)) { // checkpoint files go to root of tarball diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index fc1337dd38d0..e7d4bc1483b7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -3606,6 +3606,8 @@ public TermIndex installSnapshotFromLeader(String leaderId) { TermIndex termIndex = null; try { + // Install hard links. + OmSnapshotUtils.createHardLinks(omDBCheckpoint.getCheckpointLocation()); termIndex = installCheckpoint(leaderId, omDBCheckpoint); } catch (Exception ex) { LOG.error("Failed to install snapshot from Leader OM.", ex); @@ -3837,15 +3839,17 @@ private void moveCheckpointFiles(File oldDB, Path checkpointPath, File dbDir, // an inconsistent state and this marker file will fail OM from // starting up. Files.createFile(markerFile); - // Copy the candidate DB to real DB - org.apache.commons.io.FileUtils.copyDirectory(checkpointPath.toFile(), + // Link each of the candidate DB files to real DB directory. This + // preserves the links that already exist between files in the + // candidate db. + OmSnapshotUtils.linkFiles(checkpointPath.toFile(), oldDB); moveOmSnapshotData(oldDB.toPath(), dbSnapshotsDir.toPath()); Files.deleteIfExists(markerFile); } catch (IOException e) { LOG.error("Failed to move downloaded DB checkpoint {} to metadata " + - "directory {}. Resetting to original DB.", checkpointPath, - oldDB.toPath()); + "directory {}. Exception: {}. Resetting to original DB.", + checkpointPath, oldDB.toPath(), e); try { FileUtil.fullyDelete(oldDB); Files.move(dbBackup.toPath(), oldDB.toPath()); @@ -3862,14 +3866,13 @@ private void moveCheckpointFiles(File oldDB, Path checkpointPath, File dbDir, } } - // Move the new snapshot directory into place and create hard links. + // Move the new snapshot directory into place. private void moveOmSnapshotData(Path dbPath, Path dbSnapshotsDir) throws IOException { Path incomingSnapshotsDir = Paths.get(dbPath.toString(), OM_SNAPSHOT_DIR); if (incomingSnapshotsDir.toFile().exists()) { Files.move(incomingSnapshotsDir, dbSnapshotsDir); - OmSnapshotUtils.createHardLinks(dbPath); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java index 9aef593af85d..6960ac8de603 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java @@ -115,8 +115,8 @@ public static void createHardLinks(Path dbPath) throws IOException { for (String l : lines) { String from = l.split("\t")[1]; String to = l.split("\t")[0]; - Path fullFromPath = getFullPath(dbPath, from); - Path fullToPath = getFullPath(dbPath, to); + Path fullFromPath = Paths.get(dbPath.toString(), from); + Path fullToPath = Paths.get(dbPath.toString(), to); Files.createLink(fullToPath, fullFromPath); } if (!hardLinkFile.delete()) { @@ -126,19 +126,36 @@ public static void createHardLinks(Path dbPath) throws IOException { } } - // Prepend the full path to the hard link entry entry. - private static Path getFullPath(Path dbPath, String fileName) - throws IOException { - File file = new File(fileName); - // If there is no directory then this file belongs in the db. - if (file.getName().equals(fileName)) { - return Paths.get(dbPath.toString(), fileName); + /** + * Link each of the subfiles in oldDir to newDir. + * + * @param oldDir The dir to create links from. + * @param newDir The dir to create links to. + */ + public static void linkFiles(File oldDir, File newDir) throws IOException { + int truncateLength = oldDir.toString().length() + 1; + List oldDirList; + try (Stream files = Files.walk(oldDir.toPath())) { + oldDirList = files.map(Path::toString). + // Don't copy the directory itself + filter(s -> !s.equals(oldDir.toString())). + // Remove the old path + map(s -> s.substring(truncateLength)). + sorted(). + collect(Collectors.toList()); } - // Else this file belong in a directory parallel to the db. - Path parent = dbPath.getParent(); - if (parent == null) { - throw new IOException("Invalid database " + dbPath); + for (String s: oldDirList) { + File oldFile = new File(oldDir, s); + File newFile = new File(newDir, s); + File newParent = newFile.getParentFile(); + if (!newParent.exists()) { + newParent.mkdirs(); + } + if (oldFile.isDirectory()) { + newFile.mkdirs(); + } else { + Files.createLink(newFile.toPath(), oldFile.toPath()); + } } - return Paths.get(parent.toString(), fileName); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index bbc2dd6b5ac7..01392b1a7473 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -42,14 +42,24 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; +import static org.apache.commons.io.file.PathUtils.copyDirectory; +import static org.apache.hadoop.hdds.utils.HAUtils.getExistingSstFiles; import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_CHECKPOINT_DIR; +import static org.apache.hadoop.ozone.OzoneConsts.SNAPSHOT_CANDIDATE_DIR; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServlet.processFile; import static org.apache.hadoop.ozone.OzoneConsts.SNAPSHOT_INFO_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.BUCKET_TABLE; import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.VOLUME_TABLE; @@ -69,7 +79,7 @@ public class TestOmSnapshotManager { private OzoneManager om; private File testDir; - + private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + SNAPSHOT_CANDIDATE_DIR; @Before public void init() throws Exception { OzoneConfiguration configuration = new OzoneConfiguration(); @@ -165,58 +175,199 @@ public void testCloseOnEviction() throws IOException { verify(firstSnapshotStore, timeout(3000).times(1)).close(); } - @Test - @SuppressFBWarnings({"NP_NULL_ON_SOME_PATH"}) - public void testHardLinkCreation() throws IOException { + static class DirectoryData { + String pathSnap1; + String pathSnap2; + File leaderDir; + File leaderSnapdir1; + File leaderSnapdir2; + File leaderCheckpointDir; + File candidateDir; + File followerSnapdir1; + File followerSnapdir2; + File s1FileLink; + File s1File; + File f1FileLink; + File f1File; + File nolinkFile; + } + + private DirectoryData setupData() throws IOException { + // Setup the leader with the following files: + // leader/db.checkpoints/dir1/f1.sst + // leader/db.snapshots/checkpointState/dir1/s1.sst + + // And the following links: + // leader/db.snapshots/checkpointState/dir2/ + // leader/db.snapshots/checkpointState/dir2/ + + // Setup the follower with the following files, (as if they came + // from the tarball from the leader) + + // follower/cand/f1.sst + // follower/cand/db.snapshots/checkpointState/dir1/s1.sst + + // Note that the layout is slightly different in that the f1.sst on + // the leader is in a checkpoint directory but on the follower is + // moved to the candidate omdb directory; the links must be + // adjusted accordingly. + byte[] dummyData = {0}; + DirectoryData directoryData = new DirectoryData(); - // Create dummy files to be linked to. - File snapDir1 = new File(testDir.toString(), - OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"); - if (!snapDir1.mkdirs()) { - throw new IOException("failed to make directory: " + snapDir1); + // Create dummy leader files to calculate links + directoryData.leaderDir = new File(testDir.toString(), + "leader"); + directoryData.leaderDir.mkdirs(); + directoryData.pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"; + directoryData.pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"; + directoryData.leaderSnapdir1 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap1); + if (!directoryData.leaderSnapdir1.mkdirs()) { + throw new IOException("failed to make directory: " + directoryData.leaderSnapdir1); } - Files.write(Paths.get(snapDir1.toString(), "s1"), dummyData); + Files.write(Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst"), dummyData); - File snapDir2 = new File(testDir.toString(), - OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"); - if (!snapDir2.mkdirs()) { - throw new IOException("failed to make directory: " + snapDir2); + directoryData.leaderSnapdir2 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap2); + if (!directoryData.leaderSnapdir2.mkdirs()) { + throw new IOException("failed to make directory: " + directoryData.leaderSnapdir2); } + Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "noLink.sst"), dummyData); + Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "nonSstFile"), dummyData); - File dbDir = new File(testDir.toString(), OM_DB_NAME); - Files.write(Paths.get(dbDir.toString(), "f1"), dummyData); - // Create map of links to dummy files. - File checkpointDir1 = new File(testDir.toString(), + // Also create the follower files + directoryData.candidateDir = new File(testDir.toString(), + CANDIDATE_DIR_NAME); + directoryData.followerSnapdir1 = new File(directoryData.candidateDir.toString(), directoryData.pathSnap1); + directoryData.followerSnapdir2 = new File(directoryData.candidateDir.toString(), directoryData.pathSnap2); + copyDirectory(directoryData.leaderDir.toPath(), directoryData.candidateDir.toPath()); + Files.write(Paths.get(directoryData.candidateDir.toString(), "f1.sst"), dummyData); + + + directoryData.leaderCheckpointDir = new File(directoryData.leaderDir.toString(), OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"); + directoryData.leaderCheckpointDir.mkdirs(); + Files.write(Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst"), dummyData); + directoryData.s1FileLink = new File(directoryData.followerSnapdir2, "s1.sst"); + directoryData.s1File = new File(directoryData.followerSnapdir1, "s1.sst"); + directoryData.f1FileLink = new File(directoryData.followerSnapdir2, "f1.sst"); + directoryData.f1File = new File(directoryData.candidateDir, "f1.sst"); + directoryData.nolinkFile = new File(directoryData.followerSnapdir2, "noLink.sst"); + + return directoryData; + } + + @Test + @SuppressFBWarnings({"NP_NULL_ON_SOME_PATH"}) + public void testHardLinkCreation() throws IOException { + + // test that following links are created on the follower + // follower/db.snapshots/checkpointState/dir2/f1.sst + // follower/db.snapshots/checkpointState/dir2/s1.sst + + DirectoryData directoryData = setupData(); + + // Create map of links to dummy files on the leader Map hardLinkFiles = new HashMap<>(); - hardLinkFiles.put(Paths.get(snapDir2.toString(), "f1"), - Paths.get(checkpointDir1.toString(), "f1")); - hardLinkFiles.put(Paths.get(snapDir2.toString(), "s1"), - Paths.get(snapDir1.toString(), "s1")); + hardLinkFiles.put(Paths.get(directoryData.leaderSnapdir2.toString(), "f1.sst"), + Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst")); + hardLinkFiles.put(Paths.get(directoryData.leaderSnapdir2.toString(), "s1.sst"), + Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst")); // Create link list. Path hardLinkList = OmSnapshotUtils.createHardLinkList( - testDir.toString().length() + 1, hardLinkFiles); - Files.move(hardLinkList, Paths.get(dbDir.toString(), OM_HARDLINK_FILE)); - - // Create links from list. - OmSnapshotUtils.createHardLinks(dbDir.toPath()); - - // Confirm expected links. - for (Map.Entry entry : hardLinkFiles.entrySet()) { - Assert.assertTrue(entry.getKey().toFile().exists()); - Path value = entry.getValue(); - // Convert checkpoint path to om.db. - if (value.toString().contains(OM_CHECKPOINT_DIR)) { - value = Paths.get(dbDir.toString(), - value.getFileName().toString()); - } - Assert.assertEquals("link matches original file", - getINode(entry.getKey()), getINode(value)); - } + directoryData.leaderDir.toString().length() + 1, hardLinkFiles); + + Files.move(hardLinkList, Paths.get(directoryData.candidateDir.toString(), + OM_HARDLINK_FILE)); + + // Create links on the follower from list. + OmSnapshotUtils.createHardLinks(directoryData.candidateDir.toPath()); + + // Confirm expected follower links. + Assert.assertTrue(directoryData.s1FileLink.exists()); + Assert.assertEquals("link matches original file", + getINode(directoryData.s1File.toPath()), getINode(directoryData.s1FileLink.toPath())); + + Assert.assertTrue(directoryData.f1FileLink.exists()); + Assert.assertEquals("link matches original file", + getINode(directoryData.f1File.toPath()), getINode(directoryData.f1FileLink.toPath())); + } + + @Test + public void testFileUtilities() throws IOException { + + DirectoryData directoryData = setupData(); + List excludeList = getExistingSstFiles(directoryData.candidateDir); + Set existingSstFiles = new HashSet<>(excludeList); + int truncateLength = directoryData.candidateDir.toString().length() + 1; + Set expectedSstFiles = new HashSet<>(Arrays.asList( + directoryData.s1File.toString().substring(truncateLength), + directoryData.nolinkFile.toString().substring(truncateLength), + directoryData.f1File.toString().substring(truncateLength))); + Assert.assertEquals(expectedSstFiles, existingSstFiles); + + Set normalizedSet = + OMDBCheckpointServlet.normalizeExcludeList(excludeList, + directoryData.leaderCheckpointDir.toString(), directoryData.leaderDir.toString()); + Set expectedNormalizedSet = new HashSet<>(Arrays.asList( + Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst"), + Paths.get(directoryData.leaderSnapdir2.toString(), "noLink.sst"), + Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst"))); + Assert.assertEquals(expectedNormalizedSet, normalizedSet); + } + + @Test + public void testProcessFile() { + Path copyFile = Paths.get("/dir1/copyfile.sst"); + Path excludeFile = Paths.get("/dir1/excludefile.sst"); + Path linkToExcludedFile = Paths.get("/dir2/excludefile.sst"); + Path linkToCopiedFile = Paths.get("/dir2/copyfile.sst"); + Path addToCopiedFiles = Paths.get("/dir1/copyfile2.sst"); + Path addNonSstToCopiedFiles = Paths.get("/dir1/nonSst"); + + Set toExcludeFiles = new HashSet<>( + Collections.singletonList(excludeFile)); + Set copyFiles = new HashSet<>(Collections.singletonList(copyFile)); + List excluded = new ArrayList<>(); + Map hardLinkFiles = new HashMap<>(); + + processFile(excludeFile, copyFiles, hardLinkFiles, toExcludeFiles, excluded); + Assert.assertEquals(excluded.size(), 1); + Assert.assertEquals((excluded.get(0)), excludeFile.toString()); + Assert.assertEquals(copyFiles.size(), 1); + Assert.assertEquals(hardLinkFiles.size(), 0); + excluded = new ArrayList<>(); + + processFile(linkToExcludedFile, copyFiles, hardLinkFiles, toExcludeFiles, + excluded); + Assert.assertEquals(excluded.size(), 0); + Assert.assertEquals(copyFiles.size(), 1); + Assert.assertEquals(hardLinkFiles.size(), 1); + Assert.assertEquals(hardLinkFiles.get(linkToExcludedFile), excludeFile); + hardLinkFiles = new HashMap<>(); + + processFile(linkToCopiedFile, copyFiles, hardLinkFiles, toExcludeFiles, + excluded); + Assert.assertEquals(excluded.size(), 0); + Assert.assertEquals(copyFiles.size(), 1); + Assert.assertEquals(hardLinkFiles.size(), 1); + Assert.assertEquals(hardLinkFiles.get(linkToCopiedFile), copyFile); + hardLinkFiles = new HashMap<>(); + + processFile(addToCopiedFiles, copyFiles, hardLinkFiles, toExcludeFiles, + excluded); + Assert.assertEquals(excluded.size(), 0); + Assert.assertEquals(copyFiles.size(), 2); + Assert.assertTrue(copyFiles.contains(addToCopiedFiles)); + copyFiles = new HashSet<>(Collections.singletonList(copyFile)); + + processFile(addNonSstToCopiedFiles, copyFiles, hardLinkFiles, toExcludeFiles, + excluded); + Assert.assertEquals(excluded.size(), 0); + Assert.assertEquals(copyFiles.size(), 2); + Assert.assertTrue(copyFiles.contains(addNonSstToCopiedFiles)); } private SnapshotInfo createSnapshotInfo( diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java new file mode 100644 index 000000000000..d9c0c7be7f00 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.snapshot; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Set; +import java.util.stream.Collectors; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.getINode; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Class to test annotation based interceptor that checks whether + * Ozone snapshot feature is enabled. + */ +public class TestOmSnapshotUtils { + + /** + * Check Aspect implementation with SnapshotFeatureEnabledUtil. + */ + @Test + public void testLinkFiles(@TempDir File tempdir) throws Exception { + File dir1 = new File(tempdir, "tree1/dir1"); + File dir2 = new File(tempdir, "tree1/dir2"); + File tree1 = new File(tempdir, "tree1"); + File tree2 = new File(tempdir, "tree2"); + assertTrue(dir1.mkdirs()); + assertTrue(dir2.mkdirs()); + File f1 = new File(tempdir, "tree1/dir1/f1"); + Files.write(f1.toPath(), "dummyData".getBytes(UTF_8)); + File f1Link = new File(tempdir, "tree2/dir1/f1"); + + OmSnapshotUtils.linkFiles(tree1, tree2); + + Set tree1Files = Files.walk(tree1.toPath()). + map(Path::toString). + map((s) -> s.replace("tree1", "tree2")). + collect(Collectors.toSet()); + Set tree2Files = Files.walk(tree2.toPath()). + map(Path::toString).collect(Collectors.toSet()); + + assertEquals(tree1Files, tree2Files); + assertEquals(getINode(f1.toPath()), getINode(f1Link.toPath())); + } +} From 0f4abc8e5380922fc0c4c4292bc687a98c9ad35f Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 May 2023 16:51:37 -0700 Subject: [PATCH 02/20] findbugs --- .../ozone/om/snapshot/OmSnapshotUtils.java | 10 +++++--- .../ozone/om/TestOmSnapshotManager.java | 24 ++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java index 6960ac8de603..eac63a54bc7c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java @@ -127,7 +127,7 @@ public static void createHardLinks(Path dbPath) throws IOException { } /** - * Link each of the subfiles in oldDir to newDir. + * Link each of the files in oldDir to newDir. * * @param oldDir The dir to create links from. * @param newDir The dir to create links to. @@ -149,10 +149,14 @@ public static void linkFiles(File oldDir, File newDir) throws IOException { File newFile = new File(newDir, s); File newParent = newFile.getParentFile(); if (!newParent.exists()) { - newParent.mkdirs(); + if (!newParent.mkdirs()) { + throw new IOException("Directory create fails: " + newParent); + } } if (oldFile.isDirectory()) { - newFile.mkdirs(); + if (!newFile.mkdirs()) { + throw new IOException("Directory create fails: " + newFile); + } } else { Files.createLink(newFile.toPath(), oldFile.toPath()); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 01392b1a7473..9532007adf7f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -218,19 +218,15 @@ private DirectoryData setupData() throws IOException { // Create dummy leader files to calculate links directoryData.leaderDir = new File(testDir.toString(), "leader"); - directoryData.leaderDir.mkdirs(); + Assert.assertTrue(directoryData.leaderDir.mkdirs()); directoryData.pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"; directoryData.pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"; directoryData.leaderSnapdir1 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap1); - if (!directoryData.leaderSnapdir1.mkdirs()) { - throw new IOException("failed to make directory: " + directoryData.leaderSnapdir1); - } + Assert.assertTrue(directoryData.leaderSnapdir1.mkdirs()); Files.write(Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst"), dummyData); directoryData.leaderSnapdir2 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap2); - if (!directoryData.leaderSnapdir2.mkdirs()) { - throw new IOException("failed to make directory: " + directoryData.leaderSnapdir2); - } + Assert.assertTrue(directoryData.leaderSnapdir2.mkdirs()); Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "noLink.sst"), dummyData); Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "nonSstFile"), dummyData); @@ -246,7 +242,7 @@ private DirectoryData setupData() throws IOException { directoryData.leaderCheckpointDir = new File(directoryData.leaderDir.toString(), OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"); - directoryData.leaderCheckpointDir.mkdirs(); + Assert.assertTrue(directoryData.leaderCheckpointDir.mkdirs()); Files.write(Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst"), dummyData); directoryData.s1FileLink = new File(directoryData.followerSnapdir2, "s1.sst"); directoryData.s1File = new File(directoryData.followerSnapdir1, "s1.sst"); @@ -320,12 +316,12 @@ public void testFileUtilities() throws IOException { @Test public void testProcessFile() { - Path copyFile = Paths.get("/dir1/copyfile.sst"); - Path excludeFile = Paths.get("/dir1/excludefile.sst"); - Path linkToExcludedFile = Paths.get("/dir2/excludefile.sst"); - Path linkToCopiedFile = Paths.get("/dir2/copyfile.sst"); - Path addToCopiedFiles = Paths.get("/dir1/copyfile2.sst"); - Path addNonSstToCopiedFiles = Paths.get("/dir1/nonSst"); + Path copyFile = Paths.get(testDir.toString(), "dir1/copyfile.sst"); + Path excludeFile = Paths.get(testDir.toString(), "dir1/excludefile.sst"); + Path linkToExcludedFile = Paths.get(testDir.toString(), "dir2/excludefile.sst"); + Path linkToCopiedFile = Paths.get(testDir.toString(), "dir2/copyfile.sst"); + Path addToCopiedFiles = Paths.get(testDir.toString(), "dir1/copyfile2.sst"); + Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), "dir1/nonSst"); Set toExcludeFiles = new HashSet<>( Collections.singletonList(excludeFile)); From 1bc6f2887e5389c83c03d64a8620b40e0850abd5 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 May 2023 17:21:14 -0700 Subject: [PATCH 03/20] comments --- .../java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 9542f9aca3be..d6cf49fef5d7 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -155,7 +155,7 @@ public void writeDbDataToStream(DBCheckpoint checkpoint, } } - // format list from follower to match data on leader + // Format list from follower to match data on leader. @VisibleForTesting public static Set normalizeExcludeList(List toExcludeList, String checkpointLocation, String metaDirPath) { From 493053216e1302a521b20a360cb6eb3eb02abade Mon Sep 17 00:00:00 2001 From: George Jahad Date: Mon, 22 May 2023 19:12:07 -0700 Subject: [PATCH 04/20] findbugs --- .../java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index d6cf49fef5d7..9d517649f876 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -280,7 +280,7 @@ public static void processFile(Path file, Set copyFiles, if (toExcludeFiles.contains(file)) { excluded.add(file.toString()); } else { - String fileName = file.getFileName().toString(); + String fileName = Objects.requireNonNull(file.getFileName()).toString(); if (fileName.endsWith(ROCKSDB_SST_SUFFIX)) { // If same as existing excluded file, add a link for it. Path linkPath = findLinkPath(toExcludeFiles, fileName); From fb8b7cfa5657fe1cf57d95cfc03ab325d787963f Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 12:07:00 -0700 Subject: [PATCH 05/20] cleanup --- .../hdds/utils/TestRDBSnapshotProvider.java | 8 +++++ .../hadoop/ozone/om/TestOMRatisSnapshots.java | 31 +++++++++++-------- .../ozone/om/OMDBCheckpointServlet.java | 6 +++- .../ozone/om/TestOmSnapshotManager.java | 3 +- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index a757085fc8d6..c32596b8aabb 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -242,16 +242,24 @@ public void insertRandomData(RDBStore dbStore, int familyIndex) @Test public void testCheckLeaderConsistent() throws IOException { + // Leader inited to null at startup. assertTrue(rdbSnapshotProvider.getInitCount() == 1); File dummyFile = new File(rdbSnapshotProvider.getCandidateDir(), "file1.sst"); Files.write(dummyFile.toPath(), "dummyData".getBytes(StandardCharsets.UTF_8)); + assertTrue(dummyFile.exists()); + + // Set the leader. rdbSnapshotProvider.checkLeaderConsistent("node1"); assertTrue(rdbSnapshotProvider.getInitCount() == 2); assertFalse(dummyFile.exists()); + + // Confirm setting the same leader doesn't reinit. rdbSnapshotProvider.checkLeaderConsistent("node1"); assertTrue(rdbSnapshotProvider.getInitCount() == 2); + + // Confirm setting different leader does reinit. rdbSnapshotProvider.checkLeaderConsistent("node2"); assertTrue(rdbSnapshotProvider.getInitCount() == 3); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 84b638100d96..93e8a4ec044c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -57,6 +57,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -172,11 +174,11 @@ public void shutdown() { } } - // @ParameterizedTest - // @ValueSource(ints = {1000}) - @Test - public void testInstallSnapshot() throws Exception { - int numSnapshotsToCreate = 100; + @ParameterizedTest + @ValueSource(ints = {100}) + // tried up to 1000 snapshots and this test works, but some of the + // timeouts have to be increased. + public void testInstallSnapshot(int numSnapshotsToCreate) throws Exception { // Get the leader OM String leaderOMNodeId = OmFailoverProxyUtil .getFailoverProxyProvider(objectStore.getClientProxy()) @@ -191,7 +193,7 @@ public void testInstallSnapshot() throws Exception { } OzoneManager followerOM = cluster.getOzoneManager(followerNodeId); - // Do some transactions so that the log index increases + // Create some snapshots, each with new keys int keyIncrement = 10; String snapshotNamePrefix = "snapshot"; String snapshotName = ""; @@ -224,7 +226,7 @@ public void testInstallSnapshot() throws Exception { GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; - }, 100, 600000); + }, 100, 10000); long followerOMLastAppliedIndex = followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex(); @@ -335,7 +337,6 @@ private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, Strin } @Test - //gbjfix @Timeout(300) public void testInstallIncrementalSnapshot(@TempDir Path tempDir) throws Exception { @@ -368,15 +369,17 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) cluster.startInactiveOM(followerNodeId); // Wait the follower download the snapshot,but get stuck by injector - //gbjfix GenericTestUtils.waitFor(() -> { return followerOM.getOmSnapshotProvider().getNumDownloaded() == 1; }, 1000, 10000); + // Get two incremental tarballs, adding new keys/snapshot for each. TestResults firstIncrement = getNextIncrementalTarball(160, 2, leaderOM, leaderRatisServer, faultInjector, followerOM, tempDir); TestResults secondIncrement = getNextIncrementalTarball(240, 3, leaderOM, leaderRatisServer, faultInjector, followerOM, tempDir); + + // Resume the follower thread, it would download the incremental snapshot. faultInjector.resume(); // Get the latest db checkpoint from the leader OM. @@ -386,7 +389,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) TermIndex.valueOf(transactionInfo.getTerm(), transactionInfo.getTransactionIndex()); long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex(); - //gbjfix + // The recently started OM should be lagging behind the leader OM. // Wait & for follower to update transactions to leader snapshot index. // Timeout error if follower does not load update within 10s @@ -485,7 +488,6 @@ private TestResults getNextIncrementalTarball(int numKeys, // Pause the follower thread again to block the next install faultInjector.reset(); - //gbjfix // Wait the follower download the incremental snapshot, but get stuck // by injector GenericTestUtils.waitFor(() -> @@ -494,6 +496,9 @@ private TestResults getNextIncrementalTarball(int numKeys, assertTrue(followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1); + + // Now confirm tarball is just incremental and contains no unexpected + // files/links. Path increment = Paths.get(tempDir.toString(), "increment" + numKeys); increment.toFile().mkdirs(); unTarLatestTarBall(followerOM, increment); @@ -511,7 +516,6 @@ private TestResults getNextIncrementalTarball(int numKeys, // Confirm that none of the links in the tarballs hardLinkFile // match the existing files - // gbjnote, is the path right here? Path hardLinkFile = Paths.get(increment.toString(), OM_HARDLINK_FILE); try (Stream lines = Files.lines(hardLinkFile)) { int lineCount = 0; @@ -1033,7 +1037,7 @@ private void assertLogCapture(GenericTestUtils.LogCapturer logCapture, private void unTarLatestTarBall(OzoneManager followerOm, Path tempDir) throws IOException { File snapshotDir = followerOm.getOmSnapshotProvider().getSnapshotDir(); - // Find the latest snapshot. + // Find the latest tarball. String tarBall = Arrays.stream(Objects.requireNonNull(snapshotDir.list())). filter(s -> s.toLowerCase().endsWith(".tar")). reduce("", (s1, s2) -> s1.compareToIgnoreCase(s2) > 0 ? s1 : s2); @@ -1074,6 +1078,7 @@ public void pause() throws IOException { @Override public void resume() throws IOException { + // Make sure injector pauses before resuming. try { ready.await(); } catch (InterruptedException e) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 9d517649f876..ad1417233481 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -280,7 +280,11 @@ public static void processFile(Path file, Set copyFiles, if (toExcludeFiles.contains(file)) { excluded.add(file.toString()); } else { - String fileName = Objects.requireNonNull(file.getFileName()).toString(); + Path fileNamePath = file.getFileName(); + if (fileNamePath == null) { + throw new NullPointerException("Bad file: " + file.toString()); + } + String fileName = fileNamePath.toString(); if (fileName.endsWith(ROCKSDB_SST_SUFFIX)) { // If same as existing excluded file, add a link for it. Path linkPath = findLinkPath(toExcludeFiles, fileName); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 9532007adf7f..afecbcde1ab3 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -79,7 +79,8 @@ public class TestOmSnapshotManager { private OzoneManager om; private File testDir; - private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + SNAPSHOT_CANDIDATE_DIR; + private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + + SNAPSHOT_CANDIDATE_DIR; @Before public void init() throws Exception { OzoneConfiguration configuration = new OzoneConfiguration(); From c090332f6fae5b06a6c576d7ce8e1b124a3342d4 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 16:39:21 -0700 Subject: [PATCH 06/20] cleanup --- .../ozone/om/TestOmSnapshotManager.java | 138 ++++++++---------- .../om/snapshot/TestOmSnapshotUtils.java | 35 +++-- 2 files changed, 87 insertions(+), 86 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index afecbcde1ab3..8f28eb988406 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -81,6 +81,18 @@ public class TestOmSnapshotManager { private File testDir; private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + SNAPSHOT_CANDIDATE_DIR; + private File leaderDir; + private File leaderSnapdir1; + private File leaderSnapdir2; + private File leaderCheckpointDir; + private File candidateDir; + private File s1FileLink; + private File s1File; + private File f1FileLink; + private File f1File; + private File nolinkFile; + + @Before public void init() throws Exception { OzoneConfiguration configuration = new OzoneConfiguration(); @@ -97,6 +109,7 @@ public void init() throws Exception { OmTestManagers omTestManagers = new OmTestManagers(configuration); om = omTestManagers.getOzoneManager(); + setupData(); } @After @@ -176,25 +189,8 @@ public void testCloseOnEviction() throws IOException { verify(firstSnapshotStore, timeout(3000).times(1)).close(); } - static class DirectoryData { - String pathSnap1; - String pathSnap2; - File leaderDir; - File leaderSnapdir1; - File leaderSnapdir2; - File leaderCheckpointDir; - File candidateDir; - File followerSnapdir1; - File followerSnapdir2; - File s1FileLink; - File s1File; - File f1FileLink; - File f1File; - File nolinkFile; - } - - private DirectoryData setupData() throws IOException { - // Setup the leader with the following files: + private void setupData() throws IOException { + // Set up the leader with the following files: // leader/db.checkpoints/dir1/f1.sst // leader/db.snapshots/checkpointState/dir1/s1.sst @@ -202,56 +198,53 @@ private DirectoryData setupData() throws IOException { // leader/db.snapshots/checkpointState/dir2/ // leader/db.snapshots/checkpointState/dir2/ - // Setup the follower with the following files, (as if they came + // Set up the follower with the following files, (as if they came // from the tarball from the leader) // follower/cand/f1.sst // follower/cand/db.snapshots/checkpointState/dir1/s1.sst - // Note that the layout is slightly different in that the f1.sst on - // the leader is in a checkpoint directory but on the follower is - // moved to the candidate omdb directory; the links must be - // adjusted accordingly. + // Note that the layout between leader and follower is slightly + // different in that the f1.sst on the leader is in a checkpoint + // directory but on the follower is moved to the candidate omdb + // directory; the links must be adjusted accordingly. byte[] dummyData = {0}; - DirectoryData directoryData = new DirectoryData(); // Create dummy leader files to calculate links - directoryData.leaderDir = new File(testDir.toString(), + leaderDir = new File(testDir.toString(), "leader"); - Assert.assertTrue(directoryData.leaderDir.mkdirs()); - directoryData.pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"; - directoryData.pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"; - directoryData.leaderSnapdir1 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap1); - Assert.assertTrue(directoryData.leaderSnapdir1.mkdirs()); - Files.write(Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst"), dummyData); + Assert.assertTrue(leaderDir.mkdirs()); + String pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"; + String pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"; + leaderSnapdir1 = new File(leaderDir.toString(), pathSnap1); + Assert.assertTrue(leaderSnapdir1.mkdirs()); + Files.write(Paths.get(leaderSnapdir1.toString(), "s1.sst"), dummyData); - directoryData.leaderSnapdir2 = new File(directoryData.leaderDir.toString(), directoryData.pathSnap2); - Assert.assertTrue(directoryData.leaderSnapdir2.mkdirs()); - Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "noLink.sst"), dummyData); - Files.write(Paths.get(directoryData.leaderSnapdir2.toString(), "nonSstFile"), dummyData); + leaderSnapdir2 = new File(leaderDir.toString(), pathSnap2); + Assert.assertTrue(leaderSnapdir2.mkdirs()); + Files.write(Paths.get(leaderSnapdir2.toString(), "noLink.sst"), dummyData); + Files.write(Paths.get(leaderSnapdir2.toString(), "nonSstFile"), dummyData); // Also create the follower files - directoryData.candidateDir = new File(testDir.toString(), + candidateDir = new File(testDir.toString(), CANDIDATE_DIR_NAME); - directoryData.followerSnapdir1 = new File(directoryData.candidateDir.toString(), directoryData.pathSnap1); - directoryData.followerSnapdir2 = new File(directoryData.candidateDir.toString(), directoryData.pathSnap2); - copyDirectory(directoryData.leaderDir.toPath(), directoryData.candidateDir.toPath()); - Files.write(Paths.get(directoryData.candidateDir.toString(), "f1.sst"), dummyData); + File followerSnapdir1 = new File(candidateDir.toString(), pathSnap1); + File followerSnapdir2 = new File(candidateDir.toString(), pathSnap2); + copyDirectory(leaderDir.toPath(), candidateDir.toPath()); + Files.write(Paths.get(candidateDir.toString(), "f1.sst"), dummyData); - directoryData.leaderCheckpointDir = new File(directoryData.leaderDir.toString(), + leaderCheckpointDir = new File(leaderDir.toString(), OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"); - Assert.assertTrue(directoryData.leaderCheckpointDir.mkdirs()); - Files.write(Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst"), dummyData); - directoryData.s1FileLink = new File(directoryData.followerSnapdir2, "s1.sst"); - directoryData.s1File = new File(directoryData.followerSnapdir1, "s1.sst"); - directoryData.f1FileLink = new File(directoryData.followerSnapdir2, "f1.sst"); - directoryData.f1File = new File(directoryData.candidateDir, "f1.sst"); - directoryData.nolinkFile = new File(directoryData.followerSnapdir2, "noLink.sst"); - - return directoryData; + Assert.assertTrue(leaderCheckpointDir.mkdirs()); + Files.write(Paths.get(leaderCheckpointDir.toString(), "f1.sst"), dummyData); + s1FileLink = new File(followerSnapdir2, "s1.sst"); + s1File = new File(followerSnapdir1, "s1.sst"); + f1FileLink = new File(followerSnapdir2, "f1.sst"); + f1File = new File(candidateDir, "f1.sst"); + nolinkFile = new File(followerSnapdir2, "noLink.sst"); } @Test @@ -262,56 +255,53 @@ public void testHardLinkCreation() throws IOException { // follower/db.snapshots/checkpointState/dir2/f1.sst // follower/db.snapshots/checkpointState/dir2/s1.sst - DirectoryData directoryData = setupData(); - // Create map of links to dummy files on the leader Map hardLinkFiles = new HashMap<>(); - hardLinkFiles.put(Paths.get(directoryData.leaderSnapdir2.toString(), "f1.sst"), - Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst")); - hardLinkFiles.put(Paths.get(directoryData.leaderSnapdir2.toString(), "s1.sst"), - Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst")); + hardLinkFiles.put(Paths.get(leaderSnapdir2.toString(), "f1.sst"), + Paths.get(leaderCheckpointDir.toString(), "f1.sst")); + hardLinkFiles.put(Paths.get(leaderSnapdir2.toString(), "s1.sst"), + Paths.get(leaderSnapdir1.toString(), "s1.sst")); // Create link list. Path hardLinkList = OmSnapshotUtils.createHardLinkList( - directoryData.leaderDir.toString().length() + 1, hardLinkFiles); + leaderDir.toString().length() + 1, hardLinkFiles); - Files.move(hardLinkList, Paths.get(directoryData.candidateDir.toString(), + Files.move(hardLinkList, Paths.get(candidateDir.toString(), OM_HARDLINK_FILE)); // Create links on the follower from list. - OmSnapshotUtils.createHardLinks(directoryData.candidateDir.toPath()); + OmSnapshotUtils.createHardLinks(candidateDir.toPath()); // Confirm expected follower links. - Assert.assertTrue(directoryData.s1FileLink.exists()); + Assert.assertTrue(s1FileLink.exists()); Assert.assertEquals("link matches original file", - getINode(directoryData.s1File.toPath()), getINode(directoryData.s1FileLink.toPath())); + getINode(s1File.toPath()), getINode(s1FileLink.toPath())); - Assert.assertTrue(directoryData.f1FileLink.exists()); + Assert.assertTrue(f1FileLink.exists()); Assert.assertEquals("link matches original file", - getINode(directoryData.f1File.toPath()), getINode(directoryData.f1FileLink.toPath())); + getINode(f1File.toPath()), getINode(f1FileLink.toPath())); } @Test public void testFileUtilities() throws IOException { - DirectoryData directoryData = setupData(); - List excludeList = getExistingSstFiles(directoryData.candidateDir); + List excludeList = getExistingSstFiles(candidateDir); Set existingSstFiles = new HashSet<>(excludeList); - int truncateLength = directoryData.candidateDir.toString().length() + 1; + int truncateLength = candidateDir.toString().length() + 1; Set expectedSstFiles = new HashSet<>(Arrays.asList( - directoryData.s1File.toString().substring(truncateLength), - directoryData.nolinkFile.toString().substring(truncateLength), - directoryData.f1File.toString().substring(truncateLength))); + s1File.toString().substring(truncateLength), + nolinkFile.toString().substring(truncateLength), + f1File.toString().substring(truncateLength))); Assert.assertEquals(expectedSstFiles, existingSstFiles); Set normalizedSet = OMDBCheckpointServlet.normalizeExcludeList(excludeList, - directoryData.leaderCheckpointDir.toString(), directoryData.leaderDir.toString()); + leaderCheckpointDir.toString(), leaderDir.toString()); Set expectedNormalizedSet = new HashSet<>(Arrays.asList( - Paths.get(directoryData.leaderSnapdir1.toString(), "s1.sst"), - Paths.get(directoryData.leaderSnapdir2.toString(), "noLink.sst"), - Paths.get(directoryData.leaderCheckpointDir.toString(), "f1.sst"))); + Paths.get(leaderSnapdir1.toString(), "s1.sst"), + Paths.get(leaderSnapdir2.toString(), "noLink.sst"), + Paths.get(leaderCheckpointDir.toString(), "f1.sst"))); Assert.assertEquals(expectedNormalizedSet, normalizedSet); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java index d9c0c7be7f00..8ab652612f56 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java @@ -31,30 +31,42 @@ import static org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils.getINode; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; - +import static org.junit.jupiter.api.Assertions.assertFalse; /** - * Class to test annotation based interceptor that checks whether - * Ozone snapshot feature is enabled. + * Class to test snapshot utilities. */ public class TestOmSnapshotUtils { /** - * Check Aspect implementation with SnapshotFeatureEnabledUtil. + * Test linkFiles(). */ @Test - public void testLinkFiles(@TempDir File tempdir) throws Exception { - File dir1 = new File(tempdir, "tree1/dir1"); - File dir2 = new File(tempdir, "tree1/dir2"); - File tree1 = new File(tempdir, "tree1"); - File tree2 = new File(tempdir, "tree2"); + public void testLinkFiles(@TempDir File tempDir) throws Exception { + + // Create the tree to link from + File dir1 = new File(tempDir, "tree1/dir1"); + File dir2 = new File(tempDir, "tree1/dir2"); + File tree1 = new File(tempDir, "tree1"); assertTrue(dir1.mkdirs()); assertTrue(dir2.mkdirs()); - File f1 = new File(tempdir, "tree1/dir1/f1"); + File f1 = new File(tempDir, "tree1/dir1/f1"); Files.write(f1.toPath(), "dummyData".getBytes(UTF_8)); - File f1Link = new File(tempdir, "tree2/dir1/f1"); + + // Create pointers to expected files/links. + File tree2 = new File(tempDir, "tree2"); + File f1Link = new File(tempDir, "tree2/dir1/f1"); + + // Expected files/links shouldn't exist yet. + assertFalse(tree2.exists()); + assertFalse(f1Link.exists()); OmSnapshotUtils.linkFiles(tree1, tree2); + // Expected files/links should exist now. + assertTrue(tree2.exists()); + assertTrue(f1Link.exists()); + assertEquals(getINode(f1.toPath()), getINode(f1Link.toPath())); + Set tree1Files = Files.walk(tree1.toPath()). map(Path::toString). map((s) -> s.replace("tree1", "tree2")). @@ -63,6 +75,5 @@ public void testLinkFiles(@TempDir File tempdir) throws Exception { map(Path::toString).collect(Collectors.toSet()); assertEquals(tree1Files, tree2Files); - assertEquals(getINode(f1.toPath()), getINode(f1Link.toPath())); } } From 9e08d1a09766d646e68a16de6706440a5d6f4594 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 16:44:31 -0700 Subject: [PATCH 07/20] cleanup --- .../ozone/om/TestOmSnapshotManager.java | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 8f28eb988406..ca90673c355c 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -90,7 +90,7 @@ public class TestOmSnapshotManager { private File s1File; private File f1FileLink; private File f1File; - private File nolinkFile; + private File noLinkFile; @Before @@ -191,18 +191,18 @@ public void testCloseOnEviction() throws IOException { private void setupData() throws IOException { // Set up the leader with the following files: - // leader/db.checkpoints/dir1/f1.sst - // leader/db.snapshots/checkpointState/dir1/s1.sst + // leader/db.checkpoints/checkpoint1/f1.sst + // leader/db.snapshots/checkpointState/snap1/s1.sst // And the following links: - // leader/db.snapshots/checkpointState/dir2/ - // leader/db.snapshots/checkpointState/dir2/ + // leader/db.snapshots/checkpointState/snap2/ + // leader/db.snapshots/checkpointState/snap2/ // Set up the follower with the following files, (as if they came // from the tarball from the leader) // follower/cand/f1.sst - // follower/cand/db.snapshots/checkpointState/dir1/s1.sst + // follower/cand/db.snapshots/checkpointState/snap1/s1.sst // Note that the layout between leader and follower is slightly // different in that the f1.sst on the leader is in a checkpoint @@ -215,8 +215,8 @@ private void setupData() throws IOException { leaderDir = new File(testDir.toString(), "leader"); Assert.assertTrue(leaderDir.mkdirs()); - String pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"; - String pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir2"; + String pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "snap1"; + String pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "snap2"; leaderSnapdir1 = new File(leaderDir.toString(), pathSnap1); Assert.assertTrue(leaderSnapdir1.mkdirs()); Files.write(Paths.get(leaderSnapdir1.toString(), "s1.sst"), dummyData); @@ -237,14 +237,14 @@ private void setupData() throws IOException { leaderCheckpointDir = new File(leaderDir.toString(), - OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "dir1"); + OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "checkpoint1"); Assert.assertTrue(leaderCheckpointDir.mkdirs()); Files.write(Paths.get(leaderCheckpointDir.toString(), "f1.sst"), dummyData); s1FileLink = new File(followerSnapdir2, "s1.sst"); s1File = new File(followerSnapdir1, "s1.sst"); f1FileLink = new File(followerSnapdir2, "f1.sst"); f1File = new File(candidateDir, "f1.sst"); - nolinkFile = new File(followerSnapdir2, "noLink.sst"); + noLinkFile = new File(followerSnapdir2, "noLink.sst"); } @Test @@ -252,8 +252,8 @@ private void setupData() throws IOException { public void testHardLinkCreation() throws IOException { // test that following links are created on the follower - // follower/db.snapshots/checkpointState/dir2/f1.sst - // follower/db.snapshots/checkpointState/dir2/s1.sst + // follower/db.snapshots/checkpointState/snap2/f1.sst + // follower/db.snapshots/checkpointState/snap2/s1.sst // Create map of links to dummy files on the leader Map hardLinkFiles = new HashMap<>(); @@ -291,7 +291,7 @@ public void testFileUtilities() throws IOException { int truncateLength = candidateDir.toString().length() + 1; Set expectedSstFiles = new HashSet<>(Arrays.asList( s1File.toString().substring(truncateLength), - nolinkFile.toString().substring(truncateLength), + noLinkFile.toString().substring(truncateLength), f1File.toString().substring(truncateLength))); Assert.assertEquals(expectedSstFiles, existingSstFiles); @@ -307,12 +307,12 @@ public void testFileUtilities() throws IOException { @Test public void testProcessFile() { - Path copyFile = Paths.get(testDir.toString(), "dir1/copyfile.sst"); - Path excludeFile = Paths.get(testDir.toString(), "dir1/excludefile.sst"); - Path linkToExcludedFile = Paths.get(testDir.toString(), "dir2/excludefile.sst"); - Path linkToCopiedFile = Paths.get(testDir.toString(), "dir2/copyfile.sst"); - Path addToCopiedFiles = Paths.get(testDir.toString(), "dir1/copyfile2.sst"); - Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), "dir1/nonSst"); + Path copyFile = Paths.get(testDir.toString(), "snap1/copyfile.sst"); + Path excludeFile = Paths.get(testDir.toString(), "snap1/excludefile.sst"); + Path linkToExcludedFile = Paths.get(testDir.toString(), "snap2/excludefile.sst"); + Path linkToCopiedFile = Paths.get(testDir.toString(), "snap2/copyfile.sst"); + Path addToCopiedFiles = Paths.get(testDir.toString(), "snap1/copyfile2.sst"); + Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), "snap1/nonSst"); Set toExcludeFiles = new HashSet<>( Collections.singletonList(excludeFile)); From 93833f965224deed6c5760a56da7f1202fd51d98 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 17:04:45 -0700 Subject: [PATCH 08/20] cleanup --- .../ozone/om/TestOmSnapshotManager.java | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index ca90673c355c..96e8b9542f96 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -73,7 +73,7 @@ import static org.mockito.Mockito.when; /** - * Unit test om snapshot manager. + * Unit test ozone snapshot manager. */ public class TestOmSnapshotManager { @@ -82,16 +82,15 @@ public class TestOmSnapshotManager { private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + SNAPSHOT_CANDIDATE_DIR; private File leaderDir; - private File leaderSnapdir1; - private File leaderSnapdir2; + private File leaderSnapDir1; + private File leaderSnapDir2; + private File followerSnapDir2; private File leaderCheckpointDir; private File candidateDir; private File s1FileLink; private File s1File; private File f1FileLink; private File f1File; - private File noLinkFile; - @Before public void init() throws Exception { @@ -193,6 +192,8 @@ private void setupData() throws IOException { // Set up the leader with the following files: // leader/db.checkpoints/checkpoint1/f1.sst // leader/db.snapshots/checkpointState/snap1/s1.sst + // leader/db.snapshots/checkpointState/snap2/noLink.sst + // leader/db.snapshots/checkpointState/snap2/nonSstFile // And the following links: // leader/db.snapshots/checkpointState/snap2/ @@ -201,66 +202,70 @@ private void setupData() throws IOException { // Set up the follower with the following files, (as if they came // from the tarball from the leader) - // follower/cand/f1.sst - // follower/cand/db.snapshots/checkpointState/snap1/s1.sst + // follower/om.db.candidate/f1.sst + // follower/om.db.candidate/db.snapshots/checkpointState/snap1/s1.sst // Note that the layout between leader and follower is slightly // different in that the f1.sst on the leader is in a checkpoint - // directory but on the follower is moved to the candidate omdb + // directory but on the follower is moved to the candidate // directory; the links must be adjusted accordingly. byte[] dummyData = {0}; - // Create dummy leader files to calculate links + // Create dummy leader files to calculate links. leaderDir = new File(testDir.toString(), "leader"); Assert.assertTrue(leaderDir.mkdirs()); String pathSnap1 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "snap1"; String pathSnap2 = OM_SNAPSHOT_CHECKPOINT_DIR + OM_KEY_PREFIX + "snap2"; - leaderSnapdir1 = new File(leaderDir.toString(), pathSnap1); - Assert.assertTrue(leaderSnapdir1.mkdirs()); - Files.write(Paths.get(leaderSnapdir1.toString(), "s1.sst"), dummyData); + leaderSnapDir1 = new File(leaderDir.toString(), pathSnap1); + Assert.assertTrue(leaderSnapDir1.mkdirs()); + Files.write(Paths.get(leaderSnapDir1.toString(), "s1.sst"), dummyData); - leaderSnapdir2 = new File(leaderDir.toString(), pathSnap2); - Assert.assertTrue(leaderSnapdir2.mkdirs()); - Files.write(Paths.get(leaderSnapdir2.toString(), "noLink.sst"), dummyData); - Files.write(Paths.get(leaderSnapdir2.toString(), "nonSstFile"), dummyData); + leaderSnapDir2 = new File(leaderDir.toString(), pathSnap2); + Assert.assertTrue(leaderSnapDir2.mkdirs()); + Files.write(Paths.get(leaderSnapDir2.toString(), "noLink.sst"), dummyData); + Files.write(Paths.get(leaderSnapDir2.toString(), "nonSstFile"), dummyData); - // Also create the follower files + // Also create the follower files. candidateDir = new File(testDir.toString(), CANDIDATE_DIR_NAME); - File followerSnapdir1 = new File(candidateDir.toString(), pathSnap1); - File followerSnapdir2 = new File(candidateDir.toString(), pathSnap2); + File followerSnapDir1 = new File(candidateDir.toString(), pathSnap1); + followerSnapDir2 = new File(candidateDir.toString(), pathSnap2); copyDirectory(leaderDir.toPath(), candidateDir.toPath()); - Files.write(Paths.get(candidateDir.toString(), "f1.sst"), dummyData); - + f1File = new File(candidateDir, "f1.sst"); + Files.write(f1File.toPath(), dummyData); + s1File = new File(followerSnapDir1, "s1.sst"); + // confirm s1 file got copied over. + Assert.assertTrue(s1File.exists()); + // Finish creating leaders files that are not to be copied over, because + // f1.sst belongs in a different directory as explained above. leaderCheckpointDir = new File(leaderDir.toString(), OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "checkpoint1"); Assert.assertTrue(leaderCheckpointDir.mkdirs()); Files.write(Paths.get(leaderCheckpointDir.toString(), "f1.sst"), dummyData); - s1FileLink = new File(followerSnapdir2, "s1.sst"); - s1File = new File(followerSnapdir1, "s1.sst"); - f1FileLink = new File(followerSnapdir2, "f1.sst"); - f1File = new File(candidateDir, "f1.sst"); - noLinkFile = new File(followerSnapdir2, "noLink.sst"); + + // Pointers to follower files to be created. + s1FileLink = new File(followerSnapDir2, "s1.sst"); + f1FileLink = new File(followerSnapDir2, "f1.sst"); } @Test @SuppressFBWarnings({"NP_NULL_ON_SOME_PATH"}) public void testHardLinkCreation() throws IOException { - // test that following links are created on the follower + // test that following links are created on the Follower: // follower/db.snapshots/checkpointState/snap2/f1.sst // follower/db.snapshots/checkpointState/snap2/s1.sst // Create map of links to dummy files on the leader Map hardLinkFiles = new HashMap<>(); - hardLinkFiles.put(Paths.get(leaderSnapdir2.toString(), "f1.sst"), + hardLinkFiles.put(Paths.get(leaderSnapDir2.toString(), "f1.sst"), Paths.get(leaderCheckpointDir.toString(), "f1.sst")); - hardLinkFiles.put(Paths.get(leaderSnapdir2.toString(), "s1.sst"), - Paths.get(leaderSnapdir1.toString(), "s1.sst")); + hardLinkFiles.put(Paths.get(leaderSnapDir2.toString(), "s1.sst"), + Paths.get(leaderSnapDir1.toString(), "s1.sst")); // Create link list. Path hardLinkList = @@ -285,6 +290,7 @@ public void testHardLinkCreation() throws IOException { @Test public void testFileUtilities() throws IOException { + File noLinkFile = new File(followerSnapDir2, "noLink.sst"); List excludeList = getExistingSstFiles(candidateDir); Set existingSstFiles = new HashSet<>(excludeList); @@ -299,8 +305,8 @@ public void testFileUtilities() throws IOException { OMDBCheckpointServlet.normalizeExcludeList(excludeList, leaderCheckpointDir.toString(), leaderDir.toString()); Set expectedNormalizedSet = new HashSet<>(Arrays.asList( - Paths.get(leaderSnapdir1.toString(), "s1.sst"), - Paths.get(leaderSnapdir2.toString(), "noLink.sst"), + Paths.get(leaderSnapDir1.toString(), "s1.sst"), + Paths.get(leaderSnapDir2.toString(), "noLink.sst"), Paths.get(leaderCheckpointDir.toString(), "f1.sst"))); Assert.assertEquals(expectedNormalizedSet, normalizedSet); } @@ -308,8 +314,8 @@ public void testFileUtilities() throws IOException { @Test public void testProcessFile() { Path copyFile = Paths.get(testDir.toString(), "snap1/copyfile.sst"); - Path excludeFile = Paths.get(testDir.toString(), "snap1/excludefile.sst"); - Path linkToExcludedFile = Paths.get(testDir.toString(), "snap2/excludefile.sst"); + Path excludeFile = Paths.get(testDir.toString(), "snap1/excludeFile.sst"); + Path linkToExcludedFile = Paths.get(testDir.toString(), "snap2/excludeFile.sst"); Path linkToCopiedFile = Paths.get(testDir.toString(), "snap2/copyfile.sst"); Path addToCopiedFiles = Paths.get(testDir.toString(), "snap1/copyfile2.sst"); Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), "snap1/nonSst"); From 15353d6d095885fd8fb3e17922d176fe5caf74b3 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 17:33:06 -0700 Subject: [PATCH 09/20] cleanup --- .../ozone/om/TestOmSnapshotManager.java | 63 ++++++++++++------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index 96e8b9542f96..fc4f5510f834 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -87,9 +87,7 @@ public class TestOmSnapshotManager { private File followerSnapDir2; private File leaderCheckpointDir; private File candidateDir; - private File s1FileLink; private File s1File; - private File f1FileLink; private File f1File; @Before @@ -195,20 +193,19 @@ private void setupData() throws IOException { // leader/db.snapshots/checkpointState/snap2/noLink.sst // leader/db.snapshots/checkpointState/snap2/nonSstFile - // And the following links: - // leader/db.snapshots/checkpointState/snap2/ - // leader/db.snapshots/checkpointState/snap2/ - // Set up the follower with the following files, (as if they came // from the tarball from the leader) // follower/om.db.candidate/f1.sst // follower/om.db.candidate/db.snapshots/checkpointState/snap1/s1.sst + // follower/om.db.candidate/db.snapshots/checkpointState/snap2/noLink.sst + // follower/om.db.candidate/db.snapshots/checkpointState/snap2/nonSstFile // Note that the layout between leader and follower is slightly - // different in that the f1.sst on the leader is in a checkpoint - // directory but on the follower is moved to the candidate - // directory; the links must be adjusted accordingly. + // different in that the f1.sst on the leader is in the + // db.checkpoints/checkpoint1 directory but on the follower is + // moved to the om.db.candidate directory; the links must be adjusted + // accordingly. byte[] dummyData = {0}; @@ -246,28 +243,28 @@ private void setupData() throws IOException { OM_CHECKPOINT_DIR + OM_KEY_PREFIX + "checkpoint1"); Assert.assertTrue(leaderCheckpointDir.mkdirs()); Files.write(Paths.get(leaderCheckpointDir.toString(), "f1.sst"), dummyData); - - // Pointers to follower files to be created. - s1FileLink = new File(followerSnapDir2, "s1.sst"); - f1FileLink = new File(followerSnapDir2, "f1.sst"); } + /* + * Create map of links to files on the leader: + * leader/db.snapshots/checkpointState/snap2/ + * leader/db.snapshots/checkpointState/snap2/ + * and test that corresponding links are created on the Follower: + * follower/db.snapshots/checkpointState/snap2/f1.sst + * follower/db.snapshots/checkpointState/snap2/s1.sst + */ @Test @SuppressFBWarnings({"NP_NULL_ON_SOME_PATH"}) public void testHardLinkCreation() throws IOException { - // test that following links are created on the Follower: - // follower/db.snapshots/checkpointState/snap2/f1.sst - // follower/db.snapshots/checkpointState/snap2/s1.sst - - // Create map of links to dummy files on the leader + // Map of links to files on the leader Map hardLinkFiles = new HashMap<>(); hardLinkFiles.put(Paths.get(leaderSnapDir2.toString(), "f1.sst"), Paths.get(leaderCheckpointDir.toString(), "f1.sst")); hardLinkFiles.put(Paths.get(leaderSnapDir2.toString(), "s1.sst"), Paths.get(leaderSnapDir1.toString(), "s1.sst")); - // Create link list. + // Create link list from leader map. Path hardLinkList = OmSnapshotUtils.createHardLinkList( leaderDir.toString().length() + 1, hardLinkFiles); @@ -275,6 +272,10 @@ public void testHardLinkCreation() throws IOException { Files.move(hardLinkList, Paths.get(candidateDir.toString(), OM_HARDLINK_FILE)); + // Pointers to follower links to be created. + File f1FileLink = new File(followerSnapDir2, "f1.sst"); + File s1FileLink = new File(followerSnapDir2, "s1.sst"); + // Create links on the follower from list. OmSnapshotUtils.createHardLinks(candidateDir.toPath()); @@ -288,12 +289,16 @@ public void testHardLinkCreation() throws IOException { getINode(f1File.toPath()), getINode(f1FileLink.toPath())); } + /* + * Test that exclude list is generated correctly. + */ @Test - public void testFileUtilities() throws IOException { + public void testExcludeUtilities() throws IOException { File noLinkFile = new File(followerSnapDir2, "noLink.sst"); - List excludeList = getExistingSstFiles(candidateDir); - Set existingSstFiles = new HashSet<>(excludeList); + // Confirm that the list of existing sst files is as expected. + List existingSstList = getExistingSstFiles(candidateDir); + Set existingSstFiles = new HashSet<>(existingSstList); int truncateLength = candidateDir.toString().length() + 1; Set expectedSstFiles = new HashSet<>(Arrays.asList( s1File.toString().substring(truncateLength), @@ -301,8 +306,10 @@ public void testFileUtilities() throws IOException { f1File.toString().substring(truncateLength))); Assert.assertEquals(expectedSstFiles, existingSstFiles); + // Confirm that the excluded list is normalized as expected. + // (Normalizing means matches the layout on the leader.) Set normalizedSet = - OMDBCheckpointServlet.normalizeExcludeList(excludeList, + OMDBCheckpointServlet.normalizeExcludeList(existingSstList, leaderCheckpointDir.toString(), leaderDir.toString()); Set expectedNormalizedSet = new HashSet<>(Arrays.asList( Paths.get(leaderSnapDir1.toString(), "s1.sst"), @@ -311,6 +318,10 @@ public void testFileUtilities() throws IOException { Assert.assertEquals(expectedNormalizedSet, normalizedSet); } + /* + * Confirm that processFile() correctly determines whether a file + * should be copied, linked, or excluded from the tarball entirely. + */ @Test public void testProcessFile() { Path copyFile = Paths.get(testDir.toString(), "snap1/copyfile.sst"); @@ -326,6 +337,8 @@ public void testProcessFile() { List excluded = new ArrayList<>(); Map hardLinkFiles = new HashMap<>(); + // Confirm the exclude file gets added to the excluded list, + // (and thus is excluded.) processFile(excludeFile, copyFiles, hardLinkFiles, toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 1); Assert.assertEquals((excluded.get(0)), excludeFile.toString()); @@ -333,6 +346,7 @@ public void testProcessFile() { Assert.assertEquals(hardLinkFiles.size(), 0); excluded = new ArrayList<>(); + // Confirm the linkToExcludedFile gets added as a link. processFile(linkToExcludedFile, copyFiles, hardLinkFiles, toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 0); @@ -341,6 +355,7 @@ public void testProcessFile() { Assert.assertEquals(hardLinkFiles.get(linkToExcludedFile), excludeFile); hardLinkFiles = new HashMap<>(); + // Confirm the linkToCopiedFile gets added as a link. processFile(linkToCopiedFile, copyFiles, hardLinkFiles, toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 0); @@ -349,6 +364,7 @@ public void testProcessFile() { Assert.assertEquals(hardLinkFiles.get(linkToCopiedFile), copyFile); hardLinkFiles = new HashMap<>(); + // Confirm the addToCopiedFiles gets added to list of copied files processFile(addToCopiedFiles, copyFiles, hardLinkFiles, toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 0); @@ -356,6 +372,7 @@ public void testProcessFile() { Assert.assertTrue(copyFiles.contains(addToCopiedFiles)); copyFiles = new HashSet<>(Collections.singletonList(copyFile)); + // Confirm the addNonSstToCopiedFiles gets added to list of copied files processFile(addNonSstToCopiedFiles, copyFiles, hardLinkFiles, toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 0); From 4f04710bb9c26eba56a0cd860617d4685bf677f4 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Tue, 23 May 2023 23:08:23 -0700 Subject: [PATCH 10/20] cleanup again --- .../ozone/om/TestOMDbCheckpointServlet.java | 4 +- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 44 +++++++++++-------- .../ozone/om/OMDBCheckpointServlet.java | 9 ++-- .../ozone/om/TestOmSnapshotManager.java | 27 +++++++----- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java index e3ab2ef30697..d9ebcfd3a268 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java @@ -479,8 +479,8 @@ private void prepSnapshotData() throws Exception { new File(snapshotDirName).getParent(), "fabricatedSnapshot"); fabricatedSnapshot.toFile().mkdirs(); - Assert.assertTrue(Paths.get(fabricatedSnapshot.toString(), FABRICATED_FILE_NAME) - .toFile().createNewFile()); + Assert.assertTrue(Paths.get(fabricatedSnapshot.toString(), + FABRICATED_FILE_NAME).toFile().createNewFile()); // Create fabricated links to snapshot dirs // to confirm that links are recognized even if diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 93e8a4ec044c..f909598c687a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -199,7 +199,8 @@ public void testInstallSnapshot(int numSnapshotsToCreate) throws Exception { String snapshotName = ""; List keys = new ArrayList<>(); SnapshotInfo snapshotInfo = null; - for (int snapshotCount = 0; snapshotCount < numSnapshotsToCreate; snapshotCount++) { + for (int snapshotCount = 0; snapshotCount < numSnapshotsToCreate; + snapshotCount++) { snapshotName = snapshotNamePrefix + snapshotCount; keys = writeKeys(keyIncrement); snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName); @@ -280,14 +281,16 @@ public void testInstallSnapshot(int numSnapshotsToCreate) throws Exception { checkSnapshot(leaderOM, followerOM, snapshotName, keys, snapshotInfo); } - private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, String snapshotName, - List keys, SnapshotInfo snapshotInfo) + private void checkSnapshot(OzoneManager leaderOM, OzoneManager followerOM, + String snapshotName, + List keys, SnapshotInfo snapshotInfo) throws IOException { // Read back data from snapshot. OmKeyArgs omKeyArgs = new OmKeyArgs.Builder() .setVolumeName(volumeName) .setBucketName(bucketName) - .setKeyName(".snapshot/" + snapshotName + "/" + keys.get(keys.size() - 1)).build(); + .setKeyName(".snapshot/" + snapshotName + "/" + + keys.get(keys.size() - 1)).build(); OmKeyInfo omKeyInfo; omKeyInfo = followerOM.lookupKey(omKeyArgs); Assertions.assertNotNull(omKeyInfo); @@ -392,7 +395,7 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) // The recently started OM should be lagging behind the leader OM. // Wait & for follower to update transactions to leader snapshot index. - // Timeout error if follower does not load update within 10s + // Timeout error if follower does not load update within 30s GenericTestUtils.waitFor(() -> { return followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1; @@ -448,24 +451,24 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) assertEquals(0, filesInCandidate.length); checkSnapshot(leaderOM, followerOM, "snap80", firstKeys, snapshotInfo2); - checkSnapshot(leaderOM, followerOM, "snap160", firstIncrement.keys, firstIncrement.snapshotInfo); - checkSnapshot(leaderOM, followerOM, "snap240", secondIncrement.keys, secondIncrement.snapshotInfo); - Assertions.assertEquals(followerOM.getOmSnapshotProvider().getInitCount(), 2, + checkSnapshot(leaderOM, followerOM, "snap160", firstIncrement.keys, + firstIncrement.snapshotInfo); + checkSnapshot(leaderOM, followerOM, "snap240", secondIncrement.keys, + secondIncrement.snapshotInfo); + Assertions.assertEquals( + followerOM.getOmSnapshotProvider().getInitCount(), 2, "Only initialized twice"); } - private static class TestResults { + static class TestResults { SnapshotInfo snapshotInfo; List keys; } - private TestResults getNextIncrementalTarball(int numKeys, - int expectedNumDownloads, - OzoneManager leaderOM, - OzoneManagerRatisServer leaderRatisServer, - FaultInjector faultInjector, - OzoneManager followerOM, - Path tempDir) + private TestResults getNextIncrementalTarball( + int numKeys, int expectedNumDownloads, + OzoneManager leaderOM, OzoneManagerRatisServer leaderRatisServer, + FaultInjector faultInjector, OzoneManager followerOM, Path tempDir) throws IOException, InterruptedException, TimeoutException { TestResults tr = new TestResults(); @@ -494,7 +497,8 @@ private TestResults getNextIncrementalTarball(int numKeys, followerOM.getOmSnapshotProvider().getNumDownloaded() == expectedNumDownloads, 1000, 10000); - assertTrue(followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex() + assertTrue(followerOM.getOmRatisServer(). + getLastAppliedTermIndex().getIndex() >= leaderOMSnapshotIndex - 1); // Now confirm tarball is just incremental and contains no unexpected @@ -506,7 +510,8 @@ private TestResults getNextIncrementalTarball(int numKeys, Path followerCandidatePath = followerOM.getOmSnapshotProvider(). getCandidateDir().toPath(); - // Confirm that none of the files in the tarball match one in the candidate dir. + // Confirm that none of the files in the tarball match one in the + // candidate dir. assertTrue(sstFiles.size() > 0); for (String s: sstFiles) { File sstFile = Paths.get(followerCandidatePath.toString(), s).toFile(); @@ -522,7 +527,8 @@ private TestResults getNextIncrementalTarball(int numKeys, for (String line: lines.collect(Collectors.toList())) { lineCount++; String link = line.split("\t")[0]; - File linkFile = Paths.get(followerCandidatePath.toString(), link).toFile(); + File linkFile = Paths.get( + followerCandidatePath.toString(), link).toFile(); assertFalse(linkFile.exists(), "Incremental checkpoint should not " + "duplicate existing links"); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index ad1417233481..8ef32ed9f56c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -181,8 +181,8 @@ private void getFilesForArchive(DBCheckpoint checkpoint, // Get the active fs files. Path dir = checkpoint.getCheckpointLocation(); - processDir(dir, copyFiles, hardLinkFiles, toExcludeFiles, new HashSet<>(), excluded); - + processDir(dir, copyFiles, hardLinkFiles, toExcludeFiles, + new HashSet<>(), excluded); if (!includeSnapshotData) { return; @@ -192,7 +192,8 @@ private void getFilesForArchive(DBCheckpoint checkpoint, Set snapshotPaths = waitForSnapshotDirs(checkpoint); Path snapshotDir = Paths.get(OMStorage.getOmDbDir(getConf()).toString(), OM_SNAPSHOT_DIR); - processDir(snapshotDir, copyFiles, hardLinkFiles, toExcludeFiles, snapshotPaths, excluded); + processDir(snapshotDir, copyFiles, hardLinkFiles, toExcludeFiles, + snapshotPaths, excluded); } /** @@ -300,7 +301,7 @@ public static void processFile(Path file, Set copyFiles, copyFiles.add(file); } } - } else {// Not sst file. + } else { // Not sst file. copyFiles.add(file); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index fc4f5510f834..cf6e4d54d62e 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -80,7 +80,7 @@ public class TestOmSnapshotManager { private OzoneManager om; private File testDir; private static final String CANDIDATE_DIR_NAME = OM_DB_NAME + - SNAPSHOT_CANDIDATE_DIR; + SNAPSHOT_CANDIDATE_DIR; private File leaderDir; private File leaderSnapDir1; private File leaderSnapDir2; @@ -324,12 +324,18 @@ public void testExcludeUtilities() throws IOException { */ @Test public void testProcessFile() { - Path copyFile = Paths.get(testDir.toString(), "snap1/copyfile.sst"); - Path excludeFile = Paths.get(testDir.toString(), "snap1/excludeFile.sst"); - Path linkToExcludedFile = Paths.get(testDir.toString(), "snap2/excludeFile.sst"); - Path linkToCopiedFile = Paths.get(testDir.toString(), "snap2/copyfile.sst"); - Path addToCopiedFiles = Paths.get(testDir.toString(), "snap1/copyfile2.sst"); - Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), "snap1/nonSst"); + Path copyFile = Paths.get(testDir.toString(), + "snap1/copyfile.sst"); + Path excludeFile = Paths.get(testDir.toString(), + "snap1/excludeFile.sst"); + Path linkToExcludedFile = Paths.get(testDir.toString(), + "snap2/excludeFile.sst"); + Path linkToCopiedFile = Paths.get(testDir.toString(), + "snap2/copyfile.sst"); + Path addToCopiedFiles = Paths.get(testDir.toString(), + "snap1/copyfile2.sst"); + Path addNonSstToCopiedFiles = Paths.get(testDir.toString(), + "snap1/nonSst"); Set toExcludeFiles = new HashSet<>( Collections.singletonList(excludeFile)); @@ -339,7 +345,8 @@ public void testProcessFile() { // Confirm the exclude file gets added to the excluded list, // (and thus is excluded.) - processFile(excludeFile, copyFiles, hardLinkFiles, toExcludeFiles, excluded); + processFile(excludeFile, copyFiles, hardLinkFiles, toExcludeFiles, + excluded); Assert.assertEquals(excluded.size(), 1); Assert.assertEquals((excluded.get(0)), excludeFile.toString()); Assert.assertEquals(copyFiles.size(), 1); @@ -373,8 +380,8 @@ public void testProcessFile() { copyFiles = new HashSet<>(Collections.singletonList(copyFile)); // Confirm the addNonSstToCopiedFiles gets added to list of copied files - processFile(addNonSstToCopiedFiles, copyFiles, hardLinkFiles, toExcludeFiles, - excluded); + processFile(addNonSstToCopiedFiles, copyFiles, hardLinkFiles, + toExcludeFiles, excluded); Assert.assertEquals(excluded.size(), 0); Assert.assertEquals(copyFiles.size(), 2); Assert.assertTrue(copyFiles.contains(addNonSstToCopiedFiles)); From 0785a84d0eea3755b781dab177ce2c4c67136aa4 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Wed, 24 May 2023 12:12:55 -0700 Subject: [PATCH 11/20] checkstyle --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 3 +-- .../org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index ad2b44a8034f..f2414d54088e 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -54,7 +54,6 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.io.FilenameFilter; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -374,7 +373,7 @@ public static List getExistingSstFiles(File db) throws IOException { int truncateLength = db.toString().length() + 1; // Walk the db dir and get all sst files including omSnapshot files. - try(Stream files = Files.walk(db.toPath())) { + try (Stream files = Files.walk(db.toPath())) { sstList = files.filter(path -> path.toString().endsWith(ROCKSDB_SST_SUFFIX)). map(p -> p.toString().substring(truncateLength)). diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index c32596b8aabb..44a41eb8e9c2 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -47,7 +47,6 @@ import java.io.OutputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; From 0a178e6d1b2f63f5294c80b4c67c40bb30d7757c Mon Sep 17 00:00:00 2001 From: George Jahad Date: Wed, 24 May 2023 12:52:51 -0700 Subject: [PATCH 12/20] intellij warnings --- .../hdds/utils/TestRDBSnapshotProvider.java | 18 +++--- .../ozone/om/TestOMDbCheckpointServlet.java | 6 +- .../hadoop/ozone/om/TestOMRatisSnapshots.java | 59 +++++++++++-------- .../ozone/om/OMDBCheckpointServlet.java | 4 +- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index 44a41eb8e9c2..d3f7fc9cf52e 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -57,9 +57,9 @@ import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream; import static org.apache.hadoop.hdds.utils.db.TestRDBStore.newRDBStore; -import static org.junit.Assert.assertFalse; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -100,7 +100,7 @@ public void init(@TempDir File tempDir) throws Exception { MAX_DB_UPDATES_SIZE_THRESHOLD); rdbSnapshotProvider = new RDBSnapshotProvider(testDir, "test.db") { @Override - public void close() throws IOException { + public void close() { } @Override @@ -241,8 +241,8 @@ public void insertRandomData(RDBStore dbStore, int familyIndex) @Test public void testCheckLeaderConsistent() throws IOException { - // Leader inited to null at startup. - assertTrue(rdbSnapshotProvider.getInitCount() == 1); + // Leader initialized to null at startup. + assertEquals(1, rdbSnapshotProvider.getInitCount()); File dummyFile = new File(rdbSnapshotProvider.getCandidateDir(), "file1.sst"); Files.write(dummyFile.toPath(), @@ -251,15 +251,15 @@ public void testCheckLeaderConsistent() throws IOException { // Set the leader. rdbSnapshotProvider.checkLeaderConsistent("node1"); - assertTrue(rdbSnapshotProvider.getInitCount() == 2); + assertEquals(2, rdbSnapshotProvider.getInitCount()); assertFalse(dummyFile.exists()); - // Confirm setting the same leader doesn't reinit. + // Confirm setting the same leader doesn't reinitialize. rdbSnapshotProvider.checkLeaderConsistent("node1"); - assertTrue(rdbSnapshotProvider.getInitCount() == 2); + assertEquals(2, rdbSnapshotProvider.getInitCount()); - // Confirm setting different leader does reinit. + // Confirm setting different leader does reinitialize. rdbSnapshotProvider.checkLeaderConsistent("node2"); - assertTrue(rdbSnapshotProvider.getInitCount() == 3); + assertEquals(3, rdbSnapshotProvider.getInitCount()); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java index d9ebcfd3a268..ade5196304bd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java @@ -323,7 +323,7 @@ public void testWriteDbDataToStream() throws Exception { String newDbDirName = testDirName + OM_KEY_PREFIX + OM_DB_NAME; int newDbDirLength = newDbDirName.length() + 1; File newDbDir = new File(newDbDirName); - newDbDir.mkdirs(); + Assert.assertTrue(newDbDir.mkdirs()); FileUtil.unTar(tempFile, newDbDir); // Move snapshot dir to correct location. @@ -478,13 +478,13 @@ private void prepSnapshotData() throws Exception { Path fabricatedSnapshot = Paths.get( new File(snapshotDirName).getParent(), "fabricatedSnapshot"); - fabricatedSnapshot.toFile().mkdirs(); + Assert.assertTrue(fabricatedSnapshot.toFile().mkdirs()); Assert.assertTrue(Paths.get(fabricatedSnapshot.toString(), FABRICATED_FILE_NAME).toFile().createNewFile()); // Create fabricated links to snapshot dirs // to confirm that links are recognized even if - // they are don't point to the checkpoint directory. + // they don't point to the checkpoint directory. Path fabricatedFile = Paths.get(snapshotDirName, FABRICATED_FILE_NAME); Path fabricatedLink = Paths.get(snapshotDirName2, FABRICATED_FILE_NAME); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index f909598c687a..5b358f8131d3 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -85,6 +85,7 @@ import static org.apache.hadoop.ozone.om.OmSnapshotManager.OM_HARDLINK_FILE; import static org.apache.hadoop.ozone.om.OmSnapshotManager.getSnapshotPath; import static org.apache.hadoop.ozone.om.TestOzoneManagerHAWithData.createKey; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -377,9 +378,9 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) }, 1000, 10000); // Get two incremental tarballs, adding new keys/snapshot for each. - TestResults firstIncrement = getNextIncrementalTarball(160, 2, leaderOM, + IncrementData firstIncrement = getNextIncrementalTarball(160, 2, leaderOM, leaderRatisServer, faultInjector, followerOM, tempDir); - TestResults secondIncrement = getNextIncrementalTarball(240, 3, leaderOM, + IncrementData secondIncrement = getNextIncrementalTarball(240, 3, leaderOM, leaderRatisServer, faultInjector, followerOM, tempDir); // Resume the follower thread, it would download the incremental snapshot. @@ -415,12 +416,12 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } - for (String key : firstIncrement.keys) { + for (String key : firstIncrement.getKeys()) { assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } - for (String key : secondIncrement.keys) { + for (String key : secondIncrement.getKeys()) { assertNotNull(followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } @@ -451,26 +452,32 @@ public void testInstallIncrementalSnapshot(@TempDir Path tempDir) assertEquals(0, filesInCandidate.length); checkSnapshot(leaderOM, followerOM, "snap80", firstKeys, snapshotInfo2); - checkSnapshot(leaderOM, followerOM, "snap160", firstIncrement.keys, - firstIncrement.snapshotInfo); - checkSnapshot(leaderOM, followerOM, "snap240", secondIncrement.keys, - secondIncrement.snapshotInfo); + checkSnapshot(leaderOM, followerOM, "snap160", firstIncrement.getKeys(), + firstIncrement.getSnapshotInfo()); + checkSnapshot(leaderOM, followerOM, "snap240", secondIncrement.getKeys(), + secondIncrement.getSnapshotInfo()); Assertions.assertEquals( followerOM.getOmSnapshotProvider().getInitCount(), 2, "Only initialized twice"); } - static class TestResults { - SnapshotInfo snapshotInfo; - List keys; + static class IncrementData { + private List keys; + private SnapshotInfo snapshotInfo; + public List getKeys() { + return keys; + } + public SnapshotInfo getSnapshotInfo() { + return snapshotInfo; + } } - private TestResults getNextIncrementalTarball( + private IncrementData getNextIncrementalTarball( int numKeys, int expectedNumDownloads, OzoneManager leaderOM, OzoneManagerRatisServer leaderRatisServer, FaultInjector faultInjector, OzoneManager followerOM, Path tempDir) throws IOException, InterruptedException, TimeoutException { - TestResults tr = new TestResults(); + IncrementData tr = new IncrementData(); // Get the latest db checkpoint from the leader OM. TransactionInfo transactionInfo = @@ -504,7 +511,7 @@ private TestResults getNextIncrementalTarball( // Now confirm tarball is just incremental and contains no unexpected // files/links. Path increment = Paths.get(tempDir.toString(), "increment" + numKeys); - increment.toFile().mkdirs(); + assertTrue(increment.toFile().mkdirs()); unTarLatestTarBall(followerOM, increment); List sstFiles = HAUtils.getExistingSstFiles(increment.toFile()); Path followerCandidatePath = followerOM.getOmSnapshotProvider(). @@ -739,25 +746,25 @@ public void testInstallSnapshotWithClientWrite() throws Exception { // Verify that the follower OM's DB contains the transactions which were // made while it was inactive. OMMetadataManager followerOMMetaMgr = followerOM.getMetadataManager(); - Assert.assertNotNull(followerOMMetaMgr.getVolumeTable().get( + assertNotNull(followerOMMetaMgr.getVolumeTable().get( followerOMMetaMgr.getVolumeKey(volumeName))); - Assert.assertNotNull(followerOMMetaMgr.getBucketTable().get( + assertNotNull(followerOMMetaMgr.getBucketTable().get( followerOMMetaMgr.getBucketKey(volumeName, bucketName))); for (String key : keys) { - Assert.assertNotNull(followerOMMetaMgr.getKeyTable( + assertNotNull(followerOMMetaMgr.getKeyTable( TEST_BUCKET_LAYOUT) .get(followerOMMetaMgr.getOzoneKey(volumeName, bucketName, key))); } OMMetadataManager leaderOmMetaMgr = leaderOM.getMetadataManager(); for (String key : newKeys) { - Assert.assertNotNull(leaderOmMetaMgr.getKeyTable( + assertNotNull(leaderOmMetaMgr.getKeyTable( TEST_BUCKET_LAYOUT) .get(followerOMMetaMgr.getOzoneKey(volumeName, bucketName, key))); } Thread.sleep(5000); followerOMMetaMgr = followerOM.getMetadataManager(); for (String key : newKeys) { - Assert.assertNotNull(followerOMMetaMgr.getKeyTable( + assertNotNull(followerOMMetaMgr.getKeyTable( TEST_BUCKET_LAYOUT) .get(followerOMMetaMgr.getOzoneKey(volumeName, bucketName, key))); } @@ -807,7 +814,7 @@ public void testInstallSnapshotWithClientRead() throws Exception { getKeys(keys, 10); readKeys(keys); } catch (IOException e) { - Fail.fail("Read Key failed", e); + assertTrue(Fail.fail("Read Key failed", e)); } return null; }); @@ -838,12 +845,12 @@ public void testInstallSnapshotWithClientRead() throws Exception { // Verify that the follower OM's DB contains the transactions which were // made while it was inactive. OMMetadataManager followerOMMetaMngr = followerOM.getMetadataManager(); - Assert.assertNotNull(followerOMMetaMngr.getVolumeTable().get( + assertNotNull(followerOMMetaMngr.getVolumeTable().get( followerOMMetaMngr.getVolumeKey(volumeName))); - Assert.assertNotNull(followerOMMetaMngr.getBucketTable().get( + assertNotNull(followerOMMetaMngr.getBucketTable().get( followerOMMetaMngr.getBucketKey(volumeName, bucketName))); for (String key : keys) { - Assert.assertNotNull(followerOMMetaMngr.getKeyTable( + assertNotNull(followerOMMetaMngr.getKeyTable( TEST_BUCKET_LAYOUT) .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key))); } @@ -904,7 +911,7 @@ public void testInstallOldCheckpointFailure() throws Exception { assertLogCapture(logCapture, errorMsg); Assert.assertNull("OM installed checkpoint even though checkpoint " + "logIndex is less than it's lastAppliedIndex", newTermIndex); - Assert.assertEquals(followerTermIndex, + assertEquals(followerTermIndex, followerRatisServer.getLastAppliedTermIndex()); String msg = "OM DB is not stopped. Started services with Term: " + followerTermIndex.getTerm() + " and Index: " + @@ -1016,7 +1023,7 @@ private void getKeys(List keys, int round) throws IOException { while (round > 0) { for (String keyName : keys) { OzoneKeyDetails key = ozoneBucket.getKey(keyName); - Assert.assertEquals(keyName, key.getName()); + assertEquals(keyName, key.getName()); } round--; } @@ -1089,7 +1096,7 @@ public void resume() throws IOException { ready.await(); } catch (InterruptedException e) { e.printStackTrace(); - Fail.fail("resume interrupted"); + assertTrue(Fail.fail("resume interrupted")); } wait.countDown(); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 8ef32ed9f56c..9e3a1e3d9c87 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -75,7 +75,7 @@ * * If Kerberos is enabled, the principal should be appended to * `ozone.administrator`, e.g. `scm/scm@EXAMPLE.COM` - * If Kerberos is not enabled, simply append the login user name to + * If Kerberos is not enabled, simply append the login username to * `ozone.administrator`, e.g. `scm` */ public class OMDBCheckpointServlet extends DBCheckpointServlet @@ -283,7 +283,7 @@ public static void processFile(Path file, Set copyFiles, } else { Path fileNamePath = file.getFileName(); if (fileNamePath == null) { - throw new NullPointerException("Bad file: " + file.toString()); + throw new NullPointerException("Bad file: " + file); } String fileName = fileNamePath.toString(); if (fileName.endsWith(ROCKSDB_SST_SUFFIX)) { From 5b72c9e37ac3f80e010b05dda7c1cd1e4e6fda09 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Wed, 24 May 2023 13:26:19 -0700 Subject: [PATCH 13/20] cleanup --- .../org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 5b358f8131d3..285eb7fc71a9 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -477,7 +477,7 @@ private IncrementData getNextIncrementalTarball( OzoneManager leaderOM, OzoneManagerRatisServer leaderRatisServer, FaultInjector faultInjector, OzoneManager followerOM, Path tempDir) throws IOException, InterruptedException, TimeoutException { - IncrementData tr = new IncrementData(); + IncrementData id = new IncrementData(); // Get the latest db checkpoint from the leader OM. TransactionInfo transactionInfo = @@ -488,10 +488,10 @@ private IncrementData getNextIncrementalTarball( long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex(); // Do some transactions, let leader OM take a new snapshot and purge the // old logs, so that follower must download the new increment. - tr.keys = writeKeysToIncreaseLogIndex(leaderRatisServer, + id.keys = writeKeysToIncreaseLogIndex(leaderRatisServer, numKeys); - tr.snapshotInfo = createOzoneSnapshot(leaderOM, "snap" + numKeys); + id.snapshotInfo = createOzoneSnapshot(leaderOM, "snap" + numKeys); // Resume the follower thread, it would download the incremental snapshot. faultInjector.resume(); @@ -542,7 +542,7 @@ private IncrementData getNextIncrementalTarball( } assertTrue(lineCount > 0); } - return tr; + return id; } @Test From 5772f0a2ae7a16bca37f5c2e3705d7cd356fa792 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Wed, 24 May 2023 13:30:51 -0700 Subject: [PATCH 14/20] cleanup --- .../java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java index cf6e4d54d62e..2d0a2498e882 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java @@ -224,7 +224,6 @@ private void setupData() throws IOException { Files.write(Paths.get(leaderSnapDir2.toString(), "noLink.sst"), dummyData); Files.write(Paths.get(leaderSnapDir2.toString(), "nonSstFile"), dummyData); - // Also create the follower files. candidateDir = new File(testDir.toString(), CANDIDATE_DIR_NAME); From f758ab2cd96d92d2d73d51286bdd3c87840bceec Mon Sep 17 00:00:00 2001 From: George Jahad Date: Wed, 24 May 2023 15:52:31 -0700 Subject: [PATCH 15/20] checkstyle --- .../org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java index 285eb7fc71a9..cbe3575124c1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java @@ -49,7 +49,6 @@ import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.server.protocol.TermIndex; import org.assertj.core.api.Fail; -import org.junit.Assert; import org.junit.Ignore; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -909,8 +908,9 @@ public void testInstallOldCheckpointFailure() throws Exception { "TermIndex " + followerTermIndex + " and checkpoint has lower " + "TermIndex"; assertLogCapture(logCapture, errorMsg); - Assert.assertNull("OM installed checkpoint even though checkpoint " + - "logIndex is less than it's lastAppliedIndex", newTermIndex); + assertNull(newTermIndex, + "OM installed checkpoint even though checkpoint " + + "logIndex is less than it's lastAppliedIndex"); assertEquals(followerTermIndex, followerRatisServer.getLastAppliedTermIndex()); String msg = "OM DB is not stopped. Started services with Term: " + From 54aed002dc5d280cbd3065dea14a6454f0b860aa Mon Sep 17 00:00:00 2001 From: GeorgeJahad Date: Thu, 15 Jun 2023 13:38:52 -0700 Subject: [PATCH 16/20] Apply suggestions from code review Co-authored-by: Siyao Meng <50227127+smengcl@users.noreply.github.com> --- .../apache/hadoop/hdds/utils/RDBSnapshotProvider.java | 3 ++- .../hadoop/hdds/utils/TestRDBSnapshotProvider.java | 2 +- .../apache/hadoop/ozone/om/OMDBCheckpointServlet.java | 9 +++++---- .../java/org/apache/hadoop/ozone/om/OzoneManager.java | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java index 3391ffc33599..77df0040e444 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java @@ -60,6 +60,7 @@ public abstract class RDBSnapshotProvider implements Closeable { private final AtomicReference lastLeaderRef; private final AtomicLong numDownloaded; private FaultInjector injector; + // The number of times init() is called private final AtomicLong initCount; public RDBSnapshotProvider(File snapshotDir, String dbName) { @@ -136,7 +137,7 @@ public DBCheckpoint downloadDBSnapshotFromLeader(String leaderNodeID) * @param currentLeader the ID of leader node */ @VisibleForTesting - void checkLeaderConsistent(String currentLeader) throws IOException { + void checkLeaderConsistency(String currentLeader) throws IOException { String lastLeader = lastLeaderRef.get(); if (lastLeader != null) { if (!lastLeader.equals(currentLeader)) { diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index d3f7fc9cf52e..6249a1e7dac7 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -240,7 +240,7 @@ public void insertRandomData(RDBStore dbStore, int familyIndex) } @Test - public void testCheckLeaderConsistent() throws IOException { + public void testCheckLeaderConsistency() throws IOException { // Leader initialized to null at startup. assertEquals(1, rdbSnapshotProvider.getInitCount()); File dummyFile = new File(rdbSnapshotProvider.getCandidateDir(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 9e3a1e3d9c87..4db64fba4101 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -275,9 +275,9 @@ private void processDir(Path dir, Set copyFiles, */ @VisibleForTesting public static void processFile(Path file, Set copyFiles, - Map hardLinkFiles, - Set toExcludeFiles, - List excluded) { + Map hardLinkFiles, + Set toExcludeFiles, + List excluded) { if (toExcludeFiles.contains(file)) { excluded.add(file.toString()); } else { @@ -301,7 +301,8 @@ public static void processFile(Path file, Set copyFiles, copyFiles.add(file); } } - } else { // Not sst file. + } else { + // Not sst file. copyFiles.add(file); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index e7d4bc1483b7..c4a29a7bdaa9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -3848,7 +3848,7 @@ private void moveCheckpointFiles(File oldDB, Path checkpointPath, File dbDir, Files.deleteIfExists(markerFile); } catch (IOException e) { LOG.error("Failed to move downloaded DB checkpoint {} to metadata " + - "directory {}. Exception: {}. Resetting to original DB.", + "directory {}. Exception: {}. Resetting to original DB.", checkpointPath, oldDB.toPath(), e); try { FileUtil.fullyDelete(oldDB); From dd25523aba77ee3345d3afc93088f96205855d1b Mon Sep 17 00:00:00 2001 From: George Jahad Date: Thu, 15 Jun 2023 15:25:09 -0700 Subject: [PATCH 17/20] addressed review comments --- hadoop-hdds/common/src/main/resources/ozone-default.xml | 2 +- .../org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java | 2 +- .../apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java | 6 +++--- .../hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 5457dc04ebb4..58ed33a33661 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -2007,7 +2007,7 @@ ozone.om.snapshot.provider.request.timeout - 5000ms + 300000ms OZONE, OM, HA, MANAGEMENT Connection request timeout for HTTP call made by OM Snapshot Provider to diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java index 77df0040e444..261e4e103dab 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java @@ -108,7 +108,7 @@ public DBCheckpoint downloadDBSnapshotFromLeader(String leaderNodeID) throws IOException { LOG.info("Prepare to download the snapshot from leader OM {} and " + "reloading state from the snapshot.", leaderNodeID); - checkLeaderConsistent(leaderNodeID); + checkLeaderConsistency(leaderNodeID); String snapshotFileName = getSnapshotFileName(leaderNodeID); File targetFile = new File(snapshotDir, snapshotFileName); diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java index 6249a1e7dac7..585c635d5ad1 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/TestRDBSnapshotProvider.java @@ -250,16 +250,16 @@ public void testCheckLeaderConsistency() throws IOException { assertTrue(dummyFile.exists()); // Set the leader. - rdbSnapshotProvider.checkLeaderConsistent("node1"); + rdbSnapshotProvider.checkLeaderConsistency("node1"); assertEquals(2, rdbSnapshotProvider.getInitCount()); assertFalse(dummyFile.exists()); // Confirm setting the same leader doesn't reinitialize. - rdbSnapshotProvider.checkLeaderConsistent("node1"); + rdbSnapshotProvider.checkLeaderConsistency("node1"); assertEquals(2, rdbSnapshotProvider.getInitCount()); // Confirm setting different leader does reinitialize. - rdbSnapshotProvider.checkLeaderConsistent("node2"); + rdbSnapshotProvider.checkLeaderConsistency("node2"); assertEquals(3, rdbSnapshotProvider.getInitCount()); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java index 8ab652612f56..7e2483e574b5 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOmSnapshotUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.snapshot; +import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -75,5 +76,6 @@ public void testLinkFiles(@TempDir File tempDir) throws Exception { map(Path::toString).collect(Collectors.toSet()); assertEquals(tree1Files, tree2Files); + GenericTestUtils.deleteDirectory(tempDir); } } From 02166d1522d44e3e2dd16e3fef4dc8d5350301fa Mon Sep 17 00:00:00 2001 From: Siyao Meng <50227127+smengcl@users.noreply.github.com> Date: Thu, 15 Jun 2023 18:19:33 -0700 Subject: [PATCH 18/20] Fix checkstyle --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index f2414d54088e..92319da79991 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -60,7 +60,6 @@ import java.nio.file.Paths; import java.security.cert.X509Certificate; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; From a817d0bdd63b399a7e902d8880c1d9643f549ed0 Mon Sep 17 00:00:00 2001 From: Siyao Meng <50227127+smengcl@users.noreply.github.com> Date: Thu, 15 Jun 2023 18:22:18 -0700 Subject: [PATCH 19/20] Revert "Fix checkstyle" This reverts commit 02166d1522d44e3e2dd16e3fef4dc8d5350301fa. --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index 92319da79991..f2414d54088e 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -60,6 +60,7 @@ import java.nio.file.Paths; import java.security.cert.X509Certificate; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; From 849d432ce6e66913c43784060507e0ab2d1e06b0 Mon Sep 17 00:00:00 2001 From: George Jahad Date: Thu, 15 Jun 2023 17:45:47 -0700 Subject: [PATCH 20/20] checkstyle --- .../src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index 71f9a2b6149a..2dbef0c2ad53 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -61,7 +61,6 @@ import java.nio.file.Paths; import java.security.cert.X509Certificate; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.Callable;