diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index b52d0667c781..8a1afc5a6be3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -40,6 +40,7 @@ import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.BiConsumer; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -47,7 +48,6 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.utils.SimpleStriped; import org.apache.hadoop.ozone.container.common.impl.ContainerData; -import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -94,15 +94,7 @@ public ContainerProtos.ContainerChecksumInfo writeContainerDataTree(ContainerDat Lock writeLock = getLock(containerID); writeLock.lock(); try { - ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null; - try { - // If the file is not present, we will create the data for the first time. This happens under a write lock. - checksumInfoBuilder = readBuilder(data).orElse(ContainerProtos.ContainerChecksumInfo.newBuilder()); - } catch (IOException ex) { - LOG.error("Failed to read container checksum tree file for container {}. Creating a new instance.", - containerID, ex); - checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder(); - } + ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = readOrCreate(data).toBuilder(); ContainerProtos.ContainerMerkleTree treeProto = captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto); @@ -129,16 +121,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del Lock writeLock = getLock(containerID); writeLock.lock(); try { - ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null; - try { - // If the file is not present, we will create the data for the first time. This happens under a write lock. - checksumInfoBuilder = readBuilder(data) - .orElse(ContainerProtos.ContainerChecksumInfo.newBuilder()); - } catch (IOException ex) { - LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.", - data.getContainerID(), ex); - checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder(); - } + ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = readOrCreate(data).toBuilder(); // Although the persisted block list should already be sorted, we will sort it here to make sure. // This will automatically fix any bugs in the persisted order that may show up. @@ -179,7 +162,7 @@ public ContainerDiffReport diff(ContainerProtos.ContainerChecksumInfo thisChecks ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws StorageContainerException { - ContainerDiffReport report = new ContainerDiffReport(); + ContainerDiffReport report = new ContainerDiffReport(thisChecksumInfo.getContainerID()); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { Preconditions.assertNotNull(thisChecksumInfo, "Datanode's checksum info is null."); @@ -280,6 +263,8 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer List thisChunkMerkleTreeList = thisBlockMerkleTree.getChunkMerkleTreeList(); List peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList(); int thisIdx = 0, peerIdx = 0; + long containerID = report.getContainerID(); + long blockID = thisBlockMerkleTree.getBlockID(); // Step 1: Process both lists while elements are present in both while (thisIdx < thisChunkMerkleTreeList.size() && peerIdx < peerChunkMerkleTreeList.size()) { @@ -293,8 +278,8 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer // thisTree = Healthy, peerTree = unhealthy -> Do nothing as thisTree is healthy. // thisTree = Unhealthy, peerTree = Unhealthy -> Do Nothing as both are corrupt. if (thisChunkMerkleTree.getDataChecksum() != peerChunkMerkleTree.getDataChecksum() && - !thisChunkMerkleTree.getChecksumMatches() && peerChunkMerkleTree.getChecksumMatches()) { - report.addCorruptChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + !thisChunkMerkleTree.getChecksumMatches()) { + reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTree, report::addCorruptChunk); } thisIdx++; peerIdx++; @@ -304,14 +289,14 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer thisIdx++; } else { // Peer chunk's offset is smaller; record missing chunk and advance peerIdx - report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTree, report::addMissingChunk); peerIdx++; } } // Step 2: Process remaining chunks in the peer list while (peerIdx < peerChunkMerkleTreeList.size()) { - report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTreeList.get(peerIdx)); + reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTreeList.get(peerIdx), report::addMissingChunk); peerIdx++; } @@ -319,15 +304,18 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer // chunks from us when they reconcile. } - public static long getDataChecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) { - return checksumInfo.getContainerMerkleTree().getDataChecksum(); + private void reportChunkIfHealthy(long containerID, long blockID, ContainerProtos.ChunkMerkleTree peerTree, + BiConsumer addToReport) { + if (peerTree.getChecksumMatches()) { + addToReport.accept(blockID, peerTree); + } else { + LOG.warn("Skipping chunk at offset {} in block {} of container {} since peer reported it as " + + "unhealthy.", peerTree.getOffset(), blockID, containerID); + } } - /** - * Returns whether the container checksum tree file for the specified container exists without deserializing it. - */ - public static boolean hasContainerChecksumFile(ContainerData data) { - return getContainerChecksumFile(data).exists(); + public static long getDataChecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) { + return checksumInfo.getContainerMerkleTree().getDataChecksum(); } /** @@ -348,21 +336,34 @@ private Lock getLock(long containerID) { } /** + * Reads the checksum info of the specified container. If the tree file with the information does not exist, an empty + * instance is returned. * Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically * swapped into place. */ - public Optional read(ContainerData data) throws IOException { + public ContainerProtos.ContainerChecksumInfo read(ContainerData data) throws IOException { try { - return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> readChecksumInfo(data)); + return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> + readChecksumInfo(data).orElse(ContainerProtos.ContainerChecksumInfo.newBuilder().build())); } catch (IOException ex) { metrics.incrementMerkleTreeReadFailures(); - throw new IOException(ex); + throw ex; } } - private Optional readBuilder(ContainerData data) throws IOException { - Optional checksumInfo = read(data); - return checksumInfo.map(ContainerProtos.ContainerChecksumInfo::toBuilder); + /** + * Reads the checksum info of the specified container. If the tree file with the information does not exist, or there + * is an exception trying to read the file, an empty instance is returned. + */ + private ContainerProtos.ContainerChecksumInfo readOrCreate(ContainerData data) { + try { + // If the file is not present, we will create the data for the first time. This happens under a write lock. + return read(data); + } catch (IOException ex) { + LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.", + data.getContainerID(), ex); + return ContainerProtos.ContainerChecksumInfo.newBuilder().build(); + } } /** @@ -443,10 +444,4 @@ public static Optional readChecksumInfo(C public ContainerMerkleTreeMetrics getMetrics() { return this.metrics; } - - public static boolean checksumFileExist(Container container) { - File checksumFile = getContainerChecksumFile(container.getContainerData()); - return checksumFile.exists(); - } - } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java index 6de3057b00cb..882c9a7e8319 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java @@ -31,11 +31,17 @@ public class ContainerDiffReport { private final List missingBlocks; private final Map> missingChunks; private final Map> corruptChunks; + private final long containerID; - public ContainerDiffReport() { + public ContainerDiffReport(long containerID) { this.missingBlocks = new ArrayList<>(); this.missingChunks = new HashMap<>(); this.corruptChunks = new HashMap<>(); + this.containerID = containerID; + } + + public long getContainerID() { + return containerID; } /** @@ -105,7 +111,7 @@ public long getNumMissingBlocks() { @Override public String toString() { - return "ContainerDiffReport:" + + return "Diff report for container " + containerID + ":" + " Missing Blocks: " + getNumMissingBlocks() + " Missing Chunks: " + getNumMissingChunks() + " chunks from " + missingChunks.size() + " blocks" + " Corrupt Chunks: " + getNumCorruptChunks() + " chunks from " + corruptChunks.size() + " blocks"; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java index 11092fe1f7b2..f79c7e3f1df5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java @@ -99,6 +99,7 @@ public abstract class ContainerData { // Checksum of the data within the container. private long dataChecksum; + private static final long UNSET_DATA_CHECKSUM = -1; private boolean isEmpty; @@ -153,7 +154,7 @@ protected ContainerData(ContainerType type, long containerId, this.originNodeId = originNodeId; this.isEmpty = false; this.checksum = ZERO_CHECKSUM; - this.dataChecksum = 0; + this.dataChecksum = UNSET_DATA_CHECKSUM; } protected ContainerData(ContainerData source) { @@ -538,13 +539,24 @@ public void computeAndSetContainerFileChecksum(Yaml yaml) throws IOException { } public void setDataChecksum(long dataChecksum) { + if (dataChecksum < 0) { + throw new IllegalArgumentException("Data checksum cannot be set to a negative number."); + } this.dataChecksum = dataChecksum; } public long getDataChecksum() { + // UNSET_DATA_CHECKSUM is an internal placeholder, it should not be used outside this class. + if (needsDataChecksum()) { + return 0; + } return dataChecksum; } + public boolean needsDataChecksum() { + return dataChecksum == UNSET_DATA_CHECKSUM; + } + /** * Returns a ProtoBuf Message from ContainerData. * diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index a2dbdcef7f3e..b79c0b780506 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -81,7 +81,6 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; -import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.locks.Lock; @@ -1368,7 +1367,6 @@ public void markContainerForClose(Container container) } finally { container.writeUnlock(); } - updateContainerChecksumFromMetadataIfNeeded(container); ContainerLogger.logClosing(container.getContainerData()); sendICR(container); } @@ -1383,7 +1381,7 @@ public void updateContainerChecksum(Container container, ContainerMerkleTreeWrit * Write the merkle tree for this container using the existing checksum metadata only. The data is not read or * validated by this method, so it is expected to run quickly. *

- * If a checksum file already exists on the disk, this method will do nothing. The existing file would have either + * If a data checksum for the container already exists, this method does nothing. The existing value would have either * been made from the metadata or data itself so there is no need to recreate it from the metadata. This method * does not send an ICR with the updated checksum info. *

@@ -1391,7 +1389,7 @@ public void updateContainerChecksum(Container container, ContainerMerkleTreeWrit * @param container The container which will have a tree generated. */ private void updateContainerChecksumFromMetadataIfNeeded(Container container) { - if (ContainerChecksumTreeManager.checksumFileExist(container)) { + if (!container.getContainerData().needsDataChecksum()) { return; } @@ -1435,24 +1433,24 @@ private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(Cont // checksum to prevent divergence from what SCM sees in the ICR vs what datanode peers will see when pulling the // merkle tree. long originalDataChecksum = containerData.getDataChecksum(); + boolean hadDataChecksum = !containerData.needsDataChecksum(); ContainerProtos.ContainerChecksumInfo updateChecksumInfo = checksumManager.writeContainerDataTree(containerData, treeWriter); long updatedDataChecksum = updateChecksumInfo.getContainerMerkleTree().getDataChecksum(); if (updatedDataChecksum != originalDataChecksum) { containerData.setDataChecksum(updatedDataChecksum); - String message = - "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " + - checksumToString(updatedDataChecksum); if (sendICR) { sendICR(container); } - if (ContainerChecksumTreeManager.hasContainerChecksumFile(containerData)) { + + String message = "Container " + containerData.getContainerID() + " data checksum updated from " + + checksumToString(originalDataChecksum) + " to " + checksumToString(updatedDataChecksum); + if (hadDataChecksum) { LOG.warn(message); ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); } else { - // If this is the first time the scanner has run with the feature to generate a checksum file, don't - // log a warning for the checksum update. + // If this is the first time the checksum is being generated, don't log a warning about updating the checksum. LOG.debug(message); } } @@ -1465,7 +1463,6 @@ public void markContainerUnhealthy(Container container, ScanResult reason) container.writeLock(); long containerID = 0L; try { - containerID = container.getContainerData().getContainerID(); if (container.getContainerState() == State.UNHEALTHY) { LOG.debug("Call to mark already unhealthy container {} as unhealthy", containerID); @@ -1572,12 +1569,9 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container long containerID = containerData.getContainerID(); // Obtain the original checksum info before reconciling with any peers. - Optional optionalChecksumInfo = checksumManager.read(containerData); - ContainerProtos.ContainerChecksumInfo originalChecksumInfo; - if (optionalChecksumInfo.isPresent()) { - originalChecksumInfo = optionalChecksumInfo.get(); - } else { - // Try creating the checksum info from RocksDB metadata if it is not present. + ContainerProtos.ContainerChecksumInfo originalChecksumInfo = checksumManager.read(containerData); + if (!originalChecksumInfo.hasContainerMerkleTree()) { + // Try creating the merkle tree from RocksDB metadata if it is not present. originalChecksumInfo = updateAndGetContainerChecksumFromMetadata(kvContainer); } // This holds our current most up-to-date checksum info that we are using for the container. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 730c2c8b6586..47842ab23a7c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -161,7 +161,7 @@ public static ContainerMerkleTreeWriter buildTestTree(ConfigurationSource conf, int numCorruptChunks) { ContainerProtos.ContainerMerkleTree.Builder treeBuilder = originalTree.toProto().toBuilder(); - ContainerDiffReport diff = new ContainerDiffReport(); + ContainerDiffReport diff = new ContainerDiffReport(1); introduceMissingBlocks(treeBuilder, numMissingBlocks, diff); introduceMissingChunks(treeBuilder, numMissingChunks, diff); @@ -323,12 +323,12 @@ private static void assertEqualsChunkMerkleTree(List container = ozoneContainer.getController().getContainer(containerID); - return ContainerChecksumTreeManager.checksumFileExist(container); + return getContainerChecksumFile(container.getContainerData()).exists(); } public static void writeContainerDataTreeProto(ContainerData data, ContainerProtos.ContainerMerkleTree tree) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 36b705c9926e..56592efe1a5e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -39,7 +39,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.tuple.Pair; @@ -344,8 +343,8 @@ public void testContainerWithNoDiff() throws Exception { ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); @@ -368,8 +367,8 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); assertEquals(1, checksumManager.getMetrics().getRepairContainerDiffs()); @@ -395,8 +394,8 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree).build(); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo); assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); assertEquals(0, checksumManager.getMetrics().getMissingBlocksIdentified()); @@ -409,8 +408,8 @@ public void testFailureContainerMerkleTreeMetric() throws IOException { ContainerProtos.ContainerChecksumInfo peerChecksum = ContainerProtos.ContainerChecksumInfo.newBuilder().build(); ContainerMerkleTreeWriter ourMerkleTree = buildTestTree(config); checksumManager.writeContainerDataTree(container, ourMerkleTree); - Optional checksumInfo = checksumManager.read(container); - assertThrows(StorageContainerException.class, () -> checksumManager.diff(checksumInfo.get(), peerChecksum)); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertThrows(StorageContainerException.class, () -> checksumManager.diff(checksumInfo, peerChecksum)); assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1); } @@ -436,8 +435,8 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { .addAllDeletedBlocks(deletedBlockList).build(); writeContainerDataTreeProto(container, ourMerkleTree); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. @@ -448,7 +447,7 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { // Delete blocks in our merkle tree as well. checksumManager.markBlocksAsDeleted(container, blockIDs); checksumInfo = checksumManager.read(container); - containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in both merkle tree. @@ -474,8 +473,8 @@ void testDeletedBlocksInOurContainerOnly() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); checksumManager.markBlocksAsDeleted(container, deletedBlockList); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in our merkle tree. @@ -507,8 +506,8 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. @@ -539,8 +538,8 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); - Optional checksumInfo = checksumManager.read(container); - ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. assertFalse(containerDiff.getMissingBlocks().isEmpty()); @@ -553,7 +552,7 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception { // Clear deleted blocks to add them in missing blocks. peerChecksumInfo = peerChecksumInfoBuilder.clearDeletedBlocks().build(); checksumInfo = checksumManager.read(container); - containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); + containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo); assertFalse(containerDiff.getMissingBlocks().isEmpty()); // Missing block does not contain the deleted blocks 6L to 10L diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java index 6af2e00ade56..5d674de7fcba 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java @@ -18,6 +18,9 @@ package org.apache.hadoop.ozone.container.common; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import java.util.UUID; @@ -116,4 +119,30 @@ public void testKeyValueData(ContainerTestVersionInfo versionInfo) { assertEquals(kvData.getSchemaVersion(), newKvData.getSchemaVersion()); } + @ContainerTestVersionInfo.ContainerTest + public void testNeedsDataChecksum(ContainerTestVersionInfo versionInfo) { + initVersionInfo(versionInfo); + + KeyValueContainerData containerData = new KeyValueContainerData(1, layout, MAXSIZE, UUID.randomUUID().toString(), + UUID.randomUUID().toString()); + + // When the container is initially created without a checksum, the checksum will be 0 but the container still + // indicates it needs the actual one generated. + assertFalse(containerData.isEmpty()); + assertTrue(containerData.needsDataChecksum()); + assertEquals(0, containerData.getDataChecksum()); + + // Once the setter is called with any value, the container should no longer consider the checksum missing. + containerData.setDataChecksum(0); + assertFalse(containerData.needsDataChecksum()); + assertEquals(0, containerData.getDataChecksum()); + + containerData.setDataChecksum(123L); + assertFalse(containerData.isEmpty()); + assertFalse(containerData.needsDataChecksum()); + assertEquals(123L, containerData.getDataChecksum()); + + assertThrows(IllegalArgumentException.class, () -> containerData.setDataChecksum(-1L), + "Negative checksum value should throw an exception."); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index 7fce76a0e4df..68b144d97b2e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -27,6 +27,7 @@ import static org.assertj.core.api.Assertions.fail; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; @@ -159,12 +160,16 @@ public static void setup() throws Exception { // Use this fake host name to track the node through the test since it's easier to visualize than a UUID. dnDetails.setHostName("dn" + (i + 1)); MockDatanode dn = new MockDatanode(dnDetails, containerDir); + // This will close the container and build a data checksum based on the chunk checksums in the metadata. dn.addContainerWithBlocks(CONTAINER_ID, 15); datanodes.add(dn); } + long dataChecksumFromMetadata = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); + assertNotEquals(0, dataChecksumFromMetadata); datanodes.forEach(d -> d.scanContainer(CONTAINER_ID)); healthyDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); + assertEquals(dataChecksumFromMetadata, healthyDataChecksum); // Do not count the initial synchronous scan to build the merkle tree towards the scan count in the tests. // This lets each test run start counting the number of scans from zero. datanodes.forEach(MockDatanode::resetOnDemandScanCount); @@ -352,10 +357,9 @@ public long checkAndGetDataChecksum(long containerID) { KeyValueContainer container = getContainer(containerID); long dataChecksum = 0; try { - Optional containerChecksumInfo = - handler.getChecksumManager().read(container.getContainerData()); - assertTrue(containerChecksumInfo.isPresent()); - dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + ContainerProtos.ContainerChecksumInfo containerChecksumInfo = handler.getChecksumManager() + .read(container.getContainerData()); + dataChecksum = containerChecksumInfo.getContainerMerkleTree().getDataChecksum(); assertEquals(container.getContainerData().getDataChecksum(), dataChecksum); } catch (IOException ex) { fail("Failed to read container checksum from disk", ex); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index 92e7a071cce9..989dc0bddbc3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -38,7 +38,6 @@ import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.io.FileUtils; @@ -200,9 +199,8 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr checksumManager.writeContainerDataTree(containerData, result.getDataTree()); // This will read the corrupted tree from the disk, which represents the current state of the container, and // compare it against the original healthy tree. The diff we get back should match the failures we injected. - Optional generatedChecksumInfo = checksumManager.read(containerData); - assertTrue(generatedChecksumInfo.isPresent()); - ContainerDiffReport diffReport = checksumManager.diff(generatedChecksumInfo.get(), healthyChecksumInfo); + ContainerProtos.ContainerChecksumInfo generatedChecksumInfo = checksumManager.read(container.getContainerData()); + ContainerDiffReport diffReport = checksumManager.diff(generatedChecksumInfo, healthyChecksumInfo); LOG.info("Diff of healthy container with actual container {}", diffReport); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 97fd5a4b8c11..a102dd4a5f72 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -383,7 +383,8 @@ public void testCloseInvalidContainer(ContainerLayoutVersion layoutVersion) // Closing invalid container should return error response. ContainerProtos.ContainerCommandResponseProto response = keyValueHandler.handleCloseContainer(closeContainerRequest, container); - assertTrue(ContainerChecksumTreeManager.checksumFileExist(container)); + // Checksum will not be generated for an invalid container. + assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists()); assertEquals(ContainerProtos.Result.INVALID_CONTAINER_STATE, response.getResult(), @@ -679,7 +680,8 @@ public void testUpdateContainerChecksum(ContainerLayoutVersion layoutVersion) th // Initially, container should have no checksum information. assertEquals(0, containerData.getDataChecksum()); - assertFalse(checksumManager.read(containerData).isPresent()); + assertFalse(checksumManager.read(containerData).hasContainerMerkleTree()); + assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(containerData).exists()); assertEquals(0, icrCount.get()); // Update container with checksum information. @@ -689,7 +691,7 @@ public void testUpdateContainerChecksum(ContainerLayoutVersion layoutVersion) th // Check checksum in memory. assertEquals(updatedDataChecksum, containerData.getDataChecksum()); // Check disk content. - ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(containerData).get(); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(containerData); assertTreesSortedAndMatch(treeWriter.toProto(), checksumInfo.getContainerMerkleTree()); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java index 343c8dcfee88..8361959e6da4 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java @@ -238,14 +238,14 @@ public void testMarkContainerUnhealthyInFailedVolume() throws IOException { // be ignored. hddsVolume.setState(StorageVolume.VolumeState.FAILED); handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult()); - assertFalse(ContainerChecksumTreeManager.checksumFileExist(container)); + assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists()); verify(mockIcrSender, never()).send(any()); // When volume is healthy, ICR should be sent when container is marked // unhealthy. hddsVolume.setState(StorageVolume.VolumeState.NORMAL); handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult()); - assertTrue(ContainerChecksumTreeManager.checksumFileExist(container)); + assertTrue(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists()); verify(mockIcrSender, atMostOnce()).send(any()); } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java index b1d544f48022..79df162cf090 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java @@ -41,7 +41,7 @@ public class ReconcileSubcommand extends ScmSubcommand { public void execute(ScmClient scmClient) throws IOException { scmClient.reconcileContainer(containerId); System.out.println("Reconciliation has been triggered for container " + containerId); - // TODO a better option to check status may be added later. + // TODO HDDS-12078 allow status to be checked from the reconcile subcommand directly. System.out.println("Use \"ozone admin container info --json " + containerId + "\" to see the checksums of each " + "container replica"); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index 2306af221c43..3c761f532f39 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.dn.scanner; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; @@ -27,15 +28,12 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerDataScanner; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ozone.test.GenericTestUtils.LogCapturer; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; @@ -47,9 +45,6 @@ class TestBackgroundContainerDataScannerIntegration extends TestContainerScannerIntegrationAbstract { - private final LogCapturer logCapturer = - LogCapturer.log4j2(ContainerLogger.LOG_NAME); - @BeforeAll static void init() throws Exception { OzoneConfiguration ozoneConfig = new OzoneConfiguration(); @@ -83,10 +78,12 @@ void testCorruptionDetected(TestContainerCorruptions corruption) Container container = getDnContainer(containerID); assertEquals(State.CLOSED, container.getContainerState()); assertTrue(containerChecksumFileExists(containerID)); + assertFalse(container.getContainerData().needsDataChecksum()); + assertNotEquals(0, container.getContainerData().getDataChecksum()); waitForScmToSeeReplicaState(containerID, CLOSED); long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); - + assertNotEquals(0, initialReportedDataChecksum); corruption.applyTo(container); resumeScanner(); @@ -103,23 +100,28 @@ void testCorruptionDetected(TestContainerCorruptions corruption) corruption == TestContainerCorruptions.MISSING_CONTAINER_DIR) { // In these cases, the new tree will not be able to be written since it exists in the metadata directory. // When the tree write fails, the in-memory checksum should remain at its original value. - assertEquals(initialReportedDataChecksum, newReportedDataChecksum); - assertFalse(containerChecksumFileExists(containerID)); + assertEquals(checksumToString(initialReportedDataChecksum), checksumToString(newReportedDataChecksum)); } else { - assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); + assertNotEquals(checksumToString(initialReportedDataChecksum), checksumToString(newReportedDataChecksum)); // Test that the scanner wrote updated checksum info to the disk. - assertTrue(containerChecksumFileExists(containerID)); - ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = readChecksumFile(container.getContainerData()); - assertEquals(newReportedDataChecksum, updatedChecksumInfo.getContainerMerkleTree().getDataChecksum()); + assertReplicaChecksumMatches(container, newReportedDataChecksum); + assertFalse(container.getContainerData().needsDataChecksum()); } if (corruption == TestContainerCorruptions.TRUNCATED_BLOCK || corruption == TestContainerCorruptions.CORRUPT_BLOCK) { // These errors will affect multiple chunks and result in multiple log messages. - corruption.assertLogged(containerID, logCapturer); + corruption.assertLogged(containerID, getContainerLogCapturer()); } else { // Other corruption types will only lead to a single error. - corruption.assertLogged(containerID, 1, logCapturer); + corruption.assertLogged(containerID, 1, getContainerLogCapturer()); } } + + private void assertReplicaChecksumMatches(Container container, long expectedChecksum) throws Exception { + assertTrue(containerChecksumFileExists(container.getContainerData().getContainerID())); + long dataChecksumFromFile = readChecksumFile(container.getContainerData()) + .getContainerMerkleTree().getDataChecksum(); + assertEquals(checksumToString(expectedChecksum), checksumToString(dataChecksumFromFile)); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index 2584138c126b..3133a81b5cb1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.IOException; @@ -68,6 +69,9 @@ public abstract class TestContainerScannerIntegrationAbstract { private static String volumeName; private static String bucketName; private static OzoneBucket bucket; + // Log4j 2 capturer currently doesn't support capturing specific logs. + // We must use one capturer for both the container and application logs. + private final GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.log4j2(""); public static void buildCluster(OzoneConfiguration ozoneConfig) throws Exception { @@ -166,6 +170,13 @@ protected void closeContainerAndWait(long containerID) throws Exception { () -> TestHelper.isContainerClosed(cluster, containerID, cluster.getHddsDatanodes().get(0).getDatanodeDetails()), 1000, 5000); + + // After the container is marked as closed in the datanode, we must wait for the checksum generation from metadata + // to finish. + LambdaTestUtils.await(5000, 1000, () -> + getContainerReplica(containerID).getDataChecksum() != 0); + long closedChecksum = getContainerReplica(containerID).getDataChecksum(); + assertNotEquals(0, closedChecksum); } protected long writeDataToOpenContainer() throws Exception { @@ -201,6 +212,10 @@ protected void readFromCorruptedKey(String keyName) throws IOException { } } + protected GenericTestUtils.LogCapturer getContainerLogCapturer() { + return logCapturer; + } + private OzoneOutputStream createKey(String keyName) throws Exception { return TestHelper.createKey( keyName, RATIS, ONE, 0, store, volumeName, bucketName);