From ed10b64d660e545ba8d71ed6106c1dd1dce4e2f8 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 9 Oct 2024 11:46:56 -0700 Subject: [PATCH 01/11] HDDS-10928. Implement container comparison and repair logic within datanodes --- .../ContainerChecksumTreeManager.java | 148 +++++++++++++++++- .../ContainerMerkleTreeTestUtils.java | 34 ++++ .../TestContainerChecksumTreeManager.java | 82 ++++++++++ 3 files changed, 258 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 769515d96e3f..16a2d2d13edb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -30,12 +31,16 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Collection; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.utils.SimpleStriped; @@ -143,12 +148,81 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } } - public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo otherInfo) + public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws IOException { - // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. - // Callers can act on this summary to repair their container replica using the peer's replica. - // This method will use the read lock, which is unused in the current implementation. - return new ContainerDiff(); + Preconditions.assertNotNull(thisContainer, "Container data is null"); + Optional thisContainerChecksumInfoBuilder = + read(thisContainer); + if (!thisContainerChecksumInfoBuilder.isPresent()) { + // TODO: To create containerMerkleTree or fail the request. + return null; + } + + if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { + throw new IOException("Container Id does not match"); + } + + ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); + + ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); + ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); + + return compareContainerMerkleTree(thisMerkleTree, peerMerkleTree); + } + + private ContainerDiff compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree thisMerkleTree, + ContainerProtos.ContainerMerkleTree peerMerkleTree) { + + ContainerDiff report = new ContainerDiff(); + if (thisMerkleTree.getDataChecksum() == peerMerkleTree.getDataChecksum()) { + return new ContainerDiff(); + } + + Map thisBlockMerkleTreeMap = thisMerkleTree.getBlockMerkleTreeList() + .stream().collect(Collectors.toMap(ContainerProtos.BlockMerkleTree::getBlockID, + blockMerkleTree -> blockMerkleTree)); + + // Since we are reconciling our container with a peer, We only need to go through the peer's block list + for (ContainerProtos.BlockMerkleTree peerBlockMerkleTree: peerMerkleTree.getBlockMerkleTreeList()) { + // Check if our container has the peer block. + ContainerProtos.BlockMerkleTree thisBlockMerkleTree = thisBlockMerkleTreeMap.get( + peerBlockMerkleTree.getBlockID()); + if (thisBlockMerkleTree == null) { + report.addMissingBlock(peerBlockMerkleTree); + continue; + } + + if (thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { + compareBlockMerkleTree(report, thisBlockMerkleTree, peerBlockMerkleTree); + } + } + return report; + } + + private void compareBlockMerkleTree(ContainerDiff report, ContainerProtos.BlockMerkleTree thisBlockMerkleTree, + ContainerProtos.BlockMerkleTree peerBlockMerkleTree) { + Map thisChunkMerkleTreeMap = thisBlockMerkleTree.getChunkMerkleTreeList() + .stream().collect(Collectors.toMap(ContainerProtos.ChunkMerkleTree::getOffset, + chunkMerkleTree -> chunkMerkleTree)); + List peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList(); + + // Since we are reconciling our container with a peer, We only need to go through the peer's chunk list + for (ContainerProtos.ChunkMerkleTree peerChunkMerkleTree: peerChunkMerkleTreeList) { + ContainerProtos.ChunkMerkleTree thisChunkMerkleTree = thisChunkMerkleTreeMap.get( + peerChunkMerkleTree.getOffset()); + if (thisChunkMerkleTree == null) { + report.addMissingChunk(peerChunkMerkleTree); + continue; + } + + if (thisChunkMerkleTree.getChunkChecksum() != peerChunkMerkleTree.getChunkChecksum()) { + ChunkArgs thisChunkArgs = new ChunkArgs(thisBlockMerkleTree.getBlockID(), thisChunkMerkleTree.getOffset(), + thisChunkMerkleTree.getLength(), thisChunkMerkleTree.getChunkChecksum()); + ChunkArgs peerChunkArgs = new ChunkArgs(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree.getOffset(), + peerChunkMerkleTree.getLength(), peerChunkMerkleTree.getChunkChecksum()); + report.addCorruptChunks(Pair.of(thisChunkArgs, peerChunkArgs)); + } + } } /** @@ -245,11 +319,73 @@ public static boolean checksumFileExist(Container container) { * This class represents the difference between our replica of a container and a peer's replica of a container. * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. * - * TODO HDDS-10928 */ public static class ContainerDiff { + private final List missingBlocks; + private final List missingChunks; + private final List> corruptChunks; + public ContainerDiff() { + this.missingBlocks = new ArrayList<>(); + this.missingChunks = new ArrayList<>(); + this.corruptChunks = new ArrayList<>(); + } + + public void addMissingBlock(ContainerProtos.BlockMerkleTree otherBlockMerkleTree) { + this.missingBlocks.add(otherBlockMerkleTree); + } + + public void addMissingChunk(ContainerProtos.ChunkMerkleTree otherChunkMerkleTree) { + this.missingChunks.add(otherChunkMerkleTree); + } + + public void addCorruptChunks(Pair mismatchedChunk) { + this.corruptChunks.add(mismatchedChunk); + } + + public List getMissingBlocks() { + return missingBlocks; + } + + public List getMissingChunks() { + return missingChunks; + } + + public List> getCorruptChunks() { + return corruptChunks; + } + } + + /** + * This class encapsulates chunk merkle tree with block ID which is required for reconciliation. + */ + public static class ChunkArgs { + private final long blockId; + private final long chunkOffset; + private final long chunkLength; + private final long checksum; + + public ChunkArgs(long blockId, long chunkOffset, long chunkLength, long checksum) { + this.blockId = blockId; + this.chunkOffset = chunkOffset; + this.chunkLength = chunkLength; + this.checksum = checksum; + } + + public long getBlockId() { + return blockId; + } + + public long getChunkOffset() { + return chunkOffset; + } + + public long getChunkLength() { + return chunkLength; + } + public long getChecksum() { + return checksum; } } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 0301304db713..96b86ae5cb90 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -146,6 +146,40 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { return tree; } + public static ContainerMerkleTree buildTestTreeWithMismatches(ConfigurationSource conf, boolean missingBlocks, + boolean missingChunks, boolean corruptChunks) { + final long blockID1 = 1; + final long blockID2 = 2; + final long blockID3 = 3; + ContainerProtos.ChunkInfo b1c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ContainerProtos.ChunkInfo b1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{4, 5, 6})); + ContainerProtos.ChunkInfo b2c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{7, 8, 9})); + ContainerProtos.ChunkInfo b2c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{12, 11, 10})); + ContainerProtos.ChunkInfo b3c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{13, 14, 15})); + ContainerProtos.ChunkInfo b3c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + + ContainerMerkleTree tree = new ContainerMerkleTree(); + tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + + if (corruptChunks) { + ContainerProtos.ChunkInfo corruptB1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{2, 4, 6})); + tree.addChunks(blockID1, Arrays.asList(b1c1, corruptB1c2)); + } else { + tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + } + + if (missingChunks) { + tree.addChunks(blockID2, Arrays.asList(b2c1)); + } else { + tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + } + + if (!missingBlocks) { + tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + } + return tree; + } + /** * This function checks whether the container checksum file exists. */ diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b482d746ef30..ad6af41635d8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -37,6 +38,7 @@ import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTreeWithMismatches; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -299,6 +301,86 @@ public void testEmptyFile() throws Exception { assertEquals(CONTAINER_ID, info.getContainerID()); } + @Test + public void testContainerWithNoDiff() throws IOException { + ContainerMerkleTree ourMerkleTree = buildTestTree(config); + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( + new OzoneConfiguration()); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(container.getContainerID()) + .setContainerMerkleTree(peerMerkleTree.toProto()).build(); + ContainerChecksumTreeManager.ContainerDiff diff = + containerChecksumTreeManager.diff(container, peerChecksumInfo); + Assertions.assertTrue(diff.getCorruptChunks().isEmpty()); + Assertions.assertTrue(diff.getMissingBlocks().isEmpty()); + Assertions.assertTrue(diff.getMissingChunks().isEmpty()); + } + + @Test + public void testContainerDiffWithMissingBlocksAndChunks() throws IOException { + ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(config, true, true, false); + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( + new OzoneConfiguration()); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(container.getContainerID()) + .setContainerMerkleTree(peerMerkleTree.toProto()).build(); + ContainerChecksumTreeManager.ContainerDiff diff = + containerChecksumTreeManager.diff(container, peerChecksumInfo); + Assertions.assertTrue(diff.getCorruptChunks().isEmpty()); + Assertions.assertFalse(diff.getMissingBlocks().isEmpty()); + Assertions.assertFalse(diff.getMissingChunks().isEmpty()); + + Assertions.assertEquals(diff.getMissingBlocks().size(), 1); + Assertions.assertEquals(diff.getMissingChunks().size(), 1); + } + + @Test + public void testContainerDiffWithMissingBlocksAndMismatchChunks() throws IOException { + ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(config, true, false, true); + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( + new OzoneConfiguration()); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(container.getContainerID()) + .setContainerMerkleTree(peerMerkleTree.toProto()).build(); + ContainerChecksumTreeManager.ContainerDiff diff = + containerChecksumTreeManager.diff(container, peerChecksumInfo); + Assertions.assertFalse(diff.getCorruptChunks().isEmpty()); + Assertions.assertFalse(diff.getMissingBlocks().isEmpty()); + Assertions.assertTrue(diff.getMissingChunks().isEmpty()); + + Assertions.assertEquals(diff.getCorruptChunks().size(), 1); + Assertions.assertEquals(diff.getMissingBlocks().size(), 1); + } + + /** + * Test if a peer which has missing blocks and chunks affects our container diff. + * Only if our merkle tree has missing entries from the peer we need to add it the Container Diff. + */ + @Test + public void testPeerWithMissingBlockAndMissingChunks() throws IOException { + ContainerMerkleTree ourMerkleTree = buildTestTree(config); + ContainerMerkleTree peerMerkleTree = buildTestTreeWithMismatches(config, true, true, true); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( + new OzoneConfiguration()); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(container.getContainerID()) + .setContainerMerkleTree(peerMerkleTree.toProto()).build(); + ContainerChecksumTreeManager.ContainerDiff diff = + containerChecksumTreeManager.diff(container, peerChecksumInfo); + Assertions.assertFalse(diff.getCorruptChunks().isEmpty()); + Assertions.assertTrue(diff.getMissingBlocks().isEmpty()); + Assertions.assertTrue(diff.getMissingChunks().isEmpty()); + + Assertions.assertEquals(diff.getCorruptChunks().size(), 1); + } + @Test public void testChecksumTreeFilePath() { assertEquals(checksumFile.getAbsolutePath(), From 1d89262295139a2ddfd3529aa2aabe5559c0de1e Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 4 Nov 2024 10:59:40 -0800 Subject: [PATCH 02/11] HDDS-10928. Change implementation and update tests. --- .../ContainerChecksumTreeManager.java | 158 +++++++----- .../checksum/ContainerMerkleTree.java | 72 +++++- .../checksum/ContainerMerkleTreeMetrics.java | 14 + .../ContainerMerkleTreeTestUtils.java | 242 +++++++++++++++--- .../TestContainerChecksumTreeManager.java | 78 +++--- .../main/proto/DatanodeClientProtocol.proto | 1 + 6 files changed, 417 insertions(+), 148 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 16a2d2d13edb..f3f9b036e85d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -16,7 +16,6 @@ */ package org.apache.hadoop.ozone.container.checksum; -import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -33,14 +32,14 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Collection; +import java.util.SortedMap; import java.util.SortedSet; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.utils.SimpleStriped; @@ -151,11 +150,11 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws IOException { Preconditions.assertNotNull(thisContainer, "Container data is null"); + Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); Optional thisContainerChecksumInfoBuilder = read(thisContainer); if (!thisContainerChecksumInfoBuilder.isPresent()) { - // TODO: To create containerMerkleTree or fail the request. - return null; + throw new IOException("The container #" + thisContainer.getContainerID() + " doesn't have container checksum"); } if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { @@ -178,22 +177,42 @@ private ContainerDiff compareContainerMerkleTree(ContainerProtos.ContainerMerkle return new ContainerDiff(); } - Map thisBlockMerkleTreeMap = thisMerkleTree.getBlockMerkleTreeList() - .stream().collect(Collectors.toMap(ContainerProtos.BlockMerkleTree::getBlockID, - blockMerkleTree -> blockMerkleTree)); - + List thisBlockMerkleTreeList = thisMerkleTree.getBlockMerkleTreeList(); + List peerBlockMerkleTreeList = peerMerkleTree.getBlockMerkleTreeList(); // Since we are reconciling our container with a peer, We only need to go through the peer's block list - for (ContainerProtos.BlockMerkleTree peerBlockMerkleTree: peerMerkleTree.getBlockMerkleTreeList()) { - // Check if our container has the peer block. - ContainerProtos.BlockMerkleTree thisBlockMerkleTree = thisBlockMerkleTreeMap.get( - peerBlockMerkleTree.getBlockID()); + int peerIdx = 0, thisIdx = 0; + while (peerIdx < peerBlockMerkleTreeList.size() || thisIdx < thisBlockMerkleTreeList.size()) { + ContainerProtos.BlockMerkleTree thisBlockMerkleTree = thisIdx < thisBlockMerkleTreeList.size() ? + thisBlockMerkleTreeList.get(thisIdx) : null; + ContainerProtos.BlockMerkleTree peerBlockMerkleTree = peerIdx < peerBlockMerkleTreeList.size() ? + peerBlockMerkleTreeList.get(peerIdx) : null; + + // We have checked all the peer blocks, So we can return the ContainerDiff report + if (peerBlockMerkleTree == null) { + return report; + } + + // peerBlockMerkleTree is not null, but thisBlockMerkleTree is null. Means we have missing blocks. if (thisBlockMerkleTree == null) { report.addMissingBlock(peerBlockMerkleTree); + peerIdx++; continue; } - if (thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { - compareBlockMerkleTree(report, thisBlockMerkleTree, peerBlockMerkleTree); + // BlockId matches for both thisBlockMerkleTree and peerBlockMerkleTree + if (thisBlockMerkleTree.getBlockID() == peerBlockMerkleTree.getBlockID()) { + if (thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { + compareBlockMerkleTree(report, thisBlockMerkleTree, peerBlockMerkleTree); + } + peerIdx++; + thisIdx++; + } else { + if (peerBlockMerkleTree.getBlockID() > thisBlockMerkleTree.getBlockID()) { + thisIdx++; + } else { + report.addMissingBlock(peerBlockMerkleTree); + peerIdx++; + } } } return report; @@ -201,26 +220,49 @@ private ContainerDiff compareContainerMerkleTree(ContainerProtos.ContainerMerkle private void compareBlockMerkleTree(ContainerDiff report, ContainerProtos.BlockMerkleTree thisBlockMerkleTree, ContainerProtos.BlockMerkleTree peerBlockMerkleTree) { - Map thisChunkMerkleTreeMap = thisBlockMerkleTree.getChunkMerkleTreeList() - .stream().collect(Collectors.toMap(ContainerProtos.ChunkMerkleTree::getOffset, - chunkMerkleTree -> chunkMerkleTree)); + + List thisChunkMerkleTreeList = thisBlockMerkleTree.getChunkMerkleTreeList(); List peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList(); // Since we are reconciling our container with a peer, We only need to go through the peer's chunk list - for (ContainerProtos.ChunkMerkleTree peerChunkMerkleTree: peerChunkMerkleTreeList) { - ContainerProtos.ChunkMerkleTree thisChunkMerkleTree = thisChunkMerkleTreeMap.get( - peerChunkMerkleTree.getOffset()); + int peerIdx = 0, thisIdx = 0; + while (peerIdx < peerChunkMerkleTreeList.size() || thisIdx < thisChunkMerkleTreeList.size()) { + ContainerProtos.ChunkMerkleTree thisChunkMerkleTree = thisIdx < thisChunkMerkleTreeList.size() ? + thisChunkMerkleTreeList.get(thisIdx) : null; + ContainerProtos.ChunkMerkleTree peerChunkMerkleTree = peerIdx < peerChunkMerkleTreeList.size() ? + peerChunkMerkleTreeList.get(peerIdx) : null; + + // We have checked all the peer chunks, So we can return the ContainerDiff report + if (peerChunkMerkleTree == null) { + return; + } + + // peerBlockMerkleTree is not null, but thisBlockMerkleTree is null. Means we have missing blocks. if (thisChunkMerkleTree == null) { - report.addMissingChunk(peerChunkMerkleTree); + report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + peerIdx++; continue; } - if (thisChunkMerkleTree.getChunkChecksum() != peerChunkMerkleTree.getChunkChecksum()) { - ChunkArgs thisChunkArgs = new ChunkArgs(thisBlockMerkleTree.getBlockID(), thisChunkMerkleTree.getOffset(), - thisChunkMerkleTree.getLength(), thisChunkMerkleTree.getChunkChecksum()); - ChunkArgs peerChunkArgs = new ChunkArgs(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree.getOffset(), - peerChunkMerkleTree.getLength(), peerChunkMerkleTree.getChunkChecksum()); - report.addCorruptChunks(Pair.of(thisChunkArgs, peerChunkArgs)); + if (thisChunkMerkleTree.getOffset() == peerChunkMerkleTree.getOffset()) { + // Possible state when this Checksum != peer Checksum: + // thisTree = Healthy, peerTree = Healthy -> We don't know what is healthy. Skip. + // thisTree = Unhealthy, peerTree = Healthy -> Add to corrupt chunk. + // thisTree = Healthy, peerTree = unhealthy -> Do nothing as thisTree is healthy. + // thisTree = Unhealthy, peerTree = Unhealthy -> Do Nothing as both are corrupt. + if (thisChunkMerkleTree.getChunkChecksum() != peerChunkMerkleTree.getChunkChecksum() && + !thisChunkMerkleTree.getIsHealthy() && peerChunkMerkleTree.getIsHealthy()) { + report.addCorruptChunks(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + } + peerIdx++; + thisIdx++; + } else { + if (peerChunkMerkleTree.getOffset() > thisChunkMerkleTree.getOffset()) { + thisIdx++; + } else { + report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + peerIdx++; + } } } } @@ -322,70 +364,44 @@ public static boolean checksumFileExist(Container container) { */ public static class ContainerDiff { private final List missingBlocks; - private final List missingChunks; - private final List> corruptChunks; + private final SortedMap> missingChunks; + private final SortedMap> corruptChunks; public ContainerDiff() { this.missingBlocks = new ArrayList<>(); - this.missingChunks = new ArrayList<>(); - this.corruptChunks = new ArrayList<>(); + this.missingChunks = new TreeMap<>(); + this.corruptChunks = new TreeMap<>(); } - public void addMissingBlock(ContainerProtos.BlockMerkleTree otherBlockMerkleTree) { - this.missingBlocks.add(otherBlockMerkleTree); + public void addMissingBlock(ContainerProtos.BlockMerkleTree missingBlockMerkleTree) { + this.missingBlocks.add(missingBlockMerkleTree); } - public void addMissingChunk(ContainerProtos.ChunkMerkleTree otherChunkMerkleTree) { - this.missingChunks.add(otherChunkMerkleTree); + public void addMissingChunk(long blockId, ContainerProtos.ChunkMerkleTree missingChunkMerkleTree) { + this.missingChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(missingChunkMerkleTree); } - public void addCorruptChunks(Pair mismatchedChunk) { - this.corruptChunks.add(mismatchedChunk); + public void addCorruptChunks(long blockId, ContainerProtos.ChunkMerkleTree corruptChunk) { + this.corruptChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(corruptChunk); } public List getMissingBlocks() { return missingBlocks; } - public List getMissingChunks() { + public SortedMap> getMissingChunks() { return missingChunks; } - public List> getCorruptChunks() { + public SortedMap> getCorruptChunks() { return corruptChunks; } - } - - /** - * This class encapsulates chunk merkle tree with block ID which is required for reconciliation. - */ - public static class ChunkArgs { - private final long blockId; - private final long chunkOffset; - private final long chunkLength; - private final long checksum; - - public ChunkArgs(long blockId, long chunkOffset, long chunkLength, long checksum) { - this.blockId = blockId; - this.chunkOffset = chunkOffset; - this.chunkLength = chunkLength; - this.checksum = checksum; - } - public long getBlockId() { - return blockId; - } - - public long getChunkOffset() { - return chunkOffset; - } - - public long getChunkLength() { - return chunkLength; - } - - public long getChecksum() { - return checksum; + public boolean isHealthy() { + if (missingBlocks.isEmpty() && missingChunks.isEmpty() && corruptChunks.isEmpty()) { + return true; + } + return false; } } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index d4fbfeb072ae..ce86b63dc89f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.common.ChecksumByteBuffer; import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; @@ -62,6 +63,25 @@ public void addChunks(long blockID, Collection chunks id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks); } + public void addChunk(long blockID, ContainerProtos.ChunkInfo chunk) { + id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunk(chunk); + } + + @VisibleForTesting + public BlockMerkleTree getChunks(long blockID) { + return id2Block.get(blockID); + } + + @VisibleForTesting + public BlockMerkleTree get(long blockID) { + return id2Block.get(blockID); + } + + @VisibleForTesting + public BlockMerkleTree remove(long blockID) { + return id2Block.remove(blockID); + } + /** * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum * computation for the tree happens outside of this method. @@ -91,7 +111,7 @@ public ContainerProtos.ContainerMerkleTree toProto() { /** * Represents a merkle tree for a single block within a container. */ - private static class BlockMerkleTree { + public static class BlockMerkleTree { // Map of each offset within the block to its chunk info. // Chunk order in the checksum is determined by their offset. private final SortedMap offset2Chunk; @@ -114,6 +134,30 @@ public void addChunks(Collection chunks) { } } + public void addChunk(ContainerProtos.ChunkInfo chunk) { + offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk)); + } + + public void setHealthy(long offset, boolean healthy) { + ChunkMerkleTree chunkMerkleTree = offset2Chunk.get(offset); + if (chunkMerkleTree == null) { + return; + } + chunkMerkleTree.setHealthy(healthy); + } + + public ChunkMerkleTree removeChunk(long offset) { + return offset2Chunk.remove(offset); + } + + public ChunkMerkleTree getChunk(long offset) { + return offset2Chunk.get(offset); + } + + public long getBlockId() { + return blockID; + } + /** * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. All block checksum * computation for the tree happens within this method. @@ -150,13 +194,34 @@ public ContainerProtos.BlockMerkleTree toProto() { * Each chunk has multiple checksums within it at each "bytesPerChecksum" interval. * This class computes one checksum for the whole chunk by aggregating these. */ - private static class ChunkMerkleTree { - private final ContainerProtos.ChunkInfo chunk; + public static class ChunkMerkleTree { + private ContainerProtos.ChunkInfo chunk; + private boolean isHealthy = true; ChunkMerkleTree(ContainerProtos.ChunkInfo chunk) { this.chunk = chunk; } + ChunkMerkleTree(ContainerProtos.ChunkInfo chunk, boolean isHealthy) { + this.chunk = chunk; + this.isHealthy = isHealthy; + } + + @VisibleForTesting + public void setChunk(ContainerProtos.ChunkInfo chunk) { + this.chunk = chunk; + } + + @VisibleForTesting + public void setHealthy(boolean healthy) { + this.isHealthy = healthy; + } + + + public ContainerProtos.ChunkInfo getChunkInfo() { + return chunk; + } + /** * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this * method. @@ -172,6 +237,7 @@ public ContainerProtos.ChunkMerkleTree toProto() { return ContainerProtos.ChunkMerkleTree.newBuilder() .setOffset(chunk.getOffset()) .setLength(chunk.getLen()) + .setIsHealthy(isHealthy) .setChunkChecksum(checksumImpl.getValue()) .build(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index c1bab5aa4856..6e7103fe21b2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -51,6 +51,9 @@ public static void unregister() { @Metric(about = "Number of Merkle tree read failure") private MutableCounterLong numMerkleTreeReadFailure; + @Metric(about = "Number of Merkle tree diff failure") + private MutableCounterLong numMerkleTreeDiffFailure; + @Metric(about = "Merkle tree write latency") private MutableRate merkleTreeWriteLatencyNS; @@ -60,6 +63,9 @@ public static void unregister() { @Metric(about = "Merkle tree creation latency") private MutableRate merkleTreeCreateLatencyNS; + @Metric(about = "Merkle tree diff latency") + private MutableRate merkleTreeDiffLatencyNS; + public void incrementMerkleTreeWriteFailures() { this.numMerkleTreeWriteFailure.incr(); } @@ -68,6 +74,10 @@ public void incrementMerkleTreeReadFailures() { this.numMerkleTreeReadFailure.incr(); } + public void incrementMerkleTreeDiffFailures() { + this.numMerkleTreeDiffFailure.incr(); + } + public MutableRate getWriteContainerMerkleTreeLatencyNS() { return this.merkleTreeWriteLatencyNS; } @@ -79,4 +89,8 @@ public MutableRate getReadContainerMerkleTreeLatencyNS() { public MutableRate getCreateMerkleTreeLatencyNS() { return this.merkleTreeCreateLatencyNS; } + + public MutableRate getMerkleTreeDiffLatencyNS() { + return this.merkleTreeDiffLatencyNS; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 96b86ae5cb90..8f47883a97b1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -32,10 +33,19 @@ import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -108,7 +118,7 @@ public static ContainerProtos.ChunkInfo buildChunk(ConfigurationSource config, i return ContainerProtos.ChunkInfo.newBuilder() .setChecksumData(checksumData) .setChunkName("chunk") - .setOffset(indexInBlock * chunkSize) + .setOffset(indexInBlock) .setLen(chunkSize) .build(); } @@ -131,53 +141,219 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { final long blockID1 = 1; final long blockID2 = 2; final long blockID3 = 3; + final long blockID4 = 4; + final long blockID5 = 5; ContainerProtos.ChunkInfo b1c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); ContainerProtos.ChunkInfo b1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{4, 5, 6})); - ContainerProtos.ChunkInfo b2c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{7, 8, 9})); - ContainerProtos.ChunkInfo b2c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{12, 11, 10})); - ContainerProtos.ChunkInfo b3c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{13, 14, 15})); - ContainerProtos.ChunkInfo b3c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + ContainerProtos.ChunkInfo b1c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{7, 8, 9})); + ContainerProtos.ChunkInfo b1c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{12, 11, 10})); + ContainerProtos.ChunkInfo b2c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{13, 14, 15})); + ContainerProtos.ChunkInfo b2c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + ContainerProtos.ChunkInfo b2c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{19, 20, 21})); + ContainerProtos.ChunkInfo b2c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{22, 23, 24})); + ContainerProtos.ChunkInfo b3c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{25, 26, 27})); + ContainerProtos.ChunkInfo b3c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{28, 29, 30})); + ContainerProtos.ChunkInfo b3c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{31, 32, 33})); + ContainerProtos.ChunkInfo b3c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{34, 35, 36})); + ContainerProtos.ChunkInfo b4c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{37, 38, 29})); + ContainerProtos.ChunkInfo b4c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{40, 41, 42})); + ContainerProtos.ChunkInfo b4c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{43, 44, 45})); + ContainerProtos.ChunkInfo b4c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{46, 47, 48})); + ContainerProtos.ChunkInfo b5c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{49, 50, 51})); + ContainerProtos.ChunkInfo b5c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{52, 53, 54})); + ContainerProtos.ChunkInfo b5c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{55, 56, 57})); + ContainerProtos.ChunkInfo b5c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{58, 59, 60})); ContainerMerkleTree tree = new ContainerMerkleTree(); - tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); - tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); - tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2, b1c3, b1c4)); + tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2, b2c3, b2c4)); + tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2, b3c3, b3c4)); + tree.addChunks(blockID4, Arrays.asList(b4c1, b4c2, b4c3, b4c4)); + tree.addChunks(blockID5, Arrays.asList(b5c1, b5c2, b5c3, b5c4)); return tree; } - public static ContainerMerkleTree buildTestTreeWithMismatches(ConfigurationSource conf, boolean missingBlocks, - boolean missingChunks, boolean corruptChunks) { - final long blockID1 = 1; - final long blockID2 = 2; - final long blockID3 = 3; - ContainerProtos.ChunkInfo b1c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ContainerProtos.ChunkInfo b1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{4, 5, 6})); - ContainerProtos.ChunkInfo b2c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{7, 8, 9})); - ContainerProtos.ChunkInfo b2c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{12, 11, 10})); - ContainerProtos.ChunkInfo b3c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{13, 14, 15})); - ContainerProtos.ChunkInfo b3c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + public static Pair buildTestTreeWithMismatches( + ConfigurationSource conf, int numMissingBlocks, int numMissingChunks, int numCorruptChunks) { - ContainerMerkleTree tree = new ContainerMerkleTree(); - tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + ContainerMerkleTree tree = buildTestTree(conf); + ContainerChecksumTreeManager.ContainerDiff diff = new ContainerChecksumTreeManager.ContainerDiff(); + Random random = new Random(); + + List blockIds = new ArrayList<>(Arrays.asList(1L, 2L, 3L, 4L, 5L)); + + introduceMissingBlocks(tree, diff, blockIds, numMissingBlocks, random); + introduceMissingChunks(tree, diff, blockIds, numMissingChunks, random); + introduceCorruptChunks(tree, diff, blockIds, numCorruptChunks, random, conf); + + return Pair.of(tree, diff); + } + + private static void introduceMissingBlocks(ContainerMerkleTree tree, + ContainerChecksumTreeManager.ContainerDiff diff, + List blockIds, + int numMissingBlocks, + Random random) { + for (int i = 0; i < numMissingBlocks && !blockIds.isEmpty(); i++) { + int index = random.nextInt(blockIds.size()); + long blockId = blockIds.remove(index); + ContainerMerkleTree.BlockMerkleTree blockTree = tree.remove(blockId); + diff.addMissingBlock(blockTree.toProto()); + } + } + + private static void introduceMissingChunks(ContainerMerkleTree tree, + ContainerChecksumTreeManager.ContainerDiff diff, + List blockIds, + int numMissingChunks, + Random random) { + for (int i = 0; i < numMissingChunks && !blockIds.isEmpty(); i++) { + long blockId = blockIds.get(random.nextInt(blockIds.size())); + ContainerMerkleTree.BlockMerkleTree blockTree = tree.get(blockId); + List chunkOffsets = getChunkOffsets(blockTree); + + if (!chunkOffsets.isEmpty()) { + long offset = chunkOffsets.remove(random.nextInt(chunkOffsets.size())); + ContainerMerkleTree.ChunkMerkleTree chunkTree = blockTree.removeChunk(offset); + diff.addMissingChunk(blockId, chunkTree.toProto()); + } + } + } + + private static void introduceCorruptChunks(ContainerMerkleTree tree, + ContainerChecksumTreeManager.ContainerDiff diff, + List blockIds, + int numCorruptChunks, + Random random, + ConfigurationSource conf) { + // Create a map to keep track of corrupted chunks per block + Map> corruptedChunksPerBlock = new HashMap<>(); + + int corruptionsIntroduced = 0; + while (corruptionsIntroduced < numCorruptChunks && !blockIds.isEmpty()) { + // Randomly select a block + int blockIndex = random.nextInt(blockIds.size()); + long blockId = blockIds.get(blockIndex); + ContainerMerkleTree.BlockMerkleTree blockTree = tree.get(blockId); + + // Get available chunk offsets for this block + List availableChunkOffsets = getChunkOffsets(blockTree); + + // Remove already corrupted chunks for this block + availableChunkOffsets.removeAll(corruptedChunksPerBlock.getOrDefault(blockId, new HashSet<>())); - if (corruptChunks) { - ContainerProtos.ChunkInfo corruptB1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{2, 4, 6})); - tree.addChunks(blockID1, Arrays.asList(b1c1, corruptB1c2)); - } else { - tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + if (!availableChunkOffsets.isEmpty()) { + // Randomly select an available chunk offset + int chunkIndex = random.nextInt(availableChunkOffsets.size()); + long offset = availableChunkOffsets.get(chunkIndex); + + // Remove the original chunk + ContainerMerkleTree.ChunkMerkleTree chunkMerkleTree = blockTree.removeChunk(offset); + + // Create and add corrupt chunk + ContainerProtos.ChunkInfo corruptChunk = buildChunk(conf, (int) offset, ByteBuffer.wrap(new byte[]{5, 10, 15})); + tree.addChunk(blockId, corruptChunk); + blockTree.setHealthy(offset, false); + diff.addCorruptChunks(blockId, chunkMerkleTree.toProto()); + + // Mark this chunk as corrupted for this block + corruptedChunksPerBlock.computeIfAbsent(blockId, k -> new HashSet<>()).add(offset); + + corruptionsIntroduced++; + } else { + // If no available chunks in this block, remove it from consideration + blockIds.remove(blockIndex); + } + } + } + + private static List getChunkOffsets(ContainerMerkleTree.BlockMerkleTree blockTree) { + return blockTree.toProto().getChunkMerkleTreeList().stream() + .map(ContainerProtos.ChunkMerkleTree::getOffset) + .collect(Collectors.toList()); + } + + public static void assertContainerDiffMatch(ContainerChecksumTreeManager.ContainerDiff expectedDiff, + ContainerChecksumTreeManager.ContainerDiff actualDiff) { + assertNotNull(expectedDiff, "Expected diff is null"); + assertNotNull(actualDiff, "Actual diff is null"); + assertEquals(expectedDiff.getMissingBlocks().size(), actualDiff.getMissingBlocks().size(), + "Mismatch in number of missing blocks"); + assertEquals(expectedDiff.getMissingChunks().size(), actualDiff.getMissingChunks().size(), + "Mismatch in number of missing chunks"); + assertEquals(expectedDiff.getCorruptChunks().size(), actualDiff.getCorruptChunks().size(), + "Mismatch in number of corrupt blocks"); + + List expectedMissingBlocks = expectedDiff.getMissingBlocks().stream().sorted( + Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); + List actualMissingBlocks = expectedDiff.getMissingBlocks().stream().sorted( + Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); + for (int i = 0; i < expectedMissingBlocks.size(); i++) { + ContainerProtos.BlockMerkleTree expectedBlockMerkleTree = expectedMissingBlocks.get(i); + ContainerProtos.BlockMerkleTree actualBlockMerkleTree = actualMissingBlocks.get(i); + assertEquals(expectedBlockMerkleTree.getBlockID(), actualBlockMerkleTree.getBlockID()); + assertEquals(expectedBlockMerkleTree.getChunkMerkleTreeCount(), + actualBlockMerkleTree.getChunkMerkleTreeCount()); + assertEquals(expectedBlockMerkleTree.getBlockChecksum(), actualBlockMerkleTree.getBlockChecksum()); + + for (int j = 0; j < expectedBlockMerkleTree.getChunkMerkleTreeCount(); j++) { + ContainerProtos.ChunkMerkleTree expectedChunk = expectedBlockMerkleTree.getChunkMerkleTree(j); + ContainerProtos.ChunkMerkleTree actualChunk = expectedBlockMerkleTree.getChunkMerkleTree(j); + assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), "Mismatch in chunk offset"); + assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), "Mismatch in chunk checksum"); + } } - if (missingChunks) { - tree.addChunks(blockID2, Arrays.asList(b2c1)); - } else { - tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + // Check missing chunks + Map> expectedMissingChunks = expectedDiff.getMissingChunks(); + Map> actualMissingChunks = actualDiff.getMissingChunks(); + + for (Map.Entry> entry : expectedMissingChunks.entrySet()) { + Long blockId = entry.getKey(); + List expectedChunks = entry.getValue().stream().sorted( + Comparator.comparing(ContainerProtos.ChunkMerkleTree::getOffset)).collect(Collectors.toList()); + List actualChunks = actualMissingChunks.get(blockId).stream().sorted( + Comparator.comparing(ContainerProtos.ChunkMerkleTree::getOffset)).collect(Collectors.toList()); + + assertNotNull(actualChunks, "Missing chunks for block " + blockId + " not found in actual diff"); + assertEquals(expectedChunks.size(), actualChunks.size(), + "Mismatch in number of missing chunks for block " + blockId); + + for (int i = 0; i < expectedChunks.size(); i++) { + ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunks.get(i); + ContainerProtos.ChunkMerkleTree actualChunk = actualChunks.get(i); + assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), + "Mismatch in chunk offset for block " + blockId); + assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), + "Mismatch in chunk checksum for block " + blockId); + } } - if (!missingBlocks) { - tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + // Check corrupt chunks + Map> expectedCorruptChunks = expectedDiff.getCorruptChunks(); + Map> actualCorruptChunks = actualDiff.getCorruptChunks(); + + for (Map.Entry> entry : expectedCorruptChunks.entrySet()) { + Long blockId = entry.getKey(); + List expectedChunks = entry.getValue().stream().sorted( + Comparator.comparing(ContainerProtos.ChunkMerkleTree::getOffset)).collect(Collectors.toList()); + List actualChunks = actualCorruptChunks.get(blockId).stream().sorted( + Comparator.comparing(ContainerProtos.ChunkMerkleTree::getOffset)).collect(Collectors.toList()); + + assertNotNull(actualChunks, "Corrupt chunks for block " + blockId + " not found in actual diff"); + assertEquals(expectedChunks.size(), actualChunks.size(), + "Mismatch in number of corrupt chunks for block " + blockId); + + for (int i = 0; i < expectedChunks.size(); i++) { + ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunks.get(i); + ContainerProtos.ChunkMerkleTree actualChunk = actualChunks.get(i); + assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), + "Mismatch in chunk offset for block " + blockId); + assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), + "Mismatch in chunk checksum for block " + blockId); + } } - return tree; } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index ad6af41635d8..0771969a7d3d 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -16,14 +16,17 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,7 +38,9 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Stream; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertContainerDiffMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTreeWithMismatches; @@ -61,6 +66,19 @@ class TestContainerChecksumTreeManager { private ContainerMerkleTreeMetrics metrics; private ConfigurationSource config; + public static Stream getParameters() { + return Stream.of( + Arguments.of(1, 2, 3), + Arguments.of(2, 3, 1), + Arguments.of(3, 1, 2), + Arguments.of(2, 2, 3), + Arguments.of(3, 2, 2), + Arguments.of(3, 3, 3), + Arguments.of(2, 1, 4), + Arguments.of(2, 3, 4), + Arguments.of(1, 2, 4)); + } + @BeforeEach public void init() { container = mock(KeyValueContainerData.class); @@ -313,14 +331,17 @@ public void testContainerWithNoDiff() throws IOException { .setContainerMerkleTree(peerMerkleTree.toProto()).build(); ContainerChecksumTreeManager.ContainerDiff diff = containerChecksumTreeManager.diff(container, peerChecksumInfo); - Assertions.assertTrue(diff.getCorruptChunks().isEmpty()); - Assertions.assertTrue(diff.getMissingBlocks().isEmpty()); - Assertions.assertTrue(diff.getMissingChunks().isEmpty()); + assertTrue(diff.isHealthy()); } - @Test - public void testContainerDiffWithMissingBlocksAndChunks() throws IOException { - ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(config, true, true, false); + @ParameterizedTest + @MethodSource("getParameters") + public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, + int numCorruptChunk) throws IOException { + Pair buildResult = + buildTestTreeWithMismatches(config, numMissingBlock, numMissingChunk, numCorruptChunk); + ContainerChecksumTreeManager.ContainerDiff expectedDiff = buildResult.getRight(); + ContainerMerkleTree ourMerkleTree = buildResult.getLeft(); ContainerMerkleTree peerMerkleTree = buildTestTree(config); checksumManager.writeContainerDataTree(container, ourMerkleTree); ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( @@ -330,42 +351,21 @@ public void testContainerDiffWithMissingBlocksAndChunks() throws IOException { .setContainerMerkleTree(peerMerkleTree.toProto()).build(); ContainerChecksumTreeManager.ContainerDiff diff = containerChecksumTreeManager.diff(container, peerChecksumInfo); - Assertions.assertTrue(diff.getCorruptChunks().isEmpty()); - Assertions.assertFalse(diff.getMissingBlocks().isEmpty()); - Assertions.assertFalse(diff.getMissingChunks().isEmpty()); - - Assertions.assertEquals(diff.getMissingBlocks().size(), 1); - Assertions.assertEquals(diff.getMissingChunks().size(), 1); - } - - @Test - public void testContainerDiffWithMissingBlocksAndMismatchChunks() throws IOException { - ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(config, true, false, true); - ContainerMerkleTree peerMerkleTree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, ourMerkleTree); - ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( - new OzoneConfiguration()); - ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() - .setContainerID(container.getContainerID()) - .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = - containerChecksumTreeManager.diff(container, peerChecksumInfo); - Assertions.assertFalse(diff.getCorruptChunks().isEmpty()); - Assertions.assertFalse(diff.getMissingBlocks().isEmpty()); - Assertions.assertTrue(diff.getMissingChunks().isEmpty()); - - Assertions.assertEquals(diff.getCorruptChunks().size(), 1); - Assertions.assertEquals(diff.getMissingBlocks().size(), 1); + assertContainerDiffMatch(expectedDiff, diff); } /** * Test if a peer which has missing blocks and chunks affects our container diff. * Only if our merkle tree has missing entries from the peer we need to add it the Container Diff. */ - @Test - public void testPeerWithMissingBlockAndMissingChunks() throws IOException { + @ParameterizedTest + @MethodSource("getParameters") + public void testPeerWithMissingBlockAndMissingChunks(int numMissingBlock, int numMissingChunk, + int numCorruptChunk) throws IOException { + Pair buildResult = + buildTestTreeWithMismatches(config, numMissingBlock, numMissingChunk, numCorruptChunk); ContainerMerkleTree ourMerkleTree = buildTestTree(config); - ContainerMerkleTree peerMerkleTree = buildTestTreeWithMismatches(config, true, true, true); + ContainerMerkleTree peerMerkleTree = buildResult.getLeft(); checksumManager.writeContainerDataTree(container, ourMerkleTree); ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( new OzoneConfiguration()); @@ -374,11 +374,7 @@ public void testPeerWithMissingBlockAndMissingChunks() throws IOException { .setContainerMerkleTree(peerMerkleTree.toProto()).build(); ContainerChecksumTreeManager.ContainerDiff diff = containerChecksumTreeManager.diff(container, peerChecksumInfo); - Assertions.assertFalse(diff.getCorruptChunks().isEmpty()); - Assertions.assertTrue(diff.getMissingBlocks().isEmpty()); - Assertions.assertTrue(diff.getMissingChunks().isEmpty()); - - Assertions.assertEquals(diff.getCorruptChunks().size(), 1); + assertTrue(diff.isHealthy()); } @Test diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 66af02c83a8f..6cd2ad3c578a 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -558,6 +558,7 @@ message ChunkMerkleTree { optional int64 offset = 1; optional int64 length = 2; optional int64 chunkChecksum = 3; + optional bool isHealthy = 4; } message BlockMerkleTree { From 6c59ee00967c1bf5f6e1c8873818c7e67d547b96 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 8 Nov 2024 11:43:16 -0800 Subject: [PATCH 03/11] HDDS-10928. Update implementation and address review comments. --- .../ContainerChecksumTreeManager.java | 186 ++++++++++-------- .../checksum/ContainerMerkleTreeMetrics.java | 20 ++ .../ContainerMerkleTreeTestUtils.java | 76 +++---- .../TestContainerChecksumTreeManager.java | 50 +++-- 4 files changed, 175 insertions(+), 157 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index f3f9b036e85d..b65e55f3d571 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -31,12 +31,14 @@ import java.io.IOException; import java.nio.file.Files; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Collection; -import java.util.SortedMap; +import java.util.Set; import java.util.SortedSet; -import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -149,100 +151,103 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws IOException { - Preconditions.assertNotNull(thisContainer, "Container data is null"); - Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); - Optional thisContainerChecksumInfoBuilder = - read(thisContainer); - if (!thisContainerChecksumInfoBuilder.isPresent()) { - throw new IOException("The container #" + thisContainer.getContainerID() + " doesn't have container checksum"); - } + ContainerDiff report = new ContainerDiff(); + try { + captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { + Preconditions.assertNotNull(thisContainer, "Container data is null"); + Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); + Optional thisContainerChecksumInfoBuilder = + read(thisContainer); + if (!thisContainerChecksumInfoBuilder.isPresent()) { + throw new IOException("The container #" + thisContainer.getContainerID() + + " doesn't have container checksum"); + } - if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { - throw new IOException("Container Id does not match"); - } + if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { + throw new IOException("Container Id does not match for container " + thisContainer.getContainerID()); + } + + ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); - ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); + ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); + ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); - ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); - ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); + Set thisDeletedBlockSet = new HashSet<>(thisChecksumInfo.getDeletedBlocksList()); + Set peerDeletedBlockSet = new HashSet<>(peerChecksumInfo.getDeletedBlocksList()); + // Common deleted blocks between two merkle trees; + thisDeletedBlockSet.retainAll(peerDeletedBlockSet); + compareContainerMerkleTree(thisMerkleTree, peerMerkleTree, thisDeletedBlockSet, report); + }); + } catch (IOException ex) { + metrics.incrementMerkleTreeDiffFailures(); + throw new IOException("Container Diff failed for container #" + thisContainer.getContainerID(), ex); + } - return compareContainerMerkleTree(thisMerkleTree, peerMerkleTree); + // Update Container Diff metrics based on the diff report. + if (report.isHealthy()) { + metrics.incrementHealthyContainerDiffs(); + } else { + metrics.incrementUnhealthyContainerDiffs(); + } + metrics.incrementMerkleTreeDiffSuccesses(); + return report; } - private ContainerDiff compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree thisMerkleTree, - ContainerProtos.ContainerMerkleTree peerMerkleTree) { + private void compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree thisMerkleTree, + ContainerProtos.ContainerMerkleTree peerMerkleTree, + Set commonDeletedBlockSet, + ContainerDiff report) { - ContainerDiff report = new ContainerDiff(); if (thisMerkleTree.getDataChecksum() == peerMerkleTree.getDataChecksum()) { - return new ContainerDiff(); + return; } List thisBlockMerkleTreeList = thisMerkleTree.getBlockMerkleTreeList(); List peerBlockMerkleTreeList = peerMerkleTree.getBlockMerkleTreeList(); - // Since we are reconciling our container with a peer, We only need to go through the peer's block list - int peerIdx = 0, thisIdx = 0; - while (peerIdx < peerBlockMerkleTreeList.size() || thisIdx < thisBlockMerkleTreeList.size()) { - ContainerProtos.BlockMerkleTree thisBlockMerkleTree = thisIdx < thisBlockMerkleTreeList.size() ? - thisBlockMerkleTreeList.get(thisIdx) : null; - ContainerProtos.BlockMerkleTree peerBlockMerkleTree = peerIdx < peerBlockMerkleTreeList.size() ? - peerBlockMerkleTreeList.get(peerIdx) : null; - - // We have checked all the peer blocks, So we can return the ContainerDiff report - if (peerBlockMerkleTree == null) { - return report; - } + int thisIdx = 0, peerIdx = 0; - // peerBlockMerkleTree is not null, but thisBlockMerkleTree is null. Means we have missing blocks. - if (thisBlockMerkleTree == null) { - report.addMissingBlock(peerBlockMerkleTree); - peerIdx++; - continue; - } + // Step 1: Process both lists while elements are present in both + while (thisIdx < thisBlockMerkleTreeList.size() && peerIdx < peerBlockMerkleTreeList.size()) { + ContainerProtos.BlockMerkleTree thisBlockMerkleTree = thisBlockMerkleTreeList.get(thisIdx); + ContainerProtos.BlockMerkleTree peerBlockMerkleTree = peerBlockMerkleTreeList.get(peerIdx); - // BlockId matches for both thisBlockMerkleTree and peerBlockMerkleTree if (thisBlockMerkleTree.getBlockID() == peerBlockMerkleTree.getBlockID()) { - if (thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { - compareBlockMerkleTree(report, thisBlockMerkleTree, peerBlockMerkleTree); + // Matching block ID; check checksum + if (!commonDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) + && thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { + compareBlockMerkleTree(thisBlockMerkleTree, peerBlockMerkleTree, report); } + thisIdx++; peerIdx++; + } else if (thisBlockMerkleTree.getBlockID() < peerBlockMerkleTree.getBlockID()) { + // This block's ID is smaller; advance thisIdx to catch up thisIdx++; } else { - if (peerBlockMerkleTree.getBlockID() > thisBlockMerkleTree.getBlockID()) { - thisIdx++; - } else { - report.addMissingBlock(peerBlockMerkleTree); - peerIdx++; - } + // Peer block's ID is smaller; record missing block and advance peerIdx + report.addMissingBlock(peerBlockMerkleTree); + peerIdx++; } } - return report; + + // Step 2: Process remaining blocks in the peer list + while (peerIdx < peerBlockMerkleTreeList.size()) { + report.addMissingBlock(peerBlockMerkleTreeList.get(peerIdx)); + peerIdx++; + } } - private void compareBlockMerkleTree(ContainerDiff report, ContainerProtos.BlockMerkleTree thisBlockMerkleTree, - ContainerProtos.BlockMerkleTree peerBlockMerkleTree) { + private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMerkleTree, + ContainerProtos.BlockMerkleTree peerBlockMerkleTree, + ContainerDiff report) { List thisChunkMerkleTreeList = thisBlockMerkleTree.getChunkMerkleTreeList(); List peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList(); + int thisIdx = 0, peerIdx = 0; - // Since we are reconciling our container with a peer, We only need to go through the peer's chunk list - int peerIdx = 0, thisIdx = 0; - while (peerIdx < peerChunkMerkleTreeList.size() || thisIdx < thisChunkMerkleTreeList.size()) { - ContainerProtos.ChunkMerkleTree thisChunkMerkleTree = thisIdx < thisChunkMerkleTreeList.size() ? - thisChunkMerkleTreeList.get(thisIdx) : null; - ContainerProtos.ChunkMerkleTree peerChunkMerkleTree = peerIdx < peerChunkMerkleTreeList.size() ? - peerChunkMerkleTreeList.get(peerIdx) : null; - - // We have checked all the peer chunks, So we can return the ContainerDiff report - if (peerChunkMerkleTree == null) { - return; - } - - // peerBlockMerkleTree is not null, but thisBlockMerkleTree is null. Means we have missing blocks. - if (thisChunkMerkleTree == null) { - report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); - peerIdx++; - continue; - } + // Step 1: Process both lists while elements are present in both + while (thisIdx < thisChunkMerkleTreeList.size() && peerIdx < peerChunkMerkleTreeList.size()) { + ContainerProtos.ChunkMerkleTree thisChunkMerkleTree = thisChunkMerkleTreeList.get(thisIdx); + ContainerProtos.ChunkMerkleTree peerChunkMerkleTree = peerChunkMerkleTreeList.get(peerIdx); if (thisChunkMerkleTree.getOffset() == peerChunkMerkleTree.getOffset()) { // Possible state when this Checksum != peer Checksum: @@ -252,19 +257,25 @@ private void compareBlockMerkleTree(ContainerDiff report, ContainerProtos.BlockM // thisTree = Unhealthy, peerTree = Unhealthy -> Do Nothing as both are corrupt. if (thisChunkMerkleTree.getChunkChecksum() != peerChunkMerkleTree.getChunkChecksum() && !thisChunkMerkleTree.getIsHealthy() && peerChunkMerkleTree.getIsHealthy()) { - report.addCorruptChunks(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + report.addCorruptChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); } + thisIdx++; peerIdx++; + } else if (thisChunkMerkleTree.getOffset() < peerChunkMerkleTree.getOffset()) { + // This chunk's offset is smaller; advance thisIdx thisIdx++; } else { - if (peerChunkMerkleTree.getOffset() > thisChunkMerkleTree.getOffset()) { - thisIdx++; - } else { - report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); - peerIdx++; - } + // Peer chunk's offset is smaller; record missing chunk and advance peerIdx + report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree); + peerIdx++; } } + + // Step 2: Process remaining chunks in the peer list + while (peerIdx < peerChunkMerkleTreeList.size()) { + report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTreeList.get(peerIdx)); + peerIdx++; + } } /** @@ -364,13 +375,13 @@ public static boolean checksumFileExist(Container container) { */ public static class ContainerDiff { private final List missingBlocks; - private final SortedMap> missingChunks; - private final SortedMap> corruptChunks; + private final Map> missingChunks; + private final Map> corruptChunks; public ContainerDiff() { this.missingBlocks = new ArrayList<>(); - this.missingChunks = new TreeMap<>(); - this.corruptChunks = new TreeMap<>(); + this.missingChunks = new HashMap<>(); + this.corruptChunks = new HashMap<>(); } public void addMissingBlock(ContainerProtos.BlockMerkleTree missingBlockMerkleTree) { @@ -381,7 +392,7 @@ public void addMissingChunk(long blockId, ContainerProtos.ChunkMerkleTree missin this.missingChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(missingChunkMerkleTree); } - public void addCorruptChunks(long blockId, ContainerProtos.ChunkMerkleTree corruptChunk) { + public void addCorruptChunk(long blockId, ContainerProtos.ChunkMerkleTree corruptChunk) { this.corruptChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(corruptChunk); } @@ -389,19 +400,20 @@ public List getMissingBlocks() { return missingBlocks; } - public SortedMap> getMissingChunks() { + public Map> getMissingChunks() { return missingChunks; } - public SortedMap> getCorruptChunks() { + public Map> getCorruptChunks() { return corruptChunks; } + /** + * If isHealthy is true, It means current replica is healthy. The peer replica still may have corruption, + * which it will fix when it reconciles with other peers. + */ public boolean isHealthy() { - if (missingBlocks.isEmpty() && missingChunks.isEmpty() && corruptChunks.isEmpty()) { - return true; - } - return false; + return missingBlocks.isEmpty() && missingChunks.isEmpty() && corruptChunks.isEmpty(); } } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index 6e7103fe21b2..116d9c3eb2d1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -54,6 +54,15 @@ public static void unregister() { @Metric(about = "Number of Merkle tree diff failure") private MutableCounterLong numMerkleTreeDiffFailure; + @Metric(about = "Number of Merkle tree diff success") + private MutableCounterLong numMerkleTreeDiffSuccess; + + @Metric(about = "Number of healthy container diff") + private MutableCounterLong numHealthyContainerDiff; + + @Metric(about = "Number of unhealthy container diff") + private MutableCounterLong numUnhealthyContainerDiff; + @Metric(about = "Merkle tree write latency") private MutableRate merkleTreeWriteLatencyNS; @@ -78,6 +87,17 @@ public void incrementMerkleTreeDiffFailures() { this.numMerkleTreeDiffFailure.incr(); } + public void incrementMerkleTreeDiffSuccesses() { + this.numMerkleTreeDiffSuccess.incr(); + } + + public void incrementHealthyContainerDiffs() { + this.numHealthyContainerDiff.incr(); + } + public void incrementUnhealthyContainerDiffs() { + this.numUnhealthyContainerDiff.incr(); + } + public MutableRate getWriteContainerMerkleTreeLatencyNS() { return this.merkleTreeWriteLatencyNS; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 8f47883a97b1..52febbaddee9 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -138,42 +138,21 @@ public static ContainerProtos.ContainerChecksumInfo readChecksumFile(ContainerDa * structure is preserved throughout serialization, deserialization, and API calls. */ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { - final long blockID1 = 1; - final long blockID2 = 2; - final long blockID3 = 3; - final long blockID4 = 4; - final long blockID5 = 5; - ContainerProtos.ChunkInfo b1c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ContainerProtos.ChunkInfo b1c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{4, 5, 6})); - ContainerProtos.ChunkInfo b1c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{7, 8, 9})); - ContainerProtos.ChunkInfo b1c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{12, 11, 10})); - ContainerProtos.ChunkInfo b2c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{13, 14, 15})); - ContainerProtos.ChunkInfo b2c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{16, 17, 18})); - ContainerProtos.ChunkInfo b2c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{19, 20, 21})); - ContainerProtos.ChunkInfo b2c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{22, 23, 24})); - ContainerProtos.ChunkInfo b3c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{25, 26, 27})); - ContainerProtos.ChunkInfo b3c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{28, 29, 30})); - ContainerProtos.ChunkInfo b3c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{31, 32, 33})); - ContainerProtos.ChunkInfo b3c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{34, 35, 36})); - ContainerProtos.ChunkInfo b4c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{37, 38, 29})); - ContainerProtos.ChunkInfo b4c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{40, 41, 42})); - ContainerProtos.ChunkInfo b4c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{43, 44, 45})); - ContainerProtos.ChunkInfo b4c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{46, 47, 48})); - ContainerProtos.ChunkInfo b5c1 = buildChunk(conf, 0, ByteBuffer.wrap(new byte[]{49, 50, 51})); - ContainerProtos.ChunkInfo b5c2 = buildChunk(conf, 1, ByteBuffer.wrap(new byte[]{52, 53, 54})); - ContainerProtos.ChunkInfo b5c3 = buildChunk(conf, 2, ByteBuffer.wrap(new byte[]{55, 56, 57})); - ContainerProtos.ChunkInfo b5c4 = buildChunk(conf, 3, ByteBuffer.wrap(new byte[]{58, 59, 60})); - ContainerMerkleTree tree = new ContainerMerkleTree(); - tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2, b1c3, b1c4)); - tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2, b2c3, b2c4)); - tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2, b3c3, b3c4)); - tree.addChunks(blockID4, Arrays.asList(b4c1, b4c2, b4c3, b4c4)); - tree.addChunks(blockID5, Arrays.asList(b5c1, b5c2, b5c3, b5c4)); - + byte byteValue = 1; + for (int blockIndex = 1; blockIndex <= 5; blockIndex++) { + List chunks = new ArrayList<>(); + for (int chunkIndex = 0; chunkIndex < 4; chunkIndex++) { + chunks.add(buildChunk(conf, chunkIndex, ByteBuffer.wrap(new byte[]{byteValue++, byteValue++, byteValue++}))); + } + tree.addChunks(blockIndex, chunks); + } return tree; } + /** + * Returns a Pair of merkle tree and the expected container diff for that merkle tree. + */ public static Pair buildTestTreeWithMismatches( ConfigurationSource conf, int numMissingBlocks, int numMissingChunks, int numCorruptChunks) { @@ -182,7 +161,6 @@ public static Pair blockIds = new ArrayList<>(Arrays.asList(1L, 2L, 3L, 4L, 5L)); - introduceMissingBlocks(tree, diff, blockIds, numMissingBlocks, random); introduceMissingChunks(tree, diff, blockIds, numMissingChunks, random); introduceCorruptChunks(tree, diff, blockIds, numCorruptChunks, random, conf); @@ -255,7 +233,7 @@ private static void introduceCorruptChunks(ContainerMerkleTree tree, ContainerProtos.ChunkInfo corruptChunk = buildChunk(conf, (int) offset, ByteBuffer.wrap(new byte[]{5, 10, 15})); tree.addChunk(blockId, corruptChunk); blockTree.setHealthy(offset, false); - diff.addCorruptChunks(blockId, chunkMerkleTree.toProto()); + diff.addCorruptChunk(blockId, chunkMerkleTree.toProto()); // Mark this chunk as corrupted for this block corruptedChunksPerBlock.computeIfAbsent(blockId, k -> new HashSet<>()).add(offset); @@ -296,13 +274,8 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertEquals(expectedBlockMerkleTree.getChunkMerkleTreeCount(), actualBlockMerkleTree.getChunkMerkleTreeCount()); assertEquals(expectedBlockMerkleTree.getBlockChecksum(), actualBlockMerkleTree.getBlockChecksum()); - - for (int j = 0; j < expectedBlockMerkleTree.getChunkMerkleTreeCount(); j++) { - ContainerProtos.ChunkMerkleTree expectedChunk = expectedBlockMerkleTree.getChunkMerkleTree(j); - ContainerProtos.ChunkMerkleTree actualChunk = expectedBlockMerkleTree.getChunkMerkleTree(j); - assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), "Mismatch in chunk offset"); - assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), "Mismatch in chunk checksum"); - } + assertEqualsChunkMerkleTree(expectedBlockMerkleTree.getChunkMerkleTreeList(), + actualBlockMerkleTree.getChunkMerkleTreeList()); } // Check missing chunks @@ -319,7 +292,7 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertNotNull(actualChunks, "Missing chunks for block " + blockId + " not found in actual diff"); assertEquals(expectedChunks.size(), actualChunks.size(), "Mismatch in number of missing chunks for block " + blockId); - + assertEqualsChunkMerkleTree(expectedChunks, actualChunks); for (int i = 0; i < expectedChunks.size(); i++) { ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunks.get(i); ContainerProtos.ChunkMerkleTree actualChunk = actualChunks.get(i); @@ -344,15 +317,18 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertNotNull(actualChunks, "Corrupt chunks for block " + blockId + " not found in actual diff"); assertEquals(expectedChunks.size(), actualChunks.size(), "Mismatch in number of corrupt chunks for block " + blockId); + assertEqualsChunkMerkleTree(expectedChunks, actualChunks); + } + } - for (int i = 0; i < expectedChunks.size(); i++) { - ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunks.get(i); - ContainerProtos.ChunkMerkleTree actualChunk = actualChunks.get(i); - assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), - "Mismatch in chunk offset for block " + blockId); - assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), - "Mismatch in chunk checksum for block " + blockId); - } + private static void assertEqualsChunkMerkleTree(List expectedChunkMerkleTreeList, + List actualChunkMerkleTreeList) { + assertEquals(expectedChunkMerkleTreeList.size(), actualChunkMerkleTreeList.size()); + for (int j = 0; j < expectedChunkMerkleTreeList.size(); j++) { + ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunkMerkleTreeList.get(j); + ContainerProtos.ChunkMerkleTree actualChunk = actualChunkMerkleTreeList.get(j); + assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), "Mismatch in chunk offset"); + assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), "Mismatch in chunk checksum"); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 0771969a7d3d..0aad829d17a6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -66,8 +66,16 @@ class TestContainerChecksumTreeManager { private ContainerMerkleTreeMetrics metrics; private ConfigurationSource config; - public static Stream getParameters() { + /** + * The number of mismatched to be introduced in the container diff. The order is of follow, + * Number of missing blocks, Number of missing chunks, Number of corrupt chunks. + */ + public static Stream getContainerDiffMismatches() { return Stream.of( + Arguments.of(0, 0, 0), + Arguments.of(0, 0, 1), + Arguments.of(0, 1, 0), + Arguments.of(1, 0, 0), Arguments.of(1, 2, 3), Arguments.of(2, 3, 1), Arguments.of(3, 1, 2), @@ -76,7 +84,11 @@ public static Stream getParameters() { Arguments.of(3, 3, 3), Arguments.of(2, 1, 4), Arguments.of(2, 3, 4), - Arguments.of(1, 2, 4)); + Arguments.of(1, 2, 4), + Arguments.of(3, 3, 3), + Arguments.of(3, 3, 0), + Arguments.of(3, 0, 3), + Arguments.of(0, 3, 3)); } @BeforeEach @@ -324,18 +336,20 @@ public void testContainerWithNoDiff() throws IOException { ContainerMerkleTree ourMerkleTree = buildTestTree(config); ContainerMerkleTree peerMerkleTree = buildTestTree(config); checksumManager.writeContainerDataTree(container, ourMerkleTree); - ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( - new OzoneConfiguration()); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = - containerChecksumTreeManager.diff(container, peerChecksumInfo); + ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); + assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertTrue(diff.isHealthy()); } + /** + * Test if our merkle tree has missing blocks and chunks. If our tree has mismatches with respect to the + * peer then we need to include that mismatch in the container diff. + */ @ParameterizedTest - @MethodSource("getParameters") + @MethodSource("getContainerDiffMismatches") public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws IOException { Pair buildResult = @@ -344,36 +358,32 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC ContainerMerkleTree ourMerkleTree = buildResult.getLeft(); ContainerMerkleTree peerMerkleTree = buildTestTree(config); checksumManager.writeContainerDataTree(container, ourMerkleTree); - ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( - new OzoneConfiguration()); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = - containerChecksumTreeManager.diff(container, peerChecksumInfo); + ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); + assertTrue(metrics.getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); } /** - * Test if a peer which has missing blocks and chunks affects our container diff. - * Only if our merkle tree has missing entries from the peer we need to add it the Container Diff. + * Test if a peer which has missing blocks and chunks affects our container diff. If the peer tree has mismatches + * with respect to our merkle tree then we need to should not include that mismatch in the container diff. + * The ContainerDiff generated by the peer when it reconciles with our merkle tree will capture that mismatch. */ @ParameterizedTest - @MethodSource("getParameters") - public void testPeerWithMissingBlockAndMissingChunks(int numMissingBlock, int numMissingChunk, - int numCorruptChunk) throws IOException { + @MethodSource("getContainerDiffMismatches") + public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingChunk, + int numCorruptChunk) throws IOException { Pair buildResult = buildTestTreeWithMismatches(config, numMissingBlock, numMissingChunk, numCorruptChunk); ContainerMerkleTree ourMerkleTree = buildTestTree(config); ContainerMerkleTree peerMerkleTree = buildResult.getLeft(); checksumManager.writeContainerDataTree(container, ourMerkleTree); - ContainerChecksumTreeManager containerChecksumTreeManager = new ContainerChecksumTreeManager( - new OzoneConfiguration()); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = - containerChecksumTreeManager.diff(container, peerChecksumInfo); + ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(diff.isHealthy()); } From 11a256a9995648190e28cd0d8f2bad3f0f0b9837 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 13 Nov 2024 14:02:13 -0800 Subject: [PATCH 04/11] Address review comments --- .../ContainerChecksumTreeManager.java | 15 +++++----- .../checksum/ContainerMerkleTree.java | 28 ------------------- .../TestContainerChecksumTreeManager.java | 9 +++--- 3 files changed, 12 insertions(+), 40 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index b65e55f3d571..98eea96da25b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -184,10 +184,10 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C } // Update Container Diff metrics based on the diff report. - if (report.isHealthy()) { - metrics.incrementHealthyContainerDiffs(); - } else { + if (report.needsRepair()) { metrics.incrementUnhealthyContainerDiffs(); + } else { + metrics.incrementHealthyContainerDiffs(); } metrics.incrementMerkleTreeDiffSuccesses(); return report; @@ -409,11 +409,12 @@ public Map> getCorruptChunks() { } /** - * If isHealthy is true, It means current replica is healthy. The peer replica still may have corruption, - * which it will fix when it reconciles with other peers. + * If needRepair is true, It means current replica needs blocks/chunks from the peer to repair + * it's container replica. The peer replica still may have corruption, which it will fix when + * it reconciles with other peers. */ - public boolean isHealthy() { - return missingBlocks.isEmpty() && missingChunks.isEmpty() && corruptChunks.isEmpty(); + public boolean needsRepair() { + return !missingBlocks.isEmpty() || !missingChunks.isEmpty() || !corruptChunks.isEmpty(); } } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index ce86b63dc89f..d1c91e46c818 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -67,11 +67,6 @@ public void addChunk(long blockID, ContainerProtos.ChunkInfo chunk) { id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunk(chunk); } - @VisibleForTesting - public BlockMerkleTree getChunks(long blockID) { - return id2Block.get(blockID); - } - @VisibleForTesting public BlockMerkleTree get(long blockID) { return id2Block.get(blockID); @@ -150,14 +145,6 @@ public ChunkMerkleTree removeChunk(long offset) { return offset2Chunk.remove(offset); } - public ChunkMerkleTree getChunk(long offset) { - return offset2Chunk.get(offset); - } - - public long getBlockId() { - return blockID; - } - /** * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. All block checksum * computation for the tree happens within this method. @@ -202,26 +189,11 @@ public static class ChunkMerkleTree { this.chunk = chunk; } - ChunkMerkleTree(ContainerProtos.ChunkInfo chunk, boolean isHealthy) { - this.chunk = chunk; - this.isHealthy = isHealthy; - } - - @VisibleForTesting - public void setChunk(ContainerProtos.ChunkInfo chunk) { - this.chunk = chunk; - } - @VisibleForTesting public void setHealthy(boolean healthy) { this.isHealthy = healthy; } - - public ContainerProtos.ChunkInfo getChunkInfo() { - return chunk; - } - /** * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this * method. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 0aad829d17a6..24b4df58e8cf 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -81,7 +81,6 @@ public static Stream getContainerDiffMismatches() { Arguments.of(3, 1, 2), Arguments.of(2, 2, 3), Arguments.of(3, 2, 2), - Arguments.of(3, 3, 3), Arguments.of(2, 1, 4), Arguments.of(2, 3, 4), Arguments.of(1, 2, 4), @@ -341,14 +340,14 @@ public void testContainerWithNoDiff() throws IOException { .setContainerMerkleTree(peerMerkleTree.toProto()).build(); ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); - assertTrue(diff.isHealthy()); + assertFalse(diff.needsRepair()); } /** * Test if our merkle tree has missing blocks and chunks. If our tree has mismatches with respect to the * peer then we need to include that mismatch in the container diff. */ - @ParameterizedTest + @ParameterizedTest(name = "Missing blocks: {0}, Missing chunks: {1}, Corrupt chunks: {2}") @MethodSource("getContainerDiffMismatches") public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws IOException { @@ -371,7 +370,7 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC * with respect to our merkle tree then we need to should not include that mismatch in the container diff. * The ContainerDiff generated by the peer when it reconciles with our merkle tree will capture that mismatch. */ - @ParameterizedTest + @ParameterizedTest(name = "Missing blocks: {0}, Missing chunks: {1}, Corrupt chunks: {2}") @MethodSource("getContainerDiffMismatches") public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws IOException { @@ -384,7 +383,7 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); - assertTrue(diff.isHealthy()); + assertFalse(diff.needsRepair()); } @Test From 1fdd3a5085e63b838bb59db9ecca23724d23050d Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 13 Nov 2024 16:00:43 -0800 Subject: [PATCH 05/11] Update deleted blocks logic --- .../ContainerChecksumTreeManager.java | 45 +++++++++++-------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 98eea96da25b..f00fdfe32163 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -168,15 +168,7 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C } ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); - - ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); - ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); - - Set thisDeletedBlockSet = new HashSet<>(thisChecksumInfo.getDeletedBlocksList()); - Set peerDeletedBlockSet = new HashSet<>(peerChecksumInfo.getDeletedBlocksList()); - // Common deleted blocks between two merkle trees; - thisDeletedBlockSet.retainAll(peerDeletedBlockSet); - compareContainerMerkleTree(thisMerkleTree, peerMerkleTree, thisDeletedBlockSet, report); + compareContainerMerkleTree(thisChecksumInfo, peerChecksumInfo, report); }); } catch (IOException ex) { metrics.incrementMerkleTreeDiffFailures(); @@ -193,10 +185,13 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C return report; } - private void compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree thisMerkleTree, - ContainerProtos.ContainerMerkleTree peerMerkleTree, - Set commonDeletedBlockSet, - ContainerDiff report) { + private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo thisChecksumInfo, + ContainerProtos.ContainerChecksumInfo peerChecksumInfo, + ContainerDiff report) { + ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); + ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); + Set thisDeletedBlockSet = new HashSet<>(thisChecksumInfo.getDeletedBlocksList()); + Set peerDeletedBlockSet = new HashSet<>(peerChecksumInfo.getDeletedBlocksList()); if (thisMerkleTree.getDataChecksum() == peerMerkleTree.getDataChecksum()) { return; @@ -212,9 +207,15 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree this ContainerProtos.BlockMerkleTree peerBlockMerkleTree = peerBlockMerkleTreeList.get(peerIdx); if (thisBlockMerkleTree.getBlockID() == peerBlockMerkleTree.getBlockID()) { - // Matching block ID; check checksum - if (!commonDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) - && thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { + // Matching block ID; check if the block is deleted and handle the cases; + // 1) If the block is deleted in both the block merkle tree, We can ignore comparing them. + // 2) If the block is only deleted in our merkle tree, The BG service should have deleted our + // block and the peer's BG service hasn't run yet. We can ignore comparing them. + // 3) If the block is only deleted in peer merkle tree, we can't reconcile for this block. It might be + // deleted by peer's BG service. We can ignore comparing them. + if (!thisDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && + !peerDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && + thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { compareBlockMerkleTree(thisBlockMerkleTree, peerBlockMerkleTree, report); } thisIdx++; @@ -223,15 +224,21 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerMerkleTree this // This block's ID is smaller; advance thisIdx to catch up thisIdx++; } else { - // Peer block's ID is smaller; record missing block and advance peerIdx - report.addMissingBlock(peerBlockMerkleTree); + // Peer block's ID is smaller; record missing block if peerDeletedBlockSet doesn't contain the blockId + // and advance peerIdx + if (!peerDeletedBlockSet.contains(peerBlockMerkleTree.getBlockID())) { + report.addMissingBlock(peerBlockMerkleTree); + } peerIdx++; } } // Step 2: Process remaining blocks in the peer list while (peerIdx < peerBlockMerkleTreeList.size()) { - report.addMissingBlock(peerBlockMerkleTreeList.get(peerIdx)); + ContainerProtos.BlockMerkleTree peerBlockMerkleTree = peerBlockMerkleTreeList.get(peerIdx); + if (!peerDeletedBlockSet.contains(peerBlockMerkleTree.getBlockID())) { + report.addMissingBlock(peerBlockMerkleTree); + } peerIdx++; } } From 9fc13b68c2260319fca04f0bb698612c8ec7b2ce Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 14 Nov 2024 13:47:02 -0800 Subject: [PATCH 06/11] Update tests. --- .../ContainerChecksumTreeManager.java | 10 +- .../checksum/ContainerMerkleTree.java | 36 ---- .../checksum/ContainerMerkleTreeMetrics.java | 16 +- .../container/keyvalue/KeyValueHandler.java | 4 +- .../ContainerMerkleTreeTestUtils.java | 158 +++++++++--------- .../TestContainerChecksumTreeManager.java | 48 +++--- .../TestContainerCommandReconciliation.java | 2 +- 7 files changed, 116 insertions(+), 158 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index f00fdfe32163..9fdb6daeea7e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -85,7 +85,7 @@ public void stop() { * file remains unchanged. * Concurrent writes to the same file are coordinated internally. */ - public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) throws IOException { + public void writeContainerDataTree(ContainerData data, ContainerProtos.ContainerMerkleTree tree) throws IOException { long containerID = data.getContainerID(); Lock writeLock = getLock(containerID); writeLock.lock(); @@ -103,7 +103,7 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) checksumInfoBuilder .setContainerID(containerID) - .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)); + .setContainerMerkleTree(tree); write(data, checksumInfoBuilder.build()); LOG.debug("Data merkle tree for container {} updated", containerID); } finally { @@ -177,9 +177,9 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C // Update Container Diff metrics based on the diff report. if (report.needsRepair()) { - metrics.incrementUnhealthyContainerDiffs(); + metrics.incrementRepairContainerDiffs(); } else { - metrics.incrementHealthyContainerDiffs(); + metrics.incrementNoRepairContainerDiffs(); } metrics.incrementMerkleTreeDiffSuccesses(); return report; @@ -417,7 +417,7 @@ public Map> getCorruptChunks() { /** * If needRepair is true, It means current replica needs blocks/chunks from the peer to repair - * it's container replica. The peer replica still may have corruption, which it will fix when + * its container replica. The peer replica still may have corruption, which it will fix when * it reconciles with other peers. */ public boolean needsRepair() { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index d1c91e46c818..6562718ebcab 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -16,7 +16,6 @@ */ package org.apache.hadoop.ozone.container.checksum; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.common.ChecksumByteBuffer; import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; @@ -63,20 +62,6 @@ public void addChunks(long blockID, Collection chunks id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks); } - public void addChunk(long blockID, ContainerProtos.ChunkInfo chunk) { - id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunk(chunk); - } - - @VisibleForTesting - public BlockMerkleTree get(long blockID) { - return id2Block.get(blockID); - } - - @VisibleForTesting - public BlockMerkleTree remove(long blockID) { - return id2Block.remove(blockID); - } - /** * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum * computation for the tree happens outside of this method. @@ -129,22 +114,6 @@ public void addChunks(Collection chunks) { } } - public void addChunk(ContainerProtos.ChunkInfo chunk) { - offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk)); - } - - public void setHealthy(long offset, boolean healthy) { - ChunkMerkleTree chunkMerkleTree = offset2Chunk.get(offset); - if (chunkMerkleTree == null) { - return; - } - chunkMerkleTree.setHealthy(healthy); - } - - public ChunkMerkleTree removeChunk(long offset) { - return offset2Chunk.remove(offset); - } - /** * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. All block checksum * computation for the tree happens within this method. @@ -189,11 +158,6 @@ public static class ChunkMerkleTree { this.chunk = chunk; } - @VisibleForTesting - public void setHealthy(boolean healthy) { - this.isHealthy = healthy; - } - /** * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this * method. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index 116d9c3eb2d1..e62bf2573575 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -57,11 +57,11 @@ public static void unregister() { @Metric(about = "Number of Merkle tree diff success") private MutableCounterLong numMerkleTreeDiffSuccess; - @Metric(about = "Number of healthy container diff") - private MutableCounterLong numHealthyContainerDiff; + @Metric(about = "Number of container diff that doesn't require repair") + private MutableCounterLong numNoRepairContainerDiff; - @Metric(about = "Number of unhealthy container diff") - private MutableCounterLong numUnhealthyContainerDiff; + @Metric(about = "Number of container diff that requires repair") + private MutableCounterLong numRepairContainerDiff; @Metric(about = "Merkle tree write latency") private MutableRate merkleTreeWriteLatencyNS; @@ -91,11 +91,11 @@ public void incrementMerkleTreeDiffSuccesses() { this.numMerkleTreeDiffSuccess.incr(); } - public void incrementHealthyContainerDiffs() { - this.numHealthyContainerDiff.incr(); + public void incrementNoRepairContainerDiffs() { + this.numNoRepairContainerDiff.incr(); } - public void incrementUnhealthyContainerDiffs() { - this.numUnhealthyContainerDiff.incr(); + public void incrementRepairContainerDiffs() { + this.numRepairContainerDiff.incr(); } public MutableRate getWriteContainerMerkleTreeLatencyNS() { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 66dc54e30247..4cd4c34ae63a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -137,6 +137,7 @@ import static org.apache.hadoop.ozone.ClientVersion.EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST; import static org.apache.hadoop.ozone.OzoneConsts.INCREMENTAL_CHUNK_LIST; import static org.apache.hadoop.ozone.container.common.interfaces.Container.ScanResult; +import static org.apache.hadoop.ozone.util.MetricUtil.captureLatencyNs; import org.apache.hadoop.util.Time; import org.apache.ratis.statemachine.StateMachine; @@ -563,7 +564,8 @@ private void createContainerMerkleTree(Container container) { merkleTree.addChunks(blockData.getLocalID(), chunkInfos); } } - checksumManager.writeContainerDataTree(containerData, merkleTree); + checksumManager.writeContainerDataTree(containerData, captureLatencyNs( + checksumManager.getMetrics().getCreateMerkleTreeLatencyNS(), merkleTree::toProto)); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 52febbaddee9..dda7bce7d56a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -153,105 +153,101 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { /** * Returns a Pair of merkle tree and the expected container diff for that merkle tree. */ - public static Pair buildTestTreeWithMismatches( - ConfigurationSource conf, int numMissingBlocks, int numMissingChunks, int numCorruptChunks) { + public static Pair + buildTestTreeWithMismatches(ContainerMerkleTree originalTree, int numMissingBlocks, int numMissingChunks, + int numCorruptChunks) { - ContainerMerkleTree tree = buildTestTree(conf); + ContainerProtos.ContainerMerkleTree.Builder treeBuilder = originalTree.toProto().toBuilder(); ContainerChecksumTreeManager.ContainerDiff diff = new ContainerChecksumTreeManager.ContainerDiff(); - Random random = new Random(); - - List blockIds = new ArrayList<>(Arrays.asList(1L, 2L, 3L, 4L, 5L)); - introduceMissingBlocks(tree, diff, blockIds, numMissingBlocks, random); - introduceMissingChunks(tree, diff, blockIds, numMissingChunks, random); - introduceCorruptChunks(tree, diff, blockIds, numCorruptChunks, random, conf); - return Pair.of(tree, diff); + introduceMissingBlocks(treeBuilder, numMissingBlocks, diff); + introduceMissingChunks(treeBuilder, numMissingChunks, diff); + introduceCorruptChunks(treeBuilder, numCorruptChunks, diff); + ContainerProtos.ContainerMerkleTree build = treeBuilder.build(); + return Pair.of(build, diff); } - private static void introduceMissingBlocks(ContainerMerkleTree tree, - ContainerChecksumTreeManager.ContainerDiff diff, - List blockIds, + /** + * Introduces missing blocks by removing random blocks from the tree. + */ + private static void introduceMissingBlocks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numMissingBlocks, - Random random) { - for (int i = 0; i < numMissingBlocks && !blockIds.isEmpty(); i++) { - int index = random.nextInt(blockIds.size()); - long blockId = blockIds.remove(index); - ContainerMerkleTree.BlockMerkleTree blockTree = tree.remove(blockId); - diff.addMissingBlock(blockTree.toProto()); + ContainerChecksumTreeManager.ContainerDiff diff) { + // Set to track unique blocks selected for mismatches + Set selectedBlocks = new HashSet<>(); + Random random = new Random(); + for (int i = 0; i < numMissingBlocks; i++) { + int randomBlockIndex; + do { + randomBlockIndex = random.nextInt(treeBuilder.getBlockMerkleTreeCount()); + } while (selectedBlocks.contains(randomBlockIndex)); + selectedBlocks.add(randomBlockIndex); + ContainerProtos.BlockMerkleTree blockMerkleTree = treeBuilder.getBlockMerkleTree(randomBlockIndex); + diff.addMissingBlock(blockMerkleTree); + treeBuilder.removeBlockMerkleTree(randomBlockIndex); + treeBuilder.setDataChecksum(random.nextLong()); } } - private static void introduceMissingChunks(ContainerMerkleTree tree, - ContainerChecksumTreeManager.ContainerDiff diff, - List blockIds, + /** + * Introduces missing chunks by removing random chunks from selected blocks. + */ + private static void introduceMissingChunks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numMissingChunks, - Random random) { - for (int i = 0; i < numMissingChunks && !blockIds.isEmpty(); i++) { - long blockId = blockIds.get(random.nextInt(blockIds.size())); - ContainerMerkleTree.BlockMerkleTree blockTree = tree.get(blockId); - List chunkOffsets = getChunkOffsets(blockTree); - - if (!chunkOffsets.isEmpty()) { - long offset = chunkOffsets.remove(random.nextInt(chunkOffsets.size())); - ContainerMerkleTree.ChunkMerkleTree chunkTree = blockTree.removeChunk(offset); - diff.addMissingChunk(blockId, chunkTree.toProto()); + ContainerChecksumTreeManager.ContainerDiff diff) { + // Set to track unique blocks selected for mismatches + Random random = new Random(); + for (int i = 0; i < numMissingChunks; i++) { + int randomBlockIndex = random.nextInt(treeBuilder.getBlockMerkleTreeCount()); + + // Work on the chosen block to remove a random chunk + ContainerProtos.BlockMerkleTree.Builder blockBuilder = treeBuilder.getBlockMerkleTreeBuilder(randomBlockIndex); + if (blockBuilder.getChunkMerkleTreeCount() > 0) { + int randomChunkIndex = random.nextInt(blockBuilder.getChunkMerkleTreeCount()); + ContainerProtos.ChunkMerkleTree chunkMerkleTree = blockBuilder.getChunkMerkleTree(randomChunkIndex); + diff.addMissingChunk(blockBuilder.getBlockID(), chunkMerkleTree); + blockBuilder.removeChunkMerkleTree(randomChunkIndex); + blockBuilder.setBlockChecksum(random.nextLong()); + treeBuilder.setDataChecksum(random.nextLong()); } } } - private static void introduceCorruptChunks(ContainerMerkleTree tree, - ContainerChecksumTreeManager.ContainerDiff diff, - List blockIds, + /** + * Introduces corrupt chunks by altering the checksum and setting them as unhealthy, + * ensuring each chunk in a block is only selected once for corruption. + */ + private static void introduceCorruptChunks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numCorruptChunks, - Random random, - ConfigurationSource conf) { - // Create a map to keep track of corrupted chunks per block - Map> corruptedChunksPerBlock = new HashMap<>(); - - int corruptionsIntroduced = 0; - while (corruptionsIntroduced < numCorruptChunks && !blockIds.isEmpty()) { - // Randomly select a block - int blockIndex = random.nextInt(blockIds.size()); - long blockId = blockIds.get(blockIndex); - ContainerMerkleTree.BlockMerkleTree blockTree = tree.get(blockId); - - // Get available chunk offsets for this block - List availableChunkOffsets = getChunkOffsets(blockTree); - - // Remove already corrupted chunks for this block - availableChunkOffsets.removeAll(corruptedChunksPerBlock.getOrDefault(blockId, new HashSet<>())); - - if (!availableChunkOffsets.isEmpty()) { - // Randomly select an available chunk offset - int chunkIndex = random.nextInt(availableChunkOffsets.size()); - long offset = availableChunkOffsets.get(chunkIndex); - - // Remove the original chunk - ContainerMerkleTree.ChunkMerkleTree chunkMerkleTree = blockTree.removeChunk(offset); - - // Create and add corrupt chunk - ContainerProtos.ChunkInfo corruptChunk = buildChunk(conf, (int) offset, ByteBuffer.wrap(new byte[]{5, 10, 15})); - tree.addChunk(blockId, corruptChunk); - blockTree.setHealthy(offset, false); - diff.addCorruptChunk(blockId, chunkMerkleTree.toProto()); - - // Mark this chunk as corrupted for this block - corruptedChunksPerBlock.computeIfAbsent(blockId, k -> new HashSet<>()).add(offset); - - corruptionsIntroduced++; - } else { - // If no available chunks in this block, remove it from consideration - blockIds.remove(blockIndex); + ContainerChecksumTreeManager.ContainerDiff diff) { + Map> corruptedChunksByBlock = new HashMap<>(); + Random random = new Random(); + + for (int i = 0; i < numCorruptChunks; i++) { + // Select a random block + int randomBlockIndex = random.nextInt(treeBuilder.getBlockMerkleTreeCount()); + ContainerProtos.BlockMerkleTree.Builder blockBuilder = treeBuilder.getBlockMerkleTreeBuilder(randomBlockIndex); + + // Ensure each chunk in the block is only corrupted once + Set corruptedChunks = corruptedChunksByBlock.computeIfAbsent(randomBlockIndex, k -> new HashSet<>()); + if (corruptedChunks.size() < blockBuilder.getChunkMerkleTreeCount()) { + int randomChunkIndex; + do { + randomChunkIndex = random.nextInt(blockBuilder.getChunkMerkleTreeCount()); + } while (corruptedChunks.contains(randomChunkIndex)); + corruptedChunks.add(randomChunkIndex); + + // Corrupt the selected chunk + ContainerProtos.ChunkMerkleTree.Builder chunkBuilder = blockBuilder.getChunkMerkleTreeBuilder(randomChunkIndex); + diff.addCorruptChunk(blockBuilder.getBlockID(), chunkBuilder.build()); + chunkBuilder.setChunkChecksum(chunkBuilder.getChunkChecksum() + random.nextInt(1000) + 1); + chunkBuilder.setIsHealthy(false); + blockBuilder.setBlockChecksum(random.nextLong()); + treeBuilder.setDataChecksum(random.nextLong()); } } } - private static List getChunkOffsets(ContainerMerkleTree.BlockMerkleTree blockTree) { - return blockTree.toProto().getChunkMerkleTreeList().stream() - .map(ContainerProtos.ChunkMerkleTree::getOffset) - .collect(Collectors.toList()); - } - public static void assertContainerDiffMatch(ContainerChecksumTreeManager.ContainerDiff expectedDiff, ContainerChecksumTreeManager.ContainerDiff actualDiff) { assertNotNull(expectedDiff, "Expected diff is null"); @@ -261,7 +257,7 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertEquals(expectedDiff.getMissingChunks().size(), actualDiff.getMissingChunks().size(), "Mismatch in number of missing chunks"); assertEquals(expectedDiff.getCorruptChunks().size(), actualDiff.getCorruptChunks().size(), - "Mismatch in number of corrupt blocks"); + "Mismatch in number of corrupt chunks"); List expectedMissingBlocks = expectedDiff.getMissingBlocks().stream().sorted( Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 24b4df58e8cf..6ebee543e8f0 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -105,9 +105,8 @@ public void init() { public void testWriteEmptyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); - checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); + checksumManager.writeContainerDataTree(container, new ContainerMerkleTree().toProto()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); - assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); @@ -138,11 +137,10 @@ public void testWriteOnlyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); - assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. @@ -201,13 +199,12 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); - assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); @@ -219,7 +216,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); @@ -228,7 +225,6 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); - assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); @@ -239,9 +235,9 @@ public void testReadContainerMerkleTreeMetric() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); } @@ -258,14 +254,14 @@ public void testTmpFileWriteFailure() throws Exception { assertFalse(tmpFile.exists()); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertFalse(tmpFile.exists()); assertTrue(finalFile.exists()); // Make the write to the tmp file fail by removing permissions on its parent. assertTrue(tmpFile.getParentFile().setWritable(false)); try { - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); fail("Write to the tmp file should have failed."); } catch (IOException ex) { LOG.info("Write to the tmp file failed as expected with the following exception: ", ex); @@ -282,7 +278,7 @@ public void testCorruptedFile() throws Exception { File finalFile = ContainerChecksumTreeManager.getContainerChecksumFile(container); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTrue(finalFile.exists()); // Corrupt the file so it is not a valid protobuf. @@ -294,7 +290,7 @@ public void testCorruptedFile() throws Exception { // The manager's read/modify/write cycle should account for the corruption and overwrite the entry. // No exception should be thrown. - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTreesSortedAndMatch(tree.toProto(), readChecksumFile(container).getContainerMerkleTree()); } @@ -308,7 +304,7 @@ public void testEmptyFile() throws Exception { File finalFile = ContainerChecksumTreeManager.getContainerChecksumFile(container); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); assertTrue(finalFile.exists()); // Truncate the file to zero length. @@ -324,7 +320,7 @@ public void testEmptyFile() throws Exception { // The manager's read/modify/write cycle should account for the empty file and overwrite it with a valid entry. // No exception should be thrown. - checksumManager.writeContainerDataTree(container, tree); + checksumManager.writeContainerDataTree(container, tree.toProto()); ContainerProtos.ContainerChecksumInfo info = readChecksumFile(container); assertTreesSortedAndMatch(tree.toProto(), info.getContainerMerkleTree()); assertEquals(CONTAINER_ID, info.getContainerID()); @@ -334,7 +330,7 @@ public void testEmptyFile() throws Exception { public void testContainerWithNoDiff() throws IOException { ContainerMerkleTree ourMerkleTree = buildTestTree(config); ContainerMerkleTree peerMerkleTree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, ourMerkleTree); + checksumManager.writeContainerDataTree(container, ourMerkleTree.toProto()); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); @@ -351,11 +347,11 @@ public void testContainerWithNoDiff() throws IOException { @MethodSource("getContainerDiffMismatches") public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws IOException { - Pair buildResult = - buildTestTreeWithMismatches(config, numMissingBlock, numMissingChunk, numCorruptChunk); - ContainerChecksumTreeManager.ContainerDiff expectedDiff = buildResult.getRight(); - ContainerMerkleTree ourMerkleTree = buildResult.getLeft(); ContainerMerkleTree peerMerkleTree = buildTestTree(config); + Pair buildResult = + buildTestTreeWithMismatches(peerMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); + ContainerChecksumTreeManager.ContainerDiff expectedDiff = buildResult.getRight(); + ContainerProtos.ContainerMerkleTree ourMerkleTree = buildResult.getLeft(); checksumManager.writeContainerDataTree(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) @@ -374,14 +370,14 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC @MethodSource("getContainerDiffMismatches") public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws IOException { - Pair buildResult = - buildTestTreeWithMismatches(config, numMissingBlock, numMissingChunk, numCorruptChunk); ContainerMerkleTree ourMerkleTree = buildTestTree(config); - ContainerMerkleTree peerMerkleTree = buildResult.getLeft(); - checksumManager.writeContainerDataTree(container, ourMerkleTree); + Pair buildResult = + buildTestTreeWithMismatches(ourMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); + ContainerProtos.ContainerMerkleTree peerMerkleTree = buildResult.getLeft(); + checksumManager.writeContainerDataTree(container, ourMerkleTree.toProto()); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) - .setContainerMerkleTree(peerMerkleTree.toProto()).build(); + .setContainerMerkleTree(peerMerkleTree).build(); ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertFalse(diff.needsRepair()); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 64af0148a872..01430b3a9169 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -284,7 +284,7 @@ public static void writeChecksumFileToDatanodes(long containerID, ContainerMerkl (KeyValueContainer) dn.getDatanodeStateMachine().getContainer().getController() .getContainer(containerID); keyValueHandler.getChecksumManager().writeContainerDataTree( - keyValueContainer.getContainerData(), tree); + keyValueContainer.getContainerData(), tree.toProto()); } } } From 9ada3f5efb3f2f3c92b979e2f33a756361cdef7c Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 14 Nov 2024 14:13:56 -0800 Subject: [PATCH 07/11] Address review comments. --- .../ContainerChecksumTreeManager.java | 20 +++++++++++++------ .../checksum/ContainerMerkleTreeMetrics.java | 16 +++++++++++++++ .../TestContainerChecksumTreeManager.java | 19 ++++++++++++++---- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 9fdb6daeea7e..55ef82f5364d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -150,7 +150,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) - throws IOException { + throws Exception { ContainerDiff report = new ContainerDiff(); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { @@ -170,9 +170,9 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); compareContainerMerkleTree(thisChecksumInfo, peerChecksumInfo, report); }); - } catch (IOException ex) { + } catch (Exception ex) { metrics.incrementMerkleTreeDiffFailures(); - throw new IOException("Container Diff failed for container #" + thisContainer.getContainerID(), ex); + throw new Exception("Container Diff failed for container #" + thisContainer.getContainerID(), ex); } // Update Container Diff metrics based on the diff report. @@ -221,7 +221,8 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo th thisIdx++; peerIdx++; } else if (thisBlockMerkleTree.getBlockID() < peerBlockMerkleTree.getBlockID()) { - // This block's ID is smaller; advance thisIdx to catch up + // this block merkle tree's block id is smaller. Which means our merkle tree has some blocks which the peer + // doesn't have. We can skip these, the peer will pick up these block when it reconciles with our merkle tree. thisIdx++; } else { // Peer block's ID is smaller; record missing block if peerDeletedBlockSet doesn't contain the blockId @@ -241,6 +242,9 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo th } peerIdx++; } + + // If we have remaining block in thisMerkleTree, we can skip these blocks. The peers will pick this block from + // us when they reconcile. } private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMerkleTree, @@ -258,7 +262,7 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer if (thisChunkMerkleTree.getOffset() == peerChunkMerkleTree.getOffset()) { // Possible state when this Checksum != peer Checksum: - // thisTree = Healthy, peerTree = Healthy -> We don't know what is healthy. Skip. + // thisTree = Healthy, peerTree = Healthy -> Both are healthy, No repair needed. Skip. // thisTree = Unhealthy, peerTree = Healthy -> Add to corrupt chunk. // thisTree = Healthy, peerTree = unhealthy -> Do nothing as thisTree is healthy. // thisTree = Unhealthy, peerTree = Unhealthy -> Do Nothing as both are corrupt. @@ -269,7 +273,8 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer thisIdx++; peerIdx++; } else if (thisChunkMerkleTree.getOffset() < peerChunkMerkleTree.getOffset()) { - // This chunk's offset is smaller; advance thisIdx + // this chunk merkle tree's offset is smaller. Which means our merkle tree has some chunks which the peer + // doesn't have. We can skip these, the peer will pick up these chunks when it reconciles with our merkle tree. thisIdx++; } else { // Peer chunk's offset is smaller; record missing chunk and advance peerIdx @@ -283,6 +288,9 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTreeList.get(peerIdx)); peerIdx++; } + + // If we have remaining chunks in thisBlockMerkleTree, we can skip these chunks. The peers will pick these + // chunks from us when they reconcile. } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index e62bf2573575..4e89fa027b4f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -113,4 +113,20 @@ public MutableRate getCreateMerkleTreeLatencyNS() { public MutableRate getMerkleTreeDiffLatencyNS() { return this.merkleTreeDiffLatencyNS; } + + public long getNoRepairContainerDiffs() { + return this.numNoRepairContainerDiff.value(); + } + + public long getRepairContainerDiffs() { + return this.numRepairContainerDiff.value(); + } + + public long getMerkleTreeDiffSuccess() { + return this.numMerkleTreeDiffSuccess.value(); + } + + public long getMerkleTreeDiffFailure() { + return this.numMerkleTreeDiffFailure.value(); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 6ebee543e8f0..b8274a3b1cb1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -72,7 +72,6 @@ class TestContainerChecksumTreeManager { */ public static Stream getContainerDiffMismatches() { return Stream.of( - Arguments.of(0, 0, 0), Arguments.of(0, 0, 1), Arguments.of(0, 1, 0), Arguments.of(1, 0, 0), @@ -327,7 +326,7 @@ public void testEmptyFile() throws Exception { } @Test - public void testContainerWithNoDiff() throws IOException { + public void testContainerWithNoDiff() throws Exception { ContainerMerkleTree ourMerkleTree = buildTestTree(config); ContainerMerkleTree peerMerkleTree = buildTestTree(config); checksumManager.writeContainerDataTree(container, ourMerkleTree.toProto()); @@ -337,6 +336,8 @@ public void testContainerWithNoDiff() throws IOException { ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertFalse(diff.needsRepair()); + assertTrue(checksumManager.getMetrics().getNoRepairContainerDiffs() > 0); + assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); } /** @@ -346,7 +347,7 @@ public void testContainerWithNoDiff() throws IOException { @ParameterizedTest(name = "Missing blocks: {0}, Missing chunks: {1}, Corrupt chunks: {2}") @MethodSource("getContainerDiffMismatches") public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, - int numCorruptChunk) throws IOException { + int numCorruptChunk) throws Exception { ContainerMerkleTree peerMerkleTree = buildTestTree(config); Pair buildResult = buildTestTreeWithMismatches(peerMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); @@ -359,6 +360,8 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(metrics.getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); + assertTrue(checksumManager.getMetrics().getRepairContainerDiffs() > 0); + assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); } /** @@ -369,7 +372,7 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC @ParameterizedTest(name = "Missing blocks: {0}, Missing chunks: {1}, Corrupt chunks: {2}") @MethodSource("getContainerDiffMismatches") public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingChunk, - int numCorruptChunk) throws IOException { + int numCorruptChunk) throws Exception { ContainerMerkleTree ourMerkleTree = buildTestTree(config); Pair buildResult = buildTestTreeWithMismatches(ourMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); @@ -380,6 +383,14 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC .setContainerMerkleTree(peerMerkleTree).build(); ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); assertFalse(diff.needsRepair()); + assertTrue(checksumManager.getMetrics().getNoRepairContainerDiffs() > 0); + assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); + } + + @Test + public void testFailureContainerMerkleTreeMetric() { + assertThrows(Exception.class, () -> checksumManager.diff(container, null)); + assertTrue(checksumManager.getMetrics().getMerkleTreeDiffFailure() > 0); } @Test From 82b3d5915334c75a70cb6f5065f00db2b85c4340 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 15 Nov 2024 10:29:10 -0800 Subject: [PATCH 08/11] Address tests for deleted blocks. --- .../TestContainerChecksumTreeManager.java | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b8274a3b1cb1..8e12a58147a3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -393,6 +393,96 @@ public void testFailureContainerMerkleTreeMetric() { assertTrue(checksumManager.getMetrics().getMerkleTreeDiffFailure() > 0); } + /** + * Test to check if the container diff consists of blocks that are missing in our merkle tree but + * they are deleted in the peer's merkle tree. + */ + @Test + void testDeletedBlocksInPeerAndBoth() throws Exception { + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + // Introduce missing blocks in our merkle tree + ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 3, 0, 0).getLeft(); + List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); + // Mark all the blocks as deleted in peer merkle tree + ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID) + .addAllDeletedBlocks(deletedBlockList); + + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + + // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted + // in peer merkle tree. + assertTrue(containerDiff.getMissingBlocks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + + // Delete blocks in our merkle tree as well. + checksumManager.markBlocksAsDeleted(container, deletedBlockList); + containerDiff = checksumManager.diff(container, peerChecksumInfo); + + // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted + // in both merkle tree. + assertTrue(containerDiff.getMissingBlocks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + } + + /** + * Test to check if the container diff consists of blocks that are corrupted in our merkle tree but also deleted in + * our merkle tree. + */ + @Test + void testDeletedBlocksInOurContainerOnly() throws Exception { + // Setup deleted blocks only in the peer container checksum + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + // Introduce block corruption in our merkle tree. + ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 0, 3, 3).getLeft(); + List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); + ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID); + + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + checksumManager.markBlocksAsDeleted(container, deletedBlockList); + + ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + + // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted + // in our merkle tree. + assertTrue(containerDiff.getMissingBlocks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + } + + /** + * Test to check if the container diff consists of blocks that are corrupted in our merkle tree but also deleted in + * our peer tree. + */ + @Test + void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { + // Setup deleted blocks only in the peer container checksum + ContainerMerkleTree peerMerkleTree = buildTestTree(config); + // Introduce block corruption in our merkle tree. + ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 0, 3, 3).getLeft(); + List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); + ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID) + .addAllDeletedBlocks(deletedBlockList); + + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + + ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + + // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted + // in peer merkle tree. + assertTrue(containerDiff.getMissingBlocks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + assertTrue(containerDiff.getMissingChunks().isEmpty()); + } + @Test public void testChecksumTreeFilePath() { assertEquals(checksumFile.getAbsolutePath(), From f1679cee6668550eef76ffb870be11db4d8c28df Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 18 Nov 2024 11:09:03 -0800 Subject: [PATCH 09/11] Address review comments. --- .../ContainerChecksumTreeManager.java | 68 ++------------- .../checksum/ContainerDiffReport.java | 75 ++++++++++++++++ .../checksum/ContainerMerkleTree.java | 4 +- .../container/keyvalue/KeyValueHandler.java | 4 +- .../BackgroundContainerDataScanner.java | 7 +- .../ContainerMerkleTreeTestUtils.java | 86 ++++++++++++------- .../TestContainerChecksumTreeManager.java | 79 +++++++++-------- .../TestContainerCommandReconciliation.java | 2 +- 8 files changed, 184 insertions(+), 141 deletions(-) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 55ef82f5364d..39d265a4ab2d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -30,11 +30,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Collection; import java.util.Set; @@ -85,7 +82,7 @@ public void stop() { * file remains unchanged. * Concurrent writes to the same file are coordinated internally. */ - public void writeContainerDataTree(ContainerData data, ContainerProtos.ContainerMerkleTree tree) throws IOException { + public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) throws IOException { long containerID = data.getContainerID(); Lock writeLock = getLock(containerID); writeLock.lock(); @@ -103,7 +100,7 @@ public void writeContainerDataTree(ContainerData data, ContainerProtos.Container checksumInfoBuilder .setContainerID(containerID) - .setContainerMerkleTree(tree); + .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)); write(data, checksumInfoBuilder.build()); LOG.debug("Data merkle tree for container {} updated", containerID); } finally { @@ -149,9 +146,10 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } } - public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) - throws Exception { - ContainerDiff report = new ContainerDiff(); + public ContainerDiffReport diff(KeyValueContainerData thisContainer, + ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws Exception { + + ContainerDiffReport report = new ContainerDiffReport(); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { Preconditions.assertNotNull(thisContainer, "Container data is null"); @@ -187,7 +185,7 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo thisChecksumInfo, ContainerProtos.ContainerChecksumInfo peerChecksumInfo, - ContainerDiff report) { + ContainerDiffReport report) { ContainerProtos.ContainerMerkleTree thisMerkleTree = thisChecksumInfo.getContainerMerkleTree(); ContainerProtos.ContainerMerkleTree peerMerkleTree = peerChecksumInfo.getContainerMerkleTree(); Set thisDeletedBlockSet = new HashSet<>(thisChecksumInfo.getDeletedBlocksList()); @@ -213,6 +211,7 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo th // block and the peer's BG service hasn't run yet. We can ignore comparing them. // 3) If the block is only deleted in peer merkle tree, we can't reconcile for this block. It might be // deleted by peer's BG service. We can ignore comparing them. + // TODO: Handle missed block deletions from the deleted block ids. if (!thisDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && !peerDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { @@ -249,7 +248,7 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo th private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMerkleTree, ContainerProtos.BlockMerkleTree peerBlockMerkleTree, - ContainerDiff report) { + ContainerDiffReport report) { List thisChunkMerkleTreeList = thisBlockMerkleTree.getChunkMerkleTreeList(); List peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList(); @@ -383,53 +382,4 @@ public static boolean checksumFileExist(Container container) { return checksumFile.exists(); } - /** - * This class represents the difference between our replica of a container and a peer's replica of a container. - * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. - * - */ - public static class ContainerDiff { - private final List missingBlocks; - private final Map> missingChunks; - private final Map> corruptChunks; - - public ContainerDiff() { - this.missingBlocks = new ArrayList<>(); - this.missingChunks = new HashMap<>(); - this.corruptChunks = new HashMap<>(); - } - - public void addMissingBlock(ContainerProtos.BlockMerkleTree missingBlockMerkleTree) { - this.missingBlocks.add(missingBlockMerkleTree); - } - - public void addMissingChunk(long blockId, ContainerProtos.ChunkMerkleTree missingChunkMerkleTree) { - this.missingChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(missingChunkMerkleTree); - } - - public void addCorruptChunk(long blockId, ContainerProtos.ChunkMerkleTree corruptChunk) { - this.corruptChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(corruptChunk); - } - - public List getMissingBlocks() { - return missingBlocks; - } - - public Map> getMissingChunks() { - return missingChunks; - } - - public Map> getCorruptChunks() { - return corruptChunks; - } - - /** - * If needRepair is true, It means current replica needs blocks/chunks from the peer to repair - * its container replica. The peer replica still may have corruption, which it will fix when - * it reconciles with other peers. - */ - public boolean needsRepair() { - return !missingBlocks.isEmpty() || !missingChunks.isEmpty() || !corruptChunks.isEmpty(); - } - } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java new file mode 100644 index 000000000000..6d1a07ed8e40 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * This class represents the difference between our replica of a container and a peer's replica of a container. + * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. + */ +public class ContainerDiffReport { + private final List missingBlocks; + private final Map> missingChunks; + private final Map> corruptChunks; + + public ContainerDiffReport() { + this.missingBlocks = new ArrayList<>(); + this.missingChunks = new HashMap<>(); + this.corruptChunks = new HashMap<>(); + } + + public void addMissingBlock(ContainerProtos.BlockMerkleTree missingBlockMerkleTree) { + this.missingBlocks.add(missingBlockMerkleTree); + } + + public void addMissingChunk(long blockId, ContainerProtos.ChunkMerkleTree missingChunkMerkleTree) { + this.missingChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(missingChunkMerkleTree); + } + + public void addCorruptChunk(long blockId, ContainerProtos.ChunkMerkleTree corruptChunk) { + this.corruptChunks.computeIfAbsent(blockId, any -> new ArrayList<>()).add(corruptChunk); + } + + public List getMissingBlocks() { + return missingBlocks; + } + + public Map> getMissingChunks() { + return missingChunks; + } + + public Map> getCorruptChunks() { + return corruptChunks; + } + + /** + * If needRepair is true, It means current replica needs blocks/chunks from the peer to repair + * its container replica. The peer replica still may have corruption, which it will fix when + * it reconciles with other peers. + */ + public boolean needsRepair() { + return !missingBlocks.isEmpty() || !missingChunks.isEmpty() || !corruptChunks.isEmpty(); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 6562718ebcab..7dba5b4309ce 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -91,7 +91,7 @@ public ContainerProtos.ContainerMerkleTree toProto() { /** * Represents a merkle tree for a single block within a container. */ - public static class BlockMerkleTree { + private static class BlockMerkleTree { // Map of each offset within the block to its chunk info. // Chunk order in the checksum is determined by their offset. private final SortedMap offset2Chunk; @@ -150,7 +150,7 @@ public ContainerProtos.BlockMerkleTree toProto() { * Each chunk has multiple checksums within it at each "bytesPerChecksum" interval. * This class computes one checksum for the whole chunk by aggregating these. */ - public static class ChunkMerkleTree { + private static class ChunkMerkleTree { private ContainerProtos.ChunkInfo chunk; private boolean isHealthy = true; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 664bcd5fa885..d587748e6f80 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -138,7 +138,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import static org.apache.hadoop.ozone.ClientVersion.EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST; import static org.apache.hadoop.ozone.OzoneConsts.INCREMENTAL_CHUNK_LIST; -import static org.apache.hadoop.ozone.util.MetricUtil.captureLatencyNs; import org.apache.hadoop.util.Time; import org.apache.ratis.statemachine.StateMachine; @@ -565,8 +564,7 @@ private void createContainerMerkleTree(Container container) { merkleTree.addChunks(blockData.getLocalID(), chunkInfos); } } - checksumManager.writeContainerDataTree(containerData, captureLatencyNs( - checksumManager.getMetrics().getCreateMerkleTreeLatencyNS(), merkleTree::toProto)); + checksumManager.writeContainerDataTree(containerData, merkleTree); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index d486daf932e8..1a4f0bf64608 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; @@ -34,8 +33,6 @@ import java.util.Iterator; import java.util.Optional; -import static org.apache.hadoop.ozone.util.MetricUtil.captureLatencyNs; - /** * Data scanner that full checks a volume. Each volume gets a separate thread. */ @@ -106,9 +103,7 @@ public void scanContainer(Container c) metrics.incNumUnHealthyContainers(); } } - ContainerMerkleTree dataTree = result.getDataTree(); - checksumManager.writeContainerDataTree(containerData, captureLatencyNs( - checksumManager.getMetrics().getCreateMerkleTreeLatencyNS(), dataTree::toProto)); + checksumManager.writeContainerDataTree(containerData, result.getDataTree()); metrics.incNumContainersScanned(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index dda7bce7d56a..c74423fb19c2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -30,7 +30,9 @@ import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import java.io.File; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -44,6 +46,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -52,7 +55,8 @@ * Helper methods for testing container checksum tree files and container reconciliation. */ public final class ContainerMerkleTreeTestUtils { - private ContainerMerkleTreeTestUtils() { } + private ContainerMerkleTreeTestUtils() { + } public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, ContainerProtos.ContainerMerkleTree actualTree) { @@ -94,10 +98,10 @@ public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree * as either the leaves of pre-computed merkle trees that serve as expected values, or as building blocks to pass * to ContainerMerkleTree to have it build the whole tree from this information. * - * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated - * from this based on a fixed length. + * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated + * from this based on a fixed length. * @param chunkChecksums The checksums within the chunk. Each is assumed to apply to a fixed value - * "bytesPerChecksum" amount of data and are assumed to be contiguous. + * "bytesPerChecksum" amount of data and are assumed to be contiguous. * @return The ChunkInfo proto object built from this information. */ public static ContainerProtos.ChunkInfo buildChunk(ConfigurationSource config, int indexInBlock, @@ -116,11 +120,11 @@ public static ContainerProtos.ChunkInfo buildChunk(ConfigurationSource config, i .build(); return ContainerProtos.ChunkInfo.newBuilder() - .setChecksumData(checksumData) - .setChunkName("chunk") - .setOffset(indexInBlock) - .setLen(chunkSize) - .build(); + .setChecksumData(checksumData) + .setChunkName("chunk") + .setOffset(indexInBlock) + .setLen(chunkSize) + .build(); } /** @@ -128,7 +132,7 @@ public static ContainerProtos.ChunkInfo buildChunk(ConfigurationSource config, i * and writers within a datanode. */ public static ContainerProtos.ContainerChecksumInfo readChecksumFile(ContainerData data) throws IOException { - try (FileInputStream inStream = new FileInputStream(ContainerChecksumTreeManager.getContainerChecksumFile(data))) { + try (FileInputStream inStream = new FileInputStream(getContainerChecksumFile(data))) { return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); } } @@ -138,9 +142,13 @@ public static ContainerProtos.ContainerChecksumInfo readChecksumFile(ContainerDa * structure is preserved throughout serialization, deserialization, and API calls. */ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { + return buildTestTree(conf, 5); + } + + public static ContainerMerkleTree buildTestTree(ConfigurationSource conf, int numBlocks) { ContainerMerkleTree tree = new ContainerMerkleTree(); byte byteValue = 1; - for (int blockIndex = 1; blockIndex <= 5; blockIndex++) { + for (int blockIndex = 1; blockIndex <= numBlocks; blockIndex++) { List chunks = new ArrayList<>(); for (int chunkIndex = 0; chunkIndex < 4; chunkIndex++) { chunks.add(buildChunk(conf, chunkIndex, ByteBuffer.wrap(new byte[]{byteValue++, byteValue++, byteValue++}))); @@ -153,12 +161,12 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { /** * Returns a Pair of merkle tree and the expected container diff for that merkle tree. */ - public static Pair + public static Pair buildTestTreeWithMismatches(ContainerMerkleTree originalTree, int numMissingBlocks, int numMissingChunks, int numCorruptChunks) { ContainerProtos.ContainerMerkleTree.Builder treeBuilder = originalTree.toProto().toBuilder(); - ContainerChecksumTreeManager.ContainerDiff diff = new ContainerChecksumTreeManager.ContainerDiff(); + ContainerDiffReport diff = new ContainerDiffReport(); introduceMissingBlocks(treeBuilder, numMissingBlocks, diff); introduceMissingChunks(treeBuilder, numMissingChunks, diff); @@ -172,7 +180,7 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf) { */ private static void introduceMissingBlocks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numMissingBlocks, - ContainerChecksumTreeManager.ContainerDiff diff) { + ContainerDiffReport diff) { // Set to track unique blocks selected for mismatches Set selectedBlocks = new HashSet<>(); Random random = new Random(); @@ -194,7 +202,7 @@ private static void introduceMissingBlocks(ContainerProtos.ContainerMerkleTree.B */ private static void introduceMissingChunks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numMissingChunks, - ContainerChecksumTreeManager.ContainerDiff diff) { + ContainerDiffReport diff) { // Set to track unique blocks selected for mismatches Random random = new Random(); for (int i = 0; i < numMissingChunks; i++) { @@ -219,7 +227,7 @@ private static void introduceMissingChunks(ContainerProtos.ContainerMerkleTree.B */ private static void introduceCorruptChunks(ContainerProtos.ContainerMerkleTree.Builder treeBuilder, int numCorruptChunks, - ContainerChecksumTreeManager.ContainerDiff diff) { + ContainerDiffReport diff) { Map> corruptedChunksByBlock = new HashMap<>(); Random random = new Random(); @@ -248,8 +256,8 @@ private static void introduceCorruptChunks(ContainerProtos.ContainerMerkleTree.B } } - public static void assertContainerDiffMatch(ContainerChecksumTreeManager.ContainerDiff expectedDiff, - ContainerChecksumTreeManager.ContainerDiff actualDiff) { + public static void assertContainerDiffMatch(ContainerDiffReport expectedDiff, + ContainerDiffReport actualDiff) { assertNotNull(expectedDiff, "Expected diff is null"); assertNotNull(actualDiff, "Actual diff is null"); assertEquals(expectedDiff.getMissingBlocks().size(), actualDiff.getMissingBlocks().size(), @@ -260,7 +268,7 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain "Mismatch in number of corrupt chunks"); List expectedMissingBlocks = expectedDiff.getMissingBlocks().stream().sorted( - Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); + Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); List actualMissingBlocks = expectedDiff.getMissingBlocks().stream().sorted( Comparator.comparing(ContainerProtos.BlockMerkleTree::getBlockID)).collect(Collectors.toList()); for (int i = 0; i < expectedMissingBlocks.size(); i++) { @@ -271,7 +279,7 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain actualBlockMerkleTree.getChunkMerkleTreeCount()); assertEquals(expectedBlockMerkleTree.getBlockChecksum(), actualBlockMerkleTree.getBlockChecksum()); assertEqualsChunkMerkleTree(expectedBlockMerkleTree.getChunkMerkleTreeList(), - actualBlockMerkleTree.getChunkMerkleTreeList()); + actualBlockMerkleTree.getChunkMerkleTreeList(), expectedBlockMerkleTree.getBlockID()); } // Check missing chunks @@ -288,15 +296,7 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertNotNull(actualChunks, "Missing chunks for block " + blockId + " not found in actual diff"); assertEquals(expectedChunks.size(), actualChunks.size(), "Mismatch in number of missing chunks for block " + blockId); - assertEqualsChunkMerkleTree(expectedChunks, actualChunks); - for (int i = 0; i < expectedChunks.size(); i++) { - ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunks.get(i); - ContainerProtos.ChunkMerkleTree actualChunk = actualChunks.get(i); - assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), - "Mismatch in chunk offset for block " + blockId); - assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), - "Mismatch in chunk checksum for block " + blockId); - } + assertEqualsChunkMerkleTree(expectedChunks, actualChunks, blockId); } // Check corrupt chunks @@ -313,18 +313,21 @@ public static void assertContainerDiffMatch(ContainerChecksumTreeManager.Contain assertNotNull(actualChunks, "Corrupt chunks for block " + blockId + " not found in actual diff"); assertEquals(expectedChunks.size(), actualChunks.size(), "Mismatch in number of corrupt chunks for block " + blockId); - assertEqualsChunkMerkleTree(expectedChunks, actualChunks); + assertEqualsChunkMerkleTree(expectedChunks, actualChunks, blockId); } } private static void assertEqualsChunkMerkleTree(List expectedChunkMerkleTreeList, - List actualChunkMerkleTreeList) { + List actualChunkMerkleTreeList, + Long blockId) { assertEquals(expectedChunkMerkleTreeList.size(), actualChunkMerkleTreeList.size()); for (int j = 0; j < expectedChunkMerkleTreeList.size(); j++) { ContainerProtos.ChunkMerkleTree expectedChunk = expectedChunkMerkleTreeList.get(j); ContainerProtos.ChunkMerkleTree actualChunk = actualChunkMerkleTreeList.get(j); - assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), "Mismatch in chunk offset"); - assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), "Mismatch in chunk checksum"); + assertEquals(expectedChunk.getOffset(), actualChunk.getOffset(), "Mismatch in chunk offset for block " + + blockId); + assertEquals(expectedChunk.getChunkChecksum(), actualChunk.getChunkChecksum(), + "Mismatch in chunk checksum for block " + blockId); } } @@ -337,4 +340,21 @@ public static boolean containerChecksumFileExists(HddsDatanodeService hddsDatano Container container = ozoneContainer.getController().getContainer(containerInfo.getContainerID()); return ContainerChecksumTreeManager.checksumFileExist(container); } + + public static void writeContainerDataTreeProto(ContainerData data, ContainerProtos.ContainerMerkleTree tree) + throws IOException { + ContainerProtos.ContainerChecksumInfo checksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(data.getContainerID()) + .setContainerMerkleTree(tree).build(); + File checksumFile = getContainerChecksumFile(data); + + try (FileOutputStream outputStream = new FileOutputStream(checksumFile)) { + checksumInfo.writeTo(outputStream); + } catch (IOException ex) { + // If the move failed and left behind the tmp file, the tmp file will be overwritten on the next successful write. + // Nothing reads directly from the tmp file. + throw new IOException("Error occurred when writing container merkle tree for containerID " + + data.getContainerID(), ex); + } + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 8e12a58147a3..b5a5ced2385a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -45,6 +45,7 @@ import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTreeWithMismatches; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -67,8 +68,8 @@ class TestContainerChecksumTreeManager { private ConfigurationSource config; /** - * The number of mismatched to be introduced in the container diff. The order is of follow, - * Number of missing blocks, Number of missing chunks, Number of corrupt chunks. + * The number of mismatched to be introduced in the container diff. The arguments are + * number of missing blocks, number of missing chunks, number of corrupt chunks. */ public static Stream getContainerDiffMismatches() { return Stream.of( @@ -104,8 +105,9 @@ public void init() { public void testWriteEmptyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); - checksumManager.writeContainerDataTree(container, new ContainerMerkleTree().toProto()); + checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); @@ -136,10 +138,11 @@ public void testWriteOnlyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. @@ -198,12 +201,13 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); @@ -215,7 +219,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); @@ -224,6 +228,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); @@ -234,9 +239,9 @@ public void testReadContainerMerkleTreeMetric() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); } @@ -253,14 +258,14 @@ public void testTmpFileWriteFailure() throws Exception { assertFalse(tmpFile.exists()); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertFalse(tmpFile.exists()); assertTrue(finalFile.exists()); // Make the write to the tmp file fail by removing permissions on its parent. assertTrue(tmpFile.getParentFile().setWritable(false)); try { - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); fail("Write to the tmp file should have failed."); } catch (IOException ex) { LOG.info("Write to the tmp file failed as expected with the following exception: ", ex); @@ -277,7 +282,7 @@ public void testCorruptedFile() throws Exception { File finalFile = ContainerChecksumTreeManager.getContainerChecksumFile(container); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTrue(finalFile.exists()); // Corrupt the file so it is not a valid protobuf. @@ -289,7 +294,7 @@ public void testCorruptedFile() throws Exception { // The manager's read/modify/write cycle should account for the corruption and overwrite the entry. // No exception should be thrown. - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTreesSortedAndMatch(tree.toProto(), readChecksumFile(container).getContainerMerkleTree()); } @@ -303,7 +308,7 @@ public void testEmptyFile() throws Exception { File finalFile = ContainerChecksumTreeManager.getContainerChecksumFile(container); assertFalse(finalFile.exists()); ContainerMerkleTree tree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); assertTrue(finalFile.exists()); // Truncate the file to zero length. @@ -319,7 +324,7 @@ public void testEmptyFile() throws Exception { // The manager's read/modify/write cycle should account for the empty file and overwrite it with a valid entry. // No exception should be thrown. - checksumManager.writeContainerDataTree(container, tree.toProto()); + checksumManager.writeContainerDataTree(container, tree); ContainerProtos.ContainerChecksumInfo info = readChecksumFile(container); assertTreesSortedAndMatch(tree.toProto(), info.getContainerMerkleTree()); assertEquals(CONTAINER_ID, info.getContainerID()); @@ -329,15 +334,15 @@ public void testEmptyFile() throws Exception { public void testContainerWithNoDiff() throws Exception { ContainerMerkleTree ourMerkleTree = buildTestTree(config); ContainerMerkleTree peerMerkleTree = buildTestTree(config); - checksumManager.writeContainerDataTree(container, ourMerkleTree.toProto()); + checksumManager.writeContainerDataTree(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); + ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertFalse(diff.needsRepair()); - assertTrue(checksumManager.getMetrics().getNoRepairContainerDiffs() > 0); - assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); + assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); + assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } /** @@ -349,24 +354,24 @@ public void testContainerWithNoDiff() throws Exception { public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws Exception { ContainerMerkleTree peerMerkleTree = buildTestTree(config); - Pair buildResult = + Pair buildResult = buildTestTreeWithMismatches(peerMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); - ContainerChecksumTreeManager.ContainerDiff expectedDiff = buildResult.getRight(); + ContainerDiffReport expectedDiff = buildResult.getRight(); ContainerProtos.ContainerMerkleTree ourMerkleTree = buildResult.getLeft(); - checksumManager.writeContainerDataTree(container, ourMerkleTree); + writeContainerDataTreeProto(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); + ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); assertTrue(metrics.getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); - assertTrue(checksumManager.getMetrics().getRepairContainerDiffs() > 0); - assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); + assertEquals(checksumManager.getMetrics().getRepairContainerDiffs(), 1); + assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } /** * Test if a peer which has missing blocks and chunks affects our container diff. If the peer tree has mismatches - * with respect to our merkle tree then we need to should not include that mismatch in the container diff. + * with respect to our merkle tree then we should not include that mismatch in the container diff. * The ContainerDiff generated by the peer when it reconciles with our merkle tree will capture that mismatch. */ @ParameterizedTest(name = "Missing blocks: {0}, Missing chunks: {1}, Corrupt chunks: {2}") @@ -374,23 +379,23 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingChunk, int numCorruptChunk) throws Exception { ContainerMerkleTree ourMerkleTree = buildTestTree(config); - Pair buildResult = + Pair buildResult = buildTestTreeWithMismatches(ourMerkleTree, numMissingBlock, numMissingChunk, numCorruptChunk); ContainerProtos.ContainerMerkleTree peerMerkleTree = buildResult.getLeft(); - checksumManager.writeContainerDataTree(container, ourMerkleTree.toProto()); + checksumManager.writeContainerDataTree(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree).build(); - ContainerChecksumTreeManager.ContainerDiff diff = checksumManager.diff(container, peerChecksumInfo); + ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); assertFalse(diff.needsRepair()); - assertTrue(checksumManager.getMetrics().getNoRepairContainerDiffs() > 0); - assertTrue(checksumManager.getMetrics().getMerkleTreeDiffSuccess() > 0); + assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); + assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } @Test public void testFailureContainerMerkleTreeMetric() { assertThrows(Exception.class, () -> checksumManager.diff(container, null)); - assertTrue(checksumManager.getMetrics().getMerkleTreeDiffFailure() > 0); + assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1); } /** @@ -409,8 +414,8 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { .addAllDeletedBlocks(deletedBlockList); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); - checksumManager.writeContainerDataTree(container, ourMerkleTree); - ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + writeContainerDataTreeProto(container, ourMerkleTree); + ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. @@ -444,10 +449,10 @@ void testDeletedBlocksInOurContainerOnly() throws Exception { .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); - checksumManager.writeContainerDataTree(container, ourMerkleTree); + writeContainerDataTreeProto(container, ourMerkleTree); checksumManager.markBlocksAsDeleted(container, deletedBlockList); - ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in our merkle tree. @@ -472,9 +477,9 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { .addAllDeletedBlocks(deletedBlockList); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); - checksumManager.writeContainerDataTree(container, ourMerkleTree); + writeContainerDataTreeProto(container, ourMerkleTree); - ContainerChecksumTreeManager.ContainerDiff containerDiff = checksumManager.diff(container, peerChecksumInfo); + ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 01430b3a9169..64af0148a872 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -284,7 +284,7 @@ public static void writeChecksumFileToDatanodes(long containerID, ContainerMerkl (KeyValueContainer) dn.getDatanodeStateMachine().getContainer().getController() .getContainer(containerID); keyValueHandler.getChecksumManager().writeContainerDataTree( - keyValueContainer.getContainerData(), tree.toProto()); + keyValueContainer.getContainerData(), tree); } } } From a5ff0978774fd858743f389e7acc80143069200a Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 18 Nov 2024 16:45:39 -0800 Subject: [PATCH 10/11] Address review comments. --- .../hdds/scm/exceptions/SCMException.java | 3 +- .../ContainerChecksumTreeManager.java | 40 +++++++----- .../checksum/ContainerDiffReport.java | 8 +++ .../checksum/ContainerMerkleTreeMetrics.java | 12 +--- .../TestContainerChecksumTreeManager.java | 62 ++++++++++++++----- .../src/main/proto/ScmServerProtocol.proto | 1 + 6 files changed, 82 insertions(+), 44 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index 02f8c6615acc..5862ab79d327 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -146,6 +146,7 @@ public enum ResultCodes { CA_ROTATION_IN_POST_PROGRESS, CONTAINER_ALREADY_CLOSED, CONTAINER_ALREADY_CLOSING, - UNSUPPORTED_OPERATION + UNSUPPORTED_OPERATION, + CONTAINER_CHECKSUM_ERROR } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 39d265a4ab2d..bdcb40134695 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -90,7 +91,7 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null; try { // If the file is not present, we will create the data for the first time. This happens under a write lock. - checksumInfoBuilder = read(data) + checksumInfoBuilder = readBuilder(data) .orElse(ContainerProtos.ContainerChecksumInfo.newBuilder()); } catch (IOException ex) { LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.", @@ -121,7 +122,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null; try { // If the file is not present, we will create the data for the first time. This happens under a write lock. - checksumInfoBuilder = read(data) + checksumInfoBuilder = readBuilder(data) .orElse(ContainerProtos.ContainerChecksumInfo.newBuilder()); } catch (IOException ex) { LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.", @@ -147,39 +148,39 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } public ContainerDiffReport diff(KeyValueContainerData thisContainer, - ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws Exception { + ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws SCMException { ContainerDiffReport report = new ContainerDiffReport(); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { Preconditions.assertNotNull(thisContainer, "Container data is null"); Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); - Optional thisContainerChecksumInfoBuilder = - read(thisContainer); - if (!thisContainerChecksumInfoBuilder.isPresent()) { - throw new IOException("The container #" + thisContainer.getContainerID() + - " doesn't have container checksum"); + Optional thisContainerChecksumInfo = read(thisContainer); + if (!thisContainerChecksumInfo.isPresent()) { + throw new SCMException("The container #" + thisContainer.getContainerID() + + " doesn't have container checksum", SCMException.ResultCodes.IO_EXCEPTION); } if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { - throw new IOException("Container Id does not match for container " + thisContainer.getContainerID()); + throw new SCMException("Container Id does not match for container " + thisContainer.getContainerID(), + SCMException.ResultCodes.CONTAINER_CHECKSUM_ERROR); } - ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfoBuilder.get().build(); + ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfo.get(); compareContainerMerkleTree(thisChecksumInfo, peerChecksumInfo, report); }); - } catch (Exception ex) { + } catch (IOException ex) { metrics.incrementMerkleTreeDiffFailures(); - throw new Exception("Container Diff failed for container #" + thisContainer.getContainerID(), ex); + throw new SCMException("Container Diff failed for container #" + thisContainer.getContainerID(), ex, + SCMException.ResultCodes.CONTAINER_CHECKSUM_ERROR); } // Update Container Diff metrics based on the diff report. if (report.needsRepair()) { metrics.incrementRepairContainerDiffs(); - } else { - metrics.incrementNoRepairContainerDiffs(); + return report; } - metrics.incrementMerkleTreeDiffSuccesses(); + metrics.incrementNoRepairContainerDiffs(); return report; } @@ -313,7 +314,7 @@ private Lock getLock(long containerID) { * Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically * swapped into place. */ - private Optional read(ContainerData data) throws IOException { + private Optional read(ContainerData data) throws IOException { long containerID = data.getContainerID(); File checksumFile = getContainerChecksumFile(data); try { @@ -323,7 +324,7 @@ private Optional read(ContainerDa } try (FileInputStream inStream = new FileInputStream(checksumFile)) { return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), - () -> Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream).toBuilder())); + () -> Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream))); } } catch (IOException ex) { metrics.incrementMerkleTreeReadFailures(); @@ -332,6 +333,11 @@ private Optional read(ContainerDa } } + private Optional readBuilder(ContainerData data) throws IOException { + Optional checksumInfo = read(data); + return checksumInfo.map(ContainerProtos.ContainerChecksumInfo::toBuilder); + } + /** * Callers should have acquired the write lock before calling this method. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java index 6d1a07ed8e40..e24b52da49a5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java @@ -72,4 +72,12 @@ public Map> getCorruptChunks() { public boolean needsRepair() { return !missingBlocks.isEmpty() || !missingChunks.isEmpty() || !corruptChunks.isEmpty(); } + + @Override + public String toString() { + return "ContainerDiffReport:" + + " MissingBlocks= " + missingBlocks.size() + + ", MissingChunks= " + missingChunks.values().stream().mapToInt(List::size).sum() + + ", CorruptChunks= " + corruptChunks.values().stream().mapToInt(List::size).sum(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index 4e89fa027b4f..c3285bbf9ab0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -54,9 +54,6 @@ public static void unregister() { @Metric(about = "Number of Merkle tree diff failure") private MutableCounterLong numMerkleTreeDiffFailure; - @Metric(about = "Number of Merkle tree diff success") - private MutableCounterLong numMerkleTreeDiffSuccess; - @Metric(about = "Number of container diff that doesn't require repair") private MutableCounterLong numNoRepairContainerDiff; @@ -87,13 +84,10 @@ public void incrementMerkleTreeDiffFailures() { this.numMerkleTreeDiffFailure.incr(); } - public void incrementMerkleTreeDiffSuccesses() { - this.numMerkleTreeDiffSuccess.incr(); - } - public void incrementNoRepairContainerDiffs() { this.numNoRepairContainerDiff.incr(); } + public void incrementRepairContainerDiffs() { this.numRepairContainerDiff.incr(); } @@ -122,10 +116,6 @@ public long getRepairContainerDiffs() { return this.numRepairContainerDiff.value(); } - public long getMerkleTreeDiffSuccess() { - return this.numMerkleTreeDiffSuccess.value(); - } - public long getMerkleTreeDiffFailure() { return this.numMerkleTreeDiffFailure.value(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b5a5ced2385a..d425d689adc6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -342,7 +343,6 @@ public void testContainerWithNoDiff() throws Exception { assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); - assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } /** @@ -366,7 +366,6 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC assertTrue(metrics.getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); assertEquals(checksumManager.getMetrics().getRepairContainerDiffs(), 1); - assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } /** @@ -389,12 +388,12 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); - assertEquals(checksumManager.getMetrics().getMerkleTreeDiffSuccess(), 1); } @Test public void testFailureContainerMerkleTreeMetric() { - assertThrows(Exception.class, () -> checksumManager.diff(container, null)); + ContainerProtos.ContainerChecksumInfo peerChecksum = ContainerProtos.ContainerChecksumInfo.newBuilder().build(); + assertThrows(SCMException.class, () -> checksumManager.diff(container, peerChecksum)); assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1); } @@ -409,11 +408,10 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 3, 0, 0).getLeft(); List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); // Mark all the blocks as deleted in peer merkle tree - ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo - .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID) - .addAllDeletedBlocks(deletedBlockList); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(peerMerkleTree.toProto()).setContainerID(CONTAINER_ID) + .addAllDeletedBlocks(deletedBlockList).build(); - ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); writeContainerDataTreeProto(container, ourMerkleTree); ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); @@ -445,10 +443,9 @@ void testDeletedBlocksInOurContainerOnly() throws Exception { // Introduce block corruption in our merkle tree. ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 0, 3, 3).getLeft(); List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); - ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo - .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(peerMerkleTree.toProto()).setContainerID(CONTAINER_ID).build(); - ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); writeContainerDataTreeProto(container, ourMerkleTree); checksumManager.markBlocksAsDeleted(container, deletedBlockList); @@ -472,11 +469,10 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { // Introduce block corruption in our merkle tree. ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(peerMerkleTree, 0, 3, 3).getLeft(); List deletedBlockList = Arrays.asList(1L, 2L, 3L, 4L, 5L); - ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo - .newBuilder().setContainerMerkleTree(ourMerkleTree).setContainerID(CONTAINER_ID) - .addAllDeletedBlocks(deletedBlockList); + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(peerMerkleTree.toProto()).setContainerID(CONTAINER_ID) + .addAllDeletedBlocks(deletedBlockList).build(); - ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); writeContainerDataTreeProto(container, ourMerkleTree); ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); @@ -488,6 +484,42 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { assertTrue(containerDiff.getMissingChunks().isEmpty()); } + @Test + void testContainerDiffWithBlockDeletionInPeer() throws Exception { + // Setup deleted blocks only in the peer container checksum + ContainerMerkleTree peerMerkleTree = buildTestTree(config, 10); + // Create only 5 blocks + ContainerMerkleTree dummy = buildTestTree(config, 5); + // Introduce block corruption in our merkle tree. + ContainerProtos.ContainerMerkleTree ourMerkleTree = buildTestTreeWithMismatches(dummy, 3, 3, 3).getLeft(); + List deletedBlockList = Arrays.asList(6L, 7L, 8L, 9L, 10L); + ContainerProtos.ContainerChecksumInfo.Builder peerChecksumInfoBuilder = ContainerProtos.ContainerChecksumInfo + .newBuilder().setContainerMerkleTree(peerMerkleTree.toProto()).setContainerID(CONTAINER_ID) + .addAllDeletedBlocks(deletedBlockList); + writeContainerDataTreeProto(container, ourMerkleTree); + + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); + + ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); + // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted + // in peer merkle tree. + assertFalse(containerDiff.getMissingBlocks().isEmpty()); + // Missing block does not contain the deleted blocks 6L to 10L + assertFalse(containerDiff.getMissingBlocks().stream().anyMatch(any -> + deletedBlockList.contains(any.getBlockID()))); + assertFalse(containerDiff.getMissingBlocks().isEmpty()); + assertFalse(containerDiff.getMissingChunks().isEmpty()); + + // Clear deleted blocks to add them in missing blocks. + peerChecksumInfo = peerChecksumInfoBuilder.clearDeletedBlocks().build(); + containerDiff = checksumManager.diff(container, peerChecksumInfo); + + assertFalse(containerDiff.getMissingBlocks().isEmpty()); + // Missing block does not contain the deleted blocks 6L to 10L + assertTrue(containerDiff.getMissingBlocks().stream().anyMatch(any -> + deletedBlockList.contains(any.getBlockID()))); + } + @Test public void testChecksumTreeFilePath() { assertEquals(checksumFile.getAbsolutePath(), diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index fc24d2562f9c..c4c6299d36d4 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -143,6 +143,7 @@ enum Status { CONTAINER_ALREADY_CLOSED = 45; CONTAINER_ALREADY_CLOSING = 46; UNSUPPORTED_OPERATION = 47; + CONTAINER_CHECKSUM_ERROR = 48; } /** From 5dba34cc9e2a2f0fe048960248a691989e4030d6 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 20 Nov 2024 12:12:19 -0800 Subject: [PATCH 11/11] Update exception and other review comments. --- .../hdds/scm/exceptions/SCMException.java | 3 +-- .../ContainerChecksumTreeManager.java | 19 ++++++++++--------- .../checksum/ContainerDiffReport.java | 9 ++++++--- .../ContainerMerkleTreeTestUtils.java | 2 -- .../TestContainerChecksumTreeManager.java | 4 ++-- .../main/proto/DatanodeClientProtocol.proto | 1 + .../src/main/proto/ScmServerProtocol.proto | 1 - 7 files changed, 20 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java index 5862ab79d327..02f8c6615acc 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/exceptions/SCMException.java @@ -146,7 +146,6 @@ public enum ResultCodes { CA_ROTATION_IN_POST_PROGRESS, CONTAINER_ALREADY_CLOSED, CONTAINER_ALREADY_CLOSING, - UNSUPPORTED_OPERATION, - CONTAINER_CHECKSUM_ERROR + UNSUPPORTED_OPERATION } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index bdcb40134695..2c10313c2fc9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -19,7 +19,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -148,7 +148,8 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } public ContainerDiffReport diff(KeyValueContainerData thisContainer, - ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws SCMException { + ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws + StorageContainerException { ContainerDiffReport report = new ContainerDiffReport(); try { @@ -157,13 +158,13 @@ public ContainerDiffReport diff(KeyValueContainerData thisContainer, Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); Optional thisContainerChecksumInfo = read(thisContainer); if (!thisContainerChecksumInfo.isPresent()) { - throw new SCMException("The container #" + thisContainer.getContainerID() + - " doesn't have container checksum", SCMException.ResultCodes.IO_EXCEPTION); + throw new StorageContainerException("The container #" + thisContainer.getContainerID() + + " doesn't have container checksum", ContainerProtos.Result.IO_EXCEPTION); } if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { - throw new SCMException("Container Id does not match for container " + thisContainer.getContainerID(), - SCMException.ResultCodes.CONTAINER_CHECKSUM_ERROR); + throw new StorageContainerException("Container Id does not match for container " + + thisContainer.getContainerID(), ContainerProtos.Result.CONTAINER_ID_MISMATCH); } ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfo.get(); @@ -171,8 +172,8 @@ public ContainerDiffReport diff(KeyValueContainerData thisContainer, }); } catch (IOException ex) { metrics.incrementMerkleTreeDiffFailures(); - throw new SCMException("Container Diff failed for container #" + thisContainer.getContainerID(), ex, - SCMException.ResultCodes.CONTAINER_CHECKSUM_ERROR); + throw new StorageContainerException("Container Diff failed for container #" + thisContainer.getContainerID(), ex, + ContainerProtos.Result.IO_EXCEPTION); } // Update Container Diff metrics based on the diff report. @@ -212,7 +213,7 @@ private void compareContainerMerkleTree(ContainerProtos.ContainerChecksumInfo th // block and the peer's BG service hasn't run yet. We can ignore comparing them. // 3) If the block is only deleted in peer merkle tree, we can't reconcile for this block. It might be // deleted by peer's BG service. We can ignore comparing them. - // TODO: Handle missed block deletions from the deleted block ids. + // TODO: HDDS-11765 - Handle missed block deletions from the deleted block ids. if (!thisDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && !peerDeletedBlockSet.contains(thisBlockMerkleTree.getBlockID()) && thisBlockMerkleTree.getBlockChecksum() != peerBlockMerkleTree.getBlockChecksum()) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java index e24b52da49a5..c7a307ca7ceb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java @@ -73,11 +73,14 @@ public boolean needsRepair() { return !missingBlocks.isEmpty() || !missingChunks.isEmpty() || !corruptChunks.isEmpty(); } + // TODO: HDDS-11763 - Add metrics for missing blocks, missing chunks, corrupt chunks. @Override public String toString() { return "ContainerDiffReport:" + - " MissingBlocks= " + missingBlocks.size() + - ", MissingChunks= " + missingChunks.values().stream().mapToInt(List::size).sum() + - ", CorruptChunks= " + corruptChunks.values().stream().mapToInt(List::size).sum(); + " MissingBlocks= " + missingBlocks.size() + " blocks" + + ", MissingChunks= " + missingChunks.values().stream().mapToInt(List::size).sum() + + " chunks from " + missingChunks.size() + " blocks" + + ", CorruptChunks= " + corruptChunks.values().stream().mapToInt(List::size).sum() + + " chunks from " + corruptChunks.size() + " blocks"; } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index c74423fb19c2..db2a8c319b67 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -351,8 +351,6 @@ public static void writeContainerDataTreeProto(ContainerData data, ContainerProt try (FileOutputStream outputStream = new FileOutputStream(checksumFile)) { checksumInfo.writeTo(outputStream); } catch (IOException ex) { - // If the move failed and left behind the tmp file, the tmp file will be overwritten on the next successful write. - // Nothing reads directly from the tmp file. throw new IOException("Error occurred when writing container merkle tree for containerID " + data.getContainerID(), ex); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index d425d689adc6..c143290f7d68 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.scm.exceptions.SCMException; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -393,7 +393,7 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC @Test public void testFailureContainerMerkleTreeMetric() { ContainerProtos.ContainerChecksumInfo peerChecksum = ContainerProtos.ContainerChecksumInfo.newBuilder().build(); - assertThrows(SCMException.class, () -> checksumManager.diff(container, peerChecksum)); + assertThrows(StorageContainerException.class, () -> checksumManager.diff(container, peerChecksum)); assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1); } diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 6cd2ad3c578a..e751adc6e410 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -158,6 +158,7 @@ enum Result { EXPORT_CONTAINER_METADATA_FAILED = 45; IMPORT_CONTAINER_METADATA_FAILED = 46; BLOCK_ALREADY_FINALIZED = 47; + CONTAINER_ID_MISMATCH = 48; } /** diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto index c4c6299d36d4..fc24d2562f9c 100644 --- a/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto +++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerProtocol.proto @@ -143,7 +143,6 @@ enum Status { CONTAINER_ALREADY_CLOSED = 45; CONTAINER_ALREADY_CLOSING = 46; UNSUPPORTED_OPERATION = 47; - CONTAINER_CHECKSUM_ERROR = 48; } /**