Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo che
throw new IOException("Error occurred when writing container merkle tree for containerID "
+ data.getContainerID(), ex);
}
// Set in-memory data checksum.
data.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum());
}

/**
Expand All @@ -378,6 +380,21 @@ public ByteString getContainerChecksumInfo(KeyValueContainerData data) throws IO
}
}

/**
 * Attempts to load the persisted checksum info file for the given container.
 *
 * @param data metadata of the container whose checksum file should be read
 * @return the parsed {@link ContainerProtos.ContainerChecksumInfo}, or an empty
 *         Optional when the file does not exist or cannot be read/parsed. Both
 *         failure cases are logged; no exception is propagated to the caller.
 */
public static Optional<ContainerProtos.ContainerChecksumInfo> readChecksumInfo(KeyValueContainerData data) {
  File checksumFile = getContainerChecksumFile(data);
  if (checksumFile.exists()) {
    try (FileInputStream in = new FileInputStream(checksumFile)) {
      return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(in));
    } catch (IOException ex) {
      // parseFrom failures (InvalidProtocolBufferException) are IOExceptions too.
      LOG.error("Error while reading the checksum file for container {}", data.getContainerID(), ex);
    }
  } else {
    LOG.error("Checksum file not found for container {}", data.getContainerID());
  }
  return Optional.empty();
}

@VisibleForTesting
public ContainerMerkleTreeMetrics getMetrics() {
return this.metrics;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator;
Expand Down Expand Up @@ -277,6 +280,19 @@ public static void parseKVContainerData(KeyValueContainerData kvContainerData,
}
}

/**
 * Loads the persisted container data checksum into the in-memory container
 * metadata. Open containers are skipped and keep their current checksum.
 *
 * If the checksum file is missing or unreadable, {@code readChecksumInfo}
 * returns empty (it logs the failure) and the in-memory value is left as-is.
 *
 * @param kvContainerData container metadata to update in place
 */
private static void populateContainerDataChecksum(KeyValueContainerData kvContainerData) {
  if (kvContainerData.isOpen()) {
    // NOTE(review): assumes the checksum file is only meaningful once the
    // container is no longer open — confirm against the close path.
    return;
  }

  // Idiomatic Optional usage instead of isPresent()/get().
  ContainerChecksumTreeManager.readChecksumInfo(kvContainerData)
      .map(info -> info.getContainerMerkleTree().getDataChecksum())
      .ifPresent(kvContainerData::setDataChecksum);
}

private static void populateContainerMetadata(
KeyValueContainerData kvContainerData, DatanodeStore store,
boolean bCheckChunksFilePath)
Expand Down Expand Up @@ -356,6 +372,7 @@ private static void populateContainerMetadata(

// Load finalizeBlockLocalIds for container in memory.
populateContainerFinalizeBlock(kvContainerData, store);
populateContainerDataChecksum(kvContainerData);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ public String toString() {
",replicaIndex=" + replicaIndex :
"") +
", isEmpty=" + isEmpty +
", dataChecksum=" + dataChecksum +
'}';
}

Expand Down
4 changes: 2 additions & 2 deletions hadoop-ozone/dist/src/main/smoketest/admincli/container.robot
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ Reconcile closed container
# TODO When the scanner is computing checksums automatically, this test may need to be updated.
${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1
${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1
# 0 is the hex value of an empty checksum.
Should Be Equal As Strings 0 ${data_checksum}
    # 0 is the hex value of an empty checksum. Once the container is closed, its data checksum should no longer be 0.
Should Not Be Equal As Strings 0 ${data_checksum}
# When reconciliation finishes, replica checksums should be shown.
Execute ozone admin container reconcile ${container}
Wait until keyword succeeds 1min 5sec Reconciliation complete ${container}
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ public void testReplicasAreReportedForClosedContainerAfterRestart()
// Ensure 3 replicas are reported successfully as expected.
GenericTestUtils.waitFor(() ->
getContainerReplicas(newContainer).size() == 3, 200, 30000);
for (ContainerReplica replica : getContainerReplicas(newContainer)) {
assertNotEquals(0, replica.getDataChecksum());
}
}

/**
Expand Down Expand Up @@ -198,6 +201,10 @@ public void testCloseClosedContainer()
assertTrue(containerChecksumFileExists(hddsDatanode, container));
}

for (ContainerReplica replica : getContainerReplicas(container)) {
assertNotEquals(0, replica.getDataChecksum());
}

assertThrows(IOException.class,
() -> cluster.getStorageContainerLocationClient()
.closeContainer(container.getContainerID()),
Expand Down Expand Up @@ -269,6 +276,12 @@ public void testContainerChecksumForClosedContainer() throws Exception {
assertNotEquals(prevExpectedChecksumInfo1.getContainerID(), prevExpectedChecksumInfo2.getContainerID());
assertNotEquals(prevExpectedChecksumInfo1.getContainerMerkleTree().getDataChecksum(),
prevExpectedChecksumInfo2.getContainerMerkleTree().getDataChecksum());
for (ContainerReplica replica : getContainerReplicas(containerInfo1)) {
assertNotEquals(0, replica.getDataChecksum());
}
for (ContainerReplica replica : getContainerReplicas(containerInfo2)) {
assertNotEquals(0, replica.getDataChecksum());
}
}

private boolean checkContainerCloseInDatanode(HddsDatanodeService hddsDatanode,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch;
import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

Expand All @@ -32,6 +33,7 @@
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
Expand All @@ -40,6 +42,7 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
Expand Down Expand Up @@ -84,6 +87,7 @@ public static void init() throws Exception {
conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath());
// Disable the container scanner so it does not create merkle tree files that interfere with this test.
conf.getObject(ContainerScannerConfiguration.class).setEnabled(false);
conf.setBoolean("hdds.container.scrub.enabled", false);
cluster = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(3)
.build();
Expand Down Expand Up @@ -247,6 +251,35 @@ public void testGetChecksumInfoSuccess() throws Exception {
}
}

@Test
public void testDataChecksumReportedAtSCM() throws Exception {
long containerID = writeDataAndGetContainer(true);
// Check non-zero checksum after container close
Set<ContainerReplica> containerReplicas = cluster.getStorageContainerManager().getContainerManager()
.getContainerReplicas(ContainerID.valueOf(containerID));
for (ContainerReplica containerReplica: containerReplicas) {
assertNotEquals(0, containerReplica.getDataChecksum());
}
cluster.getStorageContainerLocationClient().reconcileContainer(containerID);
Thread.sleep(10000);

// Check non-zero checksum after container reconciliation
containerReplicas = cluster.getStorageContainerManager().getContainerManager()
.getContainerReplicas(ContainerID.valueOf(containerID));
for (ContainerReplica containerReplica: containerReplicas) {
assertNotEquals(0, containerReplica.getDataChecksum());
}

// Check non-zero checksum after datanode restart
// Restarting all the nodes take more time in mini ozone cluster, so restarting only one node
cluster.restartHddsDatanode(0, true);
containerReplicas = cluster.getStorageContainerManager().getContainerManager()
.getContainerReplicas(ContainerID.valueOf(containerID));
for (ContainerReplica containerReplica: containerReplicas) {
assertNotEquals(0, containerReplica.getDataChecksum());
}
}

private long writeDataAndGetContainer(boolean close) throws Exception {
String volumeName = UUID.randomUUID().toString();
String bucketName = UUID.randomUUID().toString();
Expand Down