diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
index 803ab19ade84..f54db66db753 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot
@@ -54,3 +54,9 @@ Test ozone debug read-replicas
 Test ozone debug version
     ${output} =    Execute    ozone debug version
     Execute    echo '${output}' | jq -r '.' # validate JSON
+
+Test ozone debug replicas verify metadata when block exists
+    ${json} =    Execute replicas verify metadata CLI tool
+    Should Be Equal    ${json}[status]    BLOCK_EXISTS
+    Should Be True    ${json}[pass]
+
diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
index 9bb77d00d6d3..f6d07f38a305 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot
@@ -99,3 +99,8 @@ Verify Stale Replica
             Should Be Equal As Integers    ${json}[blocks][${block}][length]    ${filesize}
         END
     END
+
+Execute replicas verify metadata CLI tool
+    ${output} =    Execute    ozone debug replicas verify --block-existence --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE}
+    ${json} =    Evaluate    json.loads('''${output}''')    json
+    [Return]    ${json}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/replicas/TestBlockExistenceVerifier.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/replicas/TestBlockExistenceVerifier.java
new file mode 100644
index 000000000000..5468c56f94a6
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/debug/replicas/TestBlockExistenceVerifier.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.replicas;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.UUID;
+import org.apache.hadoop.hdds.client.BlockID;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.utils.IOUtils;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.ozone.HddsDatanodeService;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.TestDataUtil;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.client.OzoneKeyDetails;
+import org.apache.hadoop.ozone.container.common.helpers.BlockData;
+import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.MethodOrderer;
+import org.junit.jupiter.api.Order;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestMethodOrder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration tests for {@link BlockExistenceVerifier}, run against a three-datanode
+ * {@link MiniOzoneCluster}. Tests are ordered because the second one deletes block metadata.
+ */
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+public class TestBlockExistenceVerifier {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestBlockExistenceVerifier.class);
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static MiniOzoneCluster cluster;
+  private static OzoneClient client;
+  private static OzoneConfiguration conf;
+  private static BlockExistenceVerifier blockExistenceVerifier;
+  private static final String VOLUME_NAME = UUID.randomUUID().toString();
+  private static final String BUCKET_NAME = UUID.randomUUID().toString();
+  private static final String KEY_NAME = UUID.randomUUID().toString();
+  private static final StringWriter OUT = new StringWriter();
+  private static PrintWriter printWriter;
+
+  @BeforeAll
+  public static void setUp() throws Exception {
+    conf = new OzoneConfiguration();
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setNumDatanodes(3)
+        .build();
+    cluster.waitForClusterToBeReady();
+    client = cluster.newClient();
+
+    writeKey(KEY_NAME);
+
+    printWriter = new PrintWriter(OUT);
+    blockExistenceVerifier = new BlockExistenceVerifier(client, LOG, printWriter, conf);
+  }
+
+  /** Clears the captured verifier output so each test parses only its own JSON. */
+  @AfterEach
+  public void cleanUp() {
+    OUT.getBuffer().setLength(0);
+  }
+
+  @AfterAll
+  public static void tearDown() {
+    IOUtils.closeQuietly(client, cluster);
+  }
+
+  /** No block metadata has been removed yet, so the verifier must report BLOCK_EXISTS. */
+  @Order(1)
+  @Test
+  void testBlockExists() throws IOException {
+    OzoneKeyDetails keyDetails = client.getProxy().getKeyDetails(VOLUME_NAME, BUCKET_NAME, KEY_NAME);
+
+    blockExistenceVerifier.verifyKey(keyDetails);
+    String cliOutput = OUT.toString();
+
+    JsonNode jsonNode = OBJECT_MAPPER.readTree(cliOutput);
+
+    assertThat(jsonNode.get("status").asText()).isEqualTo("BLOCK_EXISTS");
+    assertThat(jsonNode.get("pass").asBoolean()).isTrue();
+  }
+
+  /**
+   * Deletes the first block's metadata from every datanode's container DB and expects
+   * the verifier to report MISSING_REPLICAS.
+   */
+  @Order(2)
+  @Test
+  void testMissingReplicas() throws IOException {
+    OzoneKeyDetails keyDetails = client.getProxy().getKeyDetails(VOLUME_NAME, BUCKET_NAME, KEY_NAME);
+
+    List<OmKeyLocationInfo> keyLocations = lookupKey(cluster);
+    assertThat(keyLocations).isNotEmpty();
+
+    OmKeyLocationInfo keyLocation = keyLocations.get(0);
+    BlockID blockID = keyLocation.getBlockID();
+    // Iterate over Datanodes
+    for (HddsDatanodeService datanode : cluster.getHddsDatanodes()) {
+      ContainerSet dnContainerSet = datanode.getDatanodeStateMachine().getContainer().getContainerSet();
+
+      // Retrieve the container for the block
+      KeyValueContainer container = (KeyValueContainer) dnContainerSet.getContainer(blockID.getContainerID());
+      KeyValueContainerData containerData = container.getContainerData();
+
+      try (DBHandle db = BlockUtils.getDB(containerData, conf)) {
+        Table<String, BlockData> blockDataTable = db.getStore().getBlockDataTable();
+
+        String blockKey = containerData.getBlockKey(blockID.getLocalID());
+
+        // Ensure the block exists before deletion
+        assertNotNull(blockDataTable.get(blockKey));
+
+        // Delete the block from RocksDB
+        blockDataTable.delete(blockKey);
+
+        // Verify deletion
+        assertNull(blockDataTable.get(blockKey));
+      }
+    }
+
+    blockExistenceVerifier.verifyKey(keyDetails);
+    String cliOutput = OUT.toString();
+
+    JsonNode jsonNode = OBJECT_MAPPER.readTree(cliOutput);
+
+    assertThat(jsonNode.get("status").asText()).isEqualTo("MISSING_REPLICAS");
+    assertThat(jsonNode.get("pass").asBoolean()).isFalse();
+  }
+
+  /** Looks up the test key on the OM and returns the block locations of its latest version. */
+  private static List<OmKeyLocationInfo> lookupKey(MiniOzoneCluster ozoneCluster)
+      throws IOException {
+    OmKeyArgs keyArgs = new OmKeyArgs.Builder()
+        .setVolumeName(VOLUME_NAME)
+        .setBucketName(BUCKET_NAME)
+        .setKeyName(KEY_NAME)
+        .build();
+    OmKeyInfo keyInfo = ozoneCluster.getOzoneManager().lookupKey(keyArgs);
+
+    OmKeyLocationInfoGroup locations = keyInfo.getLatestVersionLocations();
+    assertNotNull(locations);
+    return locations.getLocationList();
+  }
+
+  /** Creates the test volume and bucket, then writes a small key named {@code keyName}. */
+  private static void writeKey(String keyName) throws IOException {
+    try (OzoneClient rpcClient = OzoneClientFactory.getRpcClient(conf)) {
+      TestDataUtil.createVolumeAndBucket(rpcClient, VOLUME_NAME, BUCKET_NAME);
+      TestDataUtil.createKey(
+          rpcClient.getObjectStore().getVolume(VOLUME_NAME).getBucket(BUCKET_NAME),
+          keyName, "test".getBytes(StandardCharsets.UTF_8));
+    }
+  }
+
+}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java
new file mode 100644
index 000000000000..b984ef7e54a3
--- /dev/null
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.debug.replicas;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.List;
+import java.util.Map;
+import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.XceiverClientManager;
+import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
+import org.apache.hadoop.hdds.server.JsonUtils;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneKeyDetails;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
+import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
+import org.slf4j.Logger;
+
+/**
+ * Checks block existence using GetBlock calls to the Datanodes.
+ *
+ * <p>One JSON object is printed per key. It carries the key path, a {@code status} of
+ * {@code BLOCK_EXISTS}, {@code MISSING_REPLICAS}, {@code NO_BLOCKS} or {@code ERROR}, and a
+ * boolean {@code pass} flag.
+ */
+public class BlockExistenceVerifier implements ReplicaVerifier {
+
+  private final OzoneClient client;
+  private final Logger log;
+  private final PrintWriter printWriter;
+  private final OzoneConfiguration conf;
+
+  public BlockExistenceVerifier(OzoneClient client, Logger log, PrintWriter printWriter, OzoneConfiguration conf) {
+    this.client = client;
+    this.log = log;
+    this.printWriter = printWriter;
+    this.conf = conf;
+  }
+
+  /**
+   * Looks up the key's latest block locations on the OM, sends GetBlock for every block to the
+   * datanodes of its pipeline and prints one JSON verdict for the key.
+   *
+   * @param keyDetails the key whose blocks are checked
+   */
+  @Override
+  public void verifyKey(OzoneKeyDetails keyDetails) {
+    ObjectNode result = JsonUtils.createObjectNode(null);
+
+    try (ContainerOperationClient containerOperationClient = new ContainerOperationClient(conf);
+        XceiverClientManager xceiverClientManager = containerOperationClient.getXceiverClientManager()) {
+
+      OzoneManagerProtocol ozoneManagerClient = client.getObjectStore().getClientProxy().getOzoneManagerClient();
+      OmKeyArgs keyArgs = new OmKeyArgs.Builder()
+          .setVolumeName(keyDetails.getVolumeName())
+          .setBucketName(keyDetails.getBucketName())
+          .setKeyName(keyDetails.getName())
+          .build();
+
+      OmKeyInfo keyInfo = ozoneManagerClient.lookupKey(keyArgs);
+      // Guard against an absent location group; it is reported like a key without blocks.
+      if (keyInfo.getLatestVersionLocations() == null) {
+        printJsonResult(keyDetails, "NO_BLOCKS", null, false, result);
+        return;
+      }
+      List<OmKeyLocationInfo> keyLocations = keyInfo.getLatestVersionLocations().getBlocksLatestVersionOnly();
+
+      if (keyLocations.isEmpty()) {
+        printJsonResult(keyDetails, "NO_BLOCKS", null, false, result);
+        return;
+      }
+
+      String blockId = null;
+      boolean allReplicasHaveBlock = true;
+      for (OmKeyLocationInfo keyLocation : keyLocations) {
+        Pipeline keyPipeline = keyLocation.getPipeline();
+        boolean isECKey = keyPipeline.getReplicationConfig().getReplicationType() == HddsProtos.ReplicationType.EC;
+
+        // Non-EC pipelines are copied with a STANDALONE/ONE replication config before the read client is acquired.
+        Pipeline pipeline = isECKey ? keyPipeline :
+            Pipeline.newBuilder(keyPipeline).setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)).build();
+
+        XceiverClientSpi xceiverClient = xceiverClientManager.acquireClientForReadData(pipeline);
+        try {
+          Map<DatanodeDetails, ContainerProtos.GetBlockResponseProto> responses =
+              ContainerProtocolCalls.getBlockFromAllNodes(xceiverClient,
+                  keyLocation.getBlockID().getDatanodeBlockIDProtobuf(), keyLocation.getToken());
+
+          blockId = keyLocation.getBlockID().toString();
+          int totalExpectedReplicas = responses.size();
+          int availableReplicas = 0;
+
+          for (ContainerProtos.GetBlockResponseProto response : responses.values()) {
+            if (response != null && response.hasBlockData()) {
+              availableReplicas++;
+            }
+          }
+
+          if (availableReplicas < totalExpectedReplicas || totalExpectedReplicas == 0) {
+            allReplicasHaveBlock = false;
+          }
+        } finally {
+          xceiverClientManager.releaseClientForReadData(xceiverClient, false);
+        }
+
+        if (!allReplicasHaveBlock) {
+          // Stop at the first incomplete block so the reported blockID is the failing one.
+          break;
+        }
+      }
+
+      if (allReplicasHaveBlock) {
+        printJsonResult(keyDetails, "BLOCK_EXISTS", blockId, true, result);
+      } else {
+        printJsonResult(keyDetails, "MISSING_REPLICAS", blockId, false, result);
+      }
+
+    } catch (IOException | InterruptedException e) {
+      if (e instanceof InterruptedException) {
+        // Restore the interrupt flag so the caller can still observe the interruption.
+        Thread.currentThread().interrupt();
+      }
+      // The trailing throwable argument keeps the stack trace in the log.
+      log.error("Error checking block existence for key {}: {}", keyDetails.getName(), e.getMessage(), e);
+      printJsonError(keyDetails, e.getMessage(), false, result);
+    }
+  }
+
+  /**
+   * Fills {@code result} with key path, block ID, status and pass flag, then prints it.
+   */
+  private void printJsonResult(OzoneKeyDetails keyParts, String status, String blockId,
+      boolean pass, ObjectNode result) {
+    result.put("key", keyParts.getVolumeName() + "/" + keyParts.getBucketName() + "/" + keyParts.getName());
+    result.put("blockID", blockId);
+    result.put("status", status);
+    result.put("pass", pass);
+
+    printWriter.println(result);
+  }
+
+  /**
+   * Fills {@code result} with key path, status {@code ERROR}, message and pass flag, then prints it.
+   */
+  private void printJsonError(OzoneKeyDetails keyParts, String errorMessage, boolean pass, ObjectNode result) {
+    result.put("key", keyParts.getVolumeName() + "/" + keyParts.getBucketName() + "/" + keyParts.getName());
+    result.put("status", "ERROR");
+    result.put("message", errorMessage);
+    result.put("pass", pass);
+
+    printWriter.println(result);
+  }
+
+}
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
index 30443dbb8db2..a2d967946cec 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
@@ -64,6 +64,11 @@ static class Verification {
         defaultValue = "false")
     private boolean doExecuteChecksums;
 
+    @CommandLine.Option(names = "--block-existence",
+        description = "Check for block existence on datanodes.",
+        defaultValue = "false")
+    private boolean doExecuteBlockExistence;
+
   }
   private List<ReplicaVerifier> replicaVerifiers;
 
@@ -75,6 +80,10 @@ protected void execute(OzoneClient client, OzoneAddress address) throws IOExcept
       replicaVerifiers.add(new Checksums(client, outputDir, LOG, getConf()));
     }
 
+    if (verification.doExecuteBlockExistence) {
+      replicaVerifiers.add(new BlockExistenceVerifier(client, LOG, out(), getConf()));
+    }
+
     findCandidateKeys(client, address);
   }
 