diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java index 90da0d676990..e3a575896347 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/protocol/ClientProtocol.java @@ -50,6 +50,7 @@ import org.apache.hadoop.ozone.om.helpers.ErrorInfo; import org.apache.hadoop.ozone.om.helpers.LeaseKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartUploadCompleteInfo; @@ -436,6 +437,17 @@ OzoneDataStreamOutput createStreamKey(String volumeName, String bucketName, OzoneInputStream getKey(String volumeName, String bucketName, String keyName) throws IOException; + /** + * Reads key info from an existing bucket. + * @param volumeName Name of the Volume + * @param bucketName Name of the Bucket + * @param keyName Name of the Key + * @param forceUpdateContainerCache if true force OM to update container cache location from SCM + * @return {@link OmKeyInfo} + * @throws IOException + */ + OmKeyInfo getKeyInfo(String volumeName, String bucketName, String keyName, + boolean forceUpdateContainerCache) throws IOException; /** * Deletes an existing key. 
diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index 1befbb3c9e53..aa4d3513e99e 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -1854,7 +1854,8 @@ private OmKeyInfo getS3PartKeyInfo( return keyInfoWithS3Context.getKeyInfo(); } - private OmKeyInfo getKeyInfo( + @Override + public OmKeyInfo getKeyInfo( String volumeName, String bucketName, String keyName, boolean forceUpdateContainerCache) throws IOException { Preconditions.checkNotNull(volumeName); diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index cd129bb07872..b875bfcafc8e 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -23,30 +23,31 @@ key="testfile" execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests.robot +#TODO HDDS-12890: Add acceptance robot tests for ozone debug replicas verify # get block locations for key -chunkinfo="${key}-blocks-${prefix}" -docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo" -host="$(jq -r '.KeyLocations[0][0]["Datanode-HostName"]' ${chunkinfo})" -container="${host%%.*}" - -# corrupt the first block of key on one of the datanodes -datafile="$(jq -r '.KeyLocations[0][0].Locations.files[0]' ${chunkinfo})" -docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}" - -execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/ozone-debug-corrupt-block.robot - -docker stop "${container}" - -wait_for_datanode "${container}" STALE 60 -execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" 
debug/ozone-debug-stale-datanode.robot - -wait_for_datanode "${container}" DEAD 60 -execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-dead-datanode.robot - -docker start "${container}" - -wait_for_datanode "${container}" HEALTHY 60 +#chunkinfo="${key}-blocks-${prefix}" +#docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo" +#host="$(jq -r '.KeyLocations[0][0]["Datanode-HostName"]' ${chunkinfo})" +#container="${host%%.*}" +# +## corrupt the first block of key on one of the datanodes +#datafile="$(jq -r '.KeyLocations[0][0].Locations.files[0]' ${chunkinfo})" +#docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}" +# +#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/ozone-debug-corrupt-block.robot +# +#docker stop "${container}" +# +#wait_for_datanode "${container}" STALE 60 +#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/ozone-debug-stale-datanode.robot +# +#wait_for_datanode "${container}" DEAD 60 +#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-dead-datanode.robot +# +#docker start "${container}" +# +#wait_for_datanode "${container}" HEALTHY 60 -start_docker_env 9 -execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot -execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec6-3.robot \ No newline at end of file +#start_docker_env 9 +#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot +#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec6-3.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot index 02de6794b237..97fe36d2c50e 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot @@ -36,13 +36,13 
@@ Write keys Execute ozone sh key put o3://om/${VOLUME}/${BUCKET}/${TESTFILE} ${TEMP_DIR}/${TESTFILE} *** Test Cases *** -Test ozone debug read-replicas - ${directory} = Execute replicas verify checksums CLI tool - Set Test Variable ${DIR} ${directory} - - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 +Test ozone debug replicas verify checksums + ${output} = Execute ozone debug replicas verify --checksums o3://om/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} + ${json} = Evaluate json.loads('''${output}''') json + # 'keys' array should be empty if all keys and their replicas passed checksum verification + Should Be Empty ${json}[keys] + Should Be True ${json}[pass] ${True} Test ozone debug version ${output} = Execute ozone debug version diff --git a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs-obs.robot b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs-obs.robot index c221c59f99ae..f6491ceebf67 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs-obs.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozonefs/ozonefs-obs.robot @@ -23,7 +23,7 @@ Test Timeout 5 minutes *** Variables *** ${SCHEME} ofs -${volume} volume1 +${volume} obs-volume1 ${bucket} obs-bucket1 ${PREFIX} ozone diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDebugShell.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDebugShell.java index bc5ee8cc3080..eb5c87fabb04 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDebugShell.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneDebugShell.java @@ -87,6 +87,29 @@ void shutdown() { IOUtils.closeQuietly(client); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testReplicasVerifyCmd(boolean isEcKey) throws Exception { + final String volumeName = 
UUID.randomUUID().toString(); + final String bucketName = UUID.randomUUID().toString(); + final String keyName = UUID.randomUUID().toString(); + + writeKey(volumeName, bucketName, keyName, isEcKey, BucketLayout.FILE_SYSTEM_OPTIMIZED); + + String bucketPath = Path.SEPARATOR + volumeName + Path.SEPARATOR + bucketName; + String fullKeyPath = bucketPath + Path.SEPARATOR + keyName; + + //TODO HDDS-12715: Create common integration test cluster for debug and repair tools + String[] args = new String[] { + getSetConfStringFromConf(OMConfigKeys.OZONE_OM_ADDRESS_KEY), + getSetConfStringFromConf(ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY), + "replicas", "verify", "--checksums", "--block-existence", fullKeyPath, "--output-dir", "/"//, "--all-results" + }; + + int exitCode = ozoneDebugShell.execute(args); + assertEquals(0, exitCode); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testChunkInfoCmdBeforeAfterCloseContainer(boolean isEcKey) throws Exception { diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java index 17f62e77ceee..739babce1d06 100644 --- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java +++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/ClientProtocolStub.java @@ -41,6 +41,7 @@ import org.apache.hadoop.ozone.om.helpers.ErrorInfo; import org.apache.hadoop.ozone.om.helpers.LeaseKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo; import org.apache.hadoop.ozone.om.helpers.OmMultipartUploadCompleteInfo; @@ -254,6 +255,13 @@ public OzoneInputStream getKey(String volumeName, String bucketName, return getBucket(volumeName, bucketName).readKey(keyName); } 
+ @Override + public OmKeyInfo getKeyInfo(String volumeName, String bucketName, String keyName, + boolean forceUpdateContainerCache) throws IOException { + return objectStoreStub.getClientProxy().getKeyInfo( + volumeName, bucketName, keyName, forceUpdateContainerCache); + } + private OzoneBucket getBucket(String volumeName, String bucketName) throws IOException { return objectStoreStub.getVolume(volumeName).getBucket(bucketName); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java new file mode 100644 index 000000000000..9d910700ea40 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockExistenceVerifier.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.debug.replicas; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; + +import java.io.IOException; +import java.util.Collections; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; +import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; + +/** + * Verifies block existence by making getBlock calls to the datanode. + */ +public class BlockExistenceVerifier implements ReplicaVerifier { + private final ContainerOperationClient containerClient; + private final XceiverClientManager xceiverClientManager; + private static final String CHECK_TYPE = "blockExistence"; + + @Override + public String getType() { + return CHECK_TYPE; + } + + public BlockExistenceVerifier(OzoneConfiguration conf) throws IOException { + this.containerClient = new ContainerOperationClient(conf); + this.xceiverClientManager = containerClient.getXceiverClientManager(); + } + + @Override + public BlockVerificationResult verifyBlock(DatanodeDetails datanode, OmKeyLocationInfo keyLocation, + int replicaIndex) { + XceiverClientSpi client = null; + try { + Pipeline pipeline = Pipeline.newBuilder(keyLocation.getPipeline()) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setNodes(Collections.singletonList(datanode)) + .setReplicaIndexes(Collections.singletonMap(datanode, replicaIndex)) + .build(); + + client = xceiverClientManager.acquireClientForReadData(pipeline); + ContainerProtos.GetBlockResponseProto response 
= ContainerProtocolCalls.getBlock( + client, + keyLocation.getBlockID(), + keyLocation.getToken(), + pipeline.getReplicaIndexes() + ); + + boolean hasBlock = response != null && response.hasBlockData(); + + if (hasBlock) { + return BlockVerificationResult.pass(); + } else { + return BlockVerificationResult.failCheck("Block does not exist on this replica"); + } + } catch (IOException e) { + return BlockVerificationResult.failIncomplete(e.getMessage()); + } finally { + xceiverClientManager.releaseClient(client, false); + } + } +} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockVerificationResult.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockVerificationResult.java new file mode 100644 index 000000000000..c73635ba4755 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/BlockVerificationResult.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.replicas; + +import java.util.Collections; +import java.util.List; + +/** + * Json structure for replicas to pass through each check and give output. 
/**
 * Outcome of one check run against one replica of a block; its fields are what the
 * replicas verify command writes into the JSON report.
 *
 * <ul>
 *   <li>{@code completed} - whether the check could be carried out at all;</li>
 *   <li>{@code pass} - whether the replica passed the check;</li>
 *   <li>{@code failures} - messages describing why the check failed or could not complete.</li>
 * </ul>
 */
public class BlockVerificationResult {

  private final boolean completed;
  private final boolean pass;
  private final List<String> failures;

  /**
   * @param completed whether the check ran to completion
   * @param pass whether the check passed
   * @param failures failure messages; {@code null} is treated as "no failures"
   */
  public BlockVerificationResult(boolean completed, boolean pass, List<String> failures) {
    this.completed = completed;
    this.pass = pass;
    // Expose a read-only view so callers cannot alter a result after the fact.
    this.failures = failures == null
        ? Collections.<String>emptyList()
        : Collections.unmodifiableList(failures);
  }

  /** @return a completed, passing result with no failure messages */
  public static BlockVerificationResult pass() {
    return new BlockVerificationResult(true, true, Collections.<String>emptyList());
  }

  /** @return a completed result that did not pass, carrying {@code message} */
  public static BlockVerificationResult failCheck(String message) {
    return new BlockVerificationResult(true, false, Collections.singletonList(message));
  }

  /** @return a result for a check that could not be completed, carrying {@code message} */
  public static BlockVerificationResult failIncomplete(String message) {
    return new BlockVerificationResult(false, false, Collections.singletonList(message));
  }

  public boolean isCompleted() {
    return completed;
  }

  public boolean passed() {
    return pass;
  }

  /** @return the failure messages, never {@code null}; the list cannot be modified */
  public List<String> getFailures() {
    return failures;
  }

}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.replicas; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl; +import org.apache.hadoop.ozone.common.OzoneChecksumException; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; + +/** + * Verifies the checksum of blocks by checking each replica associated + * with a given key. 
+ */ +public class ChecksumVerifier implements ReplicaVerifier { + private final OzoneConfiguration conf; + private final ContainerOperationClient containerClient; + private final XceiverClientManager xceiverClientManager; + private static final String CHECK_TYPE = "checksum"; + + @Override + public String getType() { + return CHECK_TYPE; + } + + public ChecksumVerifier(OzoneConfiguration conf) throws IOException { + this.conf = conf; + this.containerClient = new ContainerOperationClient(conf); + this.xceiverClientManager = containerClient.getXceiverClientManager(); + } + + @Override + public BlockVerificationResult verifyBlock(DatanodeDetails datanode, OmKeyLocationInfo keyLocation, + int replicaIndex) { + Pipeline pipeline = Pipeline.newBuilder(keyLocation.getPipeline()) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setNodes(Collections.singletonList(datanode)) + .setReplicaIndexes(Collections.singletonMap(datanode, replicaIndex)) + .build(); + + try (InputStream is = new BlockInputStreamFactoryImpl().create( + keyLocation.getPipeline().getReplicationConfig(), + keyLocation, + pipeline, + keyLocation.getToken(), + xceiverClientManager, + null, + conf.getObject(OzoneClientConfig.class))) { + IOUtils.copyLarge(is, NullOutputStream.INSTANCE); + return BlockVerificationResult.pass(); + } catch (IOException e) { + Throwable cause = e.getCause() != null ? 
e.getCause() : e; + if (cause instanceof OzoneChecksumException) { + return BlockVerificationResult.failCheck(cause.getMessage()); + } else { + return BlockVerificationResult.failIncomplete(cause.getMessage()); + } + } + } +} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java deleted file mode 100644 index f9d983ccef6d..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.debug.replicas; - -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import jakarta.annotation.Nonnull; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Map; -import org.apache.commons.io.IOUtils; -import org.apache.commons.io.output.NullOutputStream; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.server.JsonUtils; -import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.client.OzoneKeyDetails; -import org.apache.hadoop.ozone.client.io.OzoneInputStream; -import org.apache.hadoop.ozone.client.protocol.ClientProtocol; -import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; - -/** - * Class that downloads every replica for all the blocks associated with a - * given key. It also generates a manifest file with information about the - * downloaded replicas. 
- */ -public class Checksums implements ReplicaVerifier { - - private static final String JSON_PROPERTY_FILE_NAME = "filename"; - private static final String JSON_PROPERTY_FILE_SIZE = "datasize"; - private static final String JSON_PROPERTY_FILE_BLOCKS = "blocks"; - private static final String JSON_PROPERTY_BLOCK_INDEX = "blockIndex"; - private static final String JSON_PROPERTY_BLOCK_CONTAINERID = "containerId"; - private static final String JSON_PROPERTY_BLOCK_LOCALID = "localId"; - private static final String JSON_PROPERTY_BLOCK_LENGTH = "length"; - private static final String JSON_PROPERTY_BLOCK_OFFSET = "offset"; - private static final String JSON_PROPERTY_BLOCK_REPLICAS = "replicas"; - private static final String JSON_PROPERTY_REPLICA_HOSTNAME = "hostname"; - private static final String JSON_PROPERTY_REPLICA_UUID = "uuid"; - private static final String JSON_PROPERTY_REPLICA_EXCEPTION = "exception"; - - private String outputDir; - private OzoneClient client; - - public Checksums(OzoneClient client, String outputDir) { - this.client = client; - this.outputDir = outputDir; - } - - private void downloadReplicasAndCreateManifest( - Map> replicas, - ArrayNode blocks) throws IOException { - int blockIndex = 0; - - for (Map.Entry> - block : replicas.entrySet()) { - ObjectNode blockJson = JsonUtils.createObjectNode(null); - ArrayNode replicasJson = JsonUtils.createArrayNode(); - - blockIndex += 1; - OmKeyLocationInfo locationInfo = block.getKey(); - blockJson.put(JSON_PROPERTY_BLOCK_INDEX, blockIndex); - blockJson.put(JSON_PROPERTY_BLOCK_CONTAINERID, locationInfo.getContainerID()); - blockJson.put(JSON_PROPERTY_BLOCK_LOCALID, locationInfo.getLocalID()); - blockJson.put(JSON_PROPERTY_BLOCK_LENGTH, locationInfo.getLength()); - blockJson.put(JSON_PROPERTY_BLOCK_OFFSET, locationInfo.getOffset()); - - for (Map.Entry - replica : block.getValue().entrySet()) { - DatanodeDetails datanode = replica.getKey(); - - ObjectNode replicaJson = JsonUtils.createObjectNode(null); - - 
replicaJson.put(JSON_PROPERTY_REPLICA_HOSTNAME, datanode.getHostName()); - replicaJson.put(JSON_PROPERTY_REPLICA_UUID, datanode.getUuidString()); - - try (InputStream is = replica.getValue()) { - IOUtils.copyLarge(is, NullOutputStream.INSTANCE); - } catch (IOException e) { - replicaJson.put(JSON_PROPERTY_REPLICA_EXCEPTION, e.getMessage()); - } - replicasJson.add(replicaJson); - } - blockJson.set(JSON_PROPERTY_BLOCK_REPLICAS, replicasJson); - blocks.add(blockJson); - } - } - - @Nonnull - private File createDirectory(String volumeName, String bucketName, - String keyName) throws IOException { - String fileSuffix - = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - String directoryName = volumeName + "_" + bucketName + "_" + keyName + - "_" + fileSuffix; - System.out.println("Creating directory : " + directoryName); - File dir = new File(outputDir, directoryName); - if (!dir.exists()) { - if (dir.mkdirs()) { - System.out.println("Successfully created!"); - } else { - throw new IOException(String.format( - "Failed to create directory %s.", dir)); - } - } - return dir; - } - - @Override - public void verifyKey(OzoneKeyDetails keyDetails) { - String volumeName = keyDetails.getVolumeName(); - String bucketName = keyDetails.getBucketName(); - String keyName = keyDetails.getName(); - System.out.println("Processing key : " + volumeName + "/" + bucketName + "/" + keyName); - try { - ClientProtocol checksumClient = client.getObjectStore().getClientProxy(); - - // Multilevel keys will have a '/' in their names. This interferes with - // directory and file creation process. Flatten the keys to fix this. 
- String sanitizedKeyName = keyName.replace("/", "_"); - - File dir = createDirectory(volumeName, bucketName, sanitizedKeyName); - OzoneKeyDetails keyInfoDetails = checksumClient.getKeyDetails(volumeName, bucketName, keyName); - Map> replicas = - checksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName); - - ObjectNode result = JsonUtils.createObjectNode(null); - result.put(JSON_PROPERTY_FILE_NAME, volumeName + "/" + bucketName + "/" + keyName); - result.put(JSON_PROPERTY_FILE_SIZE, keyInfoDetails.getDataSize()); - - ArrayNode blocks = JsonUtils.createArrayNode(); - downloadReplicasAndCreateManifest(replicas, blocks); - result.set(JSON_PROPERTY_FILE_BLOCKS, blocks); - - String prettyJson = JsonUtils.toJsonStringWithDefaultPrettyPrinter(result); - - String manifestFileName = sanitizedKeyName + "_manifest"; - File manifestFile = new File(dir, manifestFileName); - Files.write(manifestFile.toPath(), prettyJson.getBytes(StandardCharsets.UTF_8)); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java index cbb5f31c981b..af9bcdac4b83 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java @@ -17,12 +17,14 @@ package org.apache.hadoop.ozone.debug.replicas; -import org.apache.hadoop.ozone.client.OzoneKeyDetails; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; /** - * Functional interface for implementing a key verifier. + * Functional interface for implementing a block verifier. 
*/ -@FunctionalInterface public interface ReplicaVerifier { - void verifyKey(OzoneKeyDetails keyDetails); + BlockVerificationResult verifyBlock(DatanodeDetails datanode, OmKeyLocationInfo keyLocation, int replicaIndex); + + String getType(); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java index 2f1a30ed26d9..38ebee0e8ac0 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java @@ -17,18 +17,24 @@ package org.apache.hadoop.ozone.debug.replicas; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.cli.ScmOption; +import org.apache.hadoop.hdds.server.JsonUtils; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneKey; -import org.apache.hadoop.ozone.client.OzoneKeyDetails; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.shell.Handler; import org.apache.hadoop.ozone.shell.OzoneAddress; import org.apache.hadoop.ozone.shell.Shell; @@ -40,7 +46,7 @@ @CommandLine.Command( name = "verify", - description = "Run checks to verify data across replicas") + description = "Run checks to verify data across replicas. 
By default prints only the keys with failed checks.") public class ReplicasVerify extends Handler { @CommandLine.Mixin private ScmOption scmOption; @@ -54,6 +60,10 @@ public class ReplicasVerify extends Handler { required = true) private String outputDir; + @CommandLine.Option(names = {"--all-results"}, + description = "Print results for all passing and failing keys") + private boolean allResults; + @CommandLine.ArgGroup(exclusive = false, multiplicity = "1") private Verification verification; @@ -64,7 +74,11 @@ protected void execute(OzoneClient client, OzoneAddress address) throws IOExcept replicaVerifiers = new ArrayList<>(); if (verification.doExecuteChecksums) { - replicaVerifiers.add(new Checksums(client, outputDir)); + replicaVerifiers.add(new ChecksumVerifier(getConf())); + } + + if (verification.doExecuteBlockExistence) { + replicaVerifiers.add(new BlockExistenceVerifier(getConf())); } findCandidateKeys(client, address); @@ -80,42 +94,120 @@ void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress address) throws IOE String volumeName = address.getVolumeName(); String bucketName = address.getBucketName(); String keyName = address.getKeyName(); + + ObjectNode root = JsonUtils.createObjectNode(null); + ArrayNode keysArray = root.putArray("keys"); + + AtomicBoolean allKeysPassed = new AtomicBoolean(true); + if (!keyName.isEmpty()) { - OzoneKeyDetails keyDetails = ozoneClient.getProxy().getKeyDetails(volumeName, bucketName, keyName); - processKey(keyDetails); + processKey(ozoneClient, volumeName, bucketName, keyName, keysArray, allKeysPassed); } else if (!bucketName.isEmpty()) { OzoneVolume volume = objectStore.getVolume(volumeName); OzoneBucket bucket = volume.getBucket(bucketName); - checkBucket(bucket); + checkBucket(ozoneClient, bucket, keysArray, allKeysPassed); } else if (!volumeName.isEmpty()) { OzoneVolume volume = objectStore.getVolume(volumeName); - checkVolume(volume); + checkVolume(ozoneClient, volume, keysArray, allKeysPassed); } else { for 
(Iterator it = objectStore.listVolumes(null); it.hasNext();) { - checkVolume(it.next()); + checkVolume(ozoneClient, it.next(), keysArray, allKeysPassed); } } + root.put("pass", allKeysPassed.get()); + System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter(root)); } - void checkVolume(OzoneVolume volume) throws IOException { + void checkVolume(OzoneClient ozoneClient, OzoneVolume volume, ArrayNode keysArray, AtomicBoolean allKeysPassed) + throws IOException { for (Iterator it = volume.listBuckets(null); it.hasNext();) { OzoneBucket bucket = it.next(); - checkBucket(bucket); + checkBucket(ozoneClient, bucket, keysArray, allKeysPassed); } } - void checkBucket(OzoneBucket bucket) throws IOException { + void checkBucket(OzoneClient ozoneClient, OzoneBucket bucket, ArrayNode keysArray, AtomicBoolean allKeysPassed) + throws IOException { for (Iterator it = bucket.listKeys(null); it.hasNext();) { OzoneKey key = it.next(); // TODO: Remove this check once HDDS-12094 is fixed if (!key.getName().endsWith("/")) { - processKey(bucket.getKey(key.getName())); + processKey(ozoneClient, key.getVolumeName(), key.getBucketName(), key.getName(), keysArray, allKeysPassed); } } } - void processKey(OzoneKeyDetails keyDetails) { - replicaVerifiers.forEach(verifier -> verifier.verifyKey(keyDetails)); + void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, String keyName, + ArrayNode keysArray, AtomicBoolean allKeysPassed) throws IOException { + OmKeyInfo keyInfo = ozoneClient.getProxy().getKeyInfo( + volumeName, bucketName, keyName, false); + + ObjectNode keyNode = JsonUtils.createObjectNode(null); + keyNode.put("volumeName", volumeName); + keyNode.put("bucketName", bucketName); + keyNode.put("name", keyName); + + ArrayNode blocksArray = keyNode.putArray("blocks"); + boolean keyPass = true; + + for (OmKeyLocationInfo keyLocation : keyInfo.getLatestVersionLocations().getBlocksLatestVersionOnly()) { + long containerID = keyLocation.getContainerID(); + long 
localID = keyLocation.getLocalID(); + + ObjectNode blockNode = blocksArray.addObject(); + blockNode.put("containerID", containerID); + blockNode.put("blockID", localID); + + ArrayNode replicasArray = blockNode.putArray("replicas"); + boolean blockPass = true; + + for (DatanodeDetails datanode : keyLocation.getPipeline().getNodes()) { + ObjectNode replicaNode = replicasArray.addObject(); + + ObjectNode datanodeNode = replicaNode.putObject("datanode"); + datanodeNode.put("uuid", datanode.getUuidString()); + datanodeNode.put("hostname", datanode.getHostName()); + + ArrayNode checksArray = replicaNode.putArray("checks"); + boolean replicaPass = true; + int replicaIndex = keyLocation.getPipeline().getReplicaIndex(datanode); + + for (ReplicaVerifier verifier : replicaVerifiers) { + BlockVerificationResult result = verifier.verifyBlock(datanode, keyLocation, replicaIndex); + ObjectNode checkNode = checksArray.addObject(); + checkNode.put("type", verifier.getType()); + checkNode.put("completed", result.isCompleted()); + checkNode.put("pass", result.passed()); + + ArrayNode failuresArray = checkNode.putArray("failures"); + for (String failure : result.getFailures()) { + failuresArray.addObject().put("message", failure); + } + replicaNode.put("replicaIndex", replicaIndex); + + if (!result.passed()) { + replicaPass = false; + } + } + + if (!replicaPass) { + blockPass = false; + } + } + + if (!blockPass) { + keyPass = false; + } + } + + keyNode.put("pass", keyPass); + if (!keyPass) { + allKeysPassed.set(false); + } + + if (!keyPass || allResults) { + keysArray.add(keyNode); + } } static class Verification { @@ -125,5 +217,10 @@ static class Verification { defaultValue = "false") private boolean doExecuteChecksums; + @CommandLine.Option(names = "--block-existence", + description = "Check for block existence on datanodes.", + defaultValue = "false") + private boolean doExecuteBlockExistence; + } }