diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot index e0fcd50ac12f..20689b7c0f5e 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot @@ -27,8 +27,8 @@ ${TESTFILE} testfile ${CORRUPT_DATANODE} ozone_datanode_1.ozone_default *** Test Cases *** -Test ozone debug read-replicas with corrupt block replica - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with corrupt block replica + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot index e8385bb5938f..42ae5dec7e92 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot @@ -14,7 +14,7 @@ # limitations under the License. 
*** Settings *** -Documentation Test read-replicas in case of one datanode is dead +Documentation Test checksums in case of one datanode is dead Library OperatingSystem Resource ../lib/os.robot Resource ozone-debug.robot @@ -26,8 +26,8 @@ ${BUCKET} cli-debug-bucket ${TESTFILE} testfile *** Test Cases *** -Test ozone debug read-replicas with one datanode DEAD - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with one datanode DEAD + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot index c7cc7aaf3aa4..36cef5e66518 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot @@ -14,7 +14,7 @@ # limitations under the License. 
*** Settings *** -Documentation Test read-replicas in case of one datanode is stale +Documentation Test checksums in case of one datanode is stale Library OperatingSystem Resource ../lib/os.robot Resource ozone-debug.robot @@ -27,8 +27,8 @@ ${TESTFILE} testfile ${STALE_DATANODE} ozone_datanode_1.ozone_default *** Test Cases *** -Test ozone debug read-replicas with one datanode STALE - ${directory} = Execute read-replicas CLI tool +Test ozone debug checksums with one datanode STALE + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot index 5b3638040a73..57227458cc15 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot @@ -44,13 +44,13 @@ Create EC key *** Test Cases *** 0 data block Create EC key 1000 0 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 1 1 data block Create EC key 1048576 1 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 ${sum_size} = Evaluate 1048576 * 3 @@ -58,7 +58,7 @@ Create EC key 2 data blocks Create EC key 1048576 2 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 4 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 @@ -66,7 +66,7 @@ Create EC key 3 data blocks Create EC key 1048576 3 - ${directory} = Execute read-replicas CLI 
tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 5 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 6 @@ -74,7 +74,7 @@ Create EC key 3 data blocks and partial stripe Create EC key 1000000 4 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 ${sum_size_last_stripe} = Evaluate ((1000000 * 4) % 1048576) * 3 @@ -84,7 +84,7 @@ Create EC key 4 data blocks and partial stripe Create EC key 1000000 5 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 ${sum_size_last_stripe} = Evaluate 1048576 * 3 + ((1000000 * 5) % 1048576) @@ -94,7 +94,7 @@ Create EC key 6 data blocks Create EC key 1048576 6 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 5 Should Be Equal As Integers ${count_files} 11 diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot index 692f2791e20f..52d48c25f77d 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot @@ -39,13 +39,13 @@ Create EC key *** Test Cases *** 0 data block Create EC key 1048576 0 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 1 1 data block Create EC key 1048576 1 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute 
replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 ${sum_size} = Evaluate 1048576 * 4 @@ -53,7 +53,7 @@ Create EC key 2 data blocks Create EC key 1048576 2 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 5 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 @@ -61,7 +61,7 @@ Create EC key 3 data blocks Create EC key 1048576 3 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${sum_size} = Evaluate 1048576 * 6 ${count_files} = Count Files In Directory ${directory} Should Be Equal As Integers ${count_files} 10 @@ -69,7 +69,7 @@ Create EC key 4 data blocks Create EC key 1048576 4 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 7 Should Be Equal As Integers ${count_files} 10 @@ -77,7 +77,7 @@ Create EC key 5 data blocks Create EC key 1048576 5 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 8 Should Be Equal As Integers ${count_files} 10 @@ -85,7 +85,7 @@ Create EC key 6 data blocks Create EC key 1048576 6 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 9 Should Be Equal As Integers ${count_files} 10 @@ -93,7 +93,7 @@ Create EC key 6 data blocks and partial stripe Create EC key 1000000 7 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} 
${sum_size} = Evaluate 1048576 * 9 ${sum_size_last_stripe} = Evaluate ((1000000 * 7) % 1048576) * 4 @@ -103,10 +103,10 @@ Create EC key 7 data blocks and partial stripe Create EC key 1000000 8 - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool ${count_files} = Count Files In Directory ${directory} ${sum_size} = Evaluate 1048576 * 9 ${sum_size_last_stripe} = Evaluate 1048576 * 4 + ((1000000 * 8) % 1048576) Should Be Equal As Integers ${count_files} 19 Verify Healthy EC Replica ${directory} 1 ${sum_size} - Verify Healthy EC Replica ${directory} 2 ${sum_size_last_stripe} \ No newline at end of file + Verify Healthy EC Replica ${directory} 2 ${sum_size_last_stripe} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot index 4e013e2a64b9..803ab19ade84 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot @@ -37,7 +37,7 @@ Write keys *** Test Cases *** Test ozone debug read-replicas - ${directory} = Execute read-replicas CLI tool + ${directory} = Execute replicas verify checksums CLI tool Set Test Variable ${DIR} ${directory} ${count_files} = Count Files In Directory ${directory} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot index fb3e0f415860..9bb77d00d6d3 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot @@ -19,8 +19,8 @@ Library Collections Resource ../lib/os.robot *** Keywords *** -Execute read-replicas CLI tool - Execute ozone debug -Dozone.network.topology.aware.read=true read-replicas --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE} +Execute replicas verify checksums CLI tool + Execute ozone debug 
-Dozone.network.topology.aware.read=true replicas verify --checksums --output-dir ${TEMP_DIR} o3://om/${VOLUME}/${BUCKET}/${TESTFILE} ${directory} = Execute ls -d ${TEMP_DIR}/${VOLUME}_${BUCKET}_${TESTFILE}_*/ | tail -n 1 Directory Should Exist ${directory} File Should Exist ${directory}/${TESTFILE}_manifest diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java similarity index 78% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java index 0ea53f5bb1d9..de6aa05de73c 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/ReadReplicas.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/Checksums.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.debug; +package org.apache.hadoop.ozone.debug.replicas; import static java.util.Collections.emptyMap; @@ -33,7 +33,6 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Map; -import org.apache.hadoop.hdds.cli.DebugSubcommand; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -46,27 +45,14 @@ import org.apache.hadoop.ozone.client.rpc.RpcClient; import org.apache.hadoop.ozone.common.OzoneChecksumException; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; -import org.apache.hadoop.ozone.shell.OzoneAddress; -import org.apache.hadoop.ozone.shell.keys.KeyHandler; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; +import org.slf4j.Logger; /** * Class that downloads every replica for all the blocks associated with a * given key. It also generates a manifest file with information about the * downloaded replicas. 
*/ -@CommandLine.Command(name = "read-replicas", - description = "Reads every replica for all the blocks associated with a " + - "given key.") -@MetaInfServices(DebugSubcommand.class) -public class ReadReplicas extends KeyHandler implements DebugSubcommand { - - @CommandLine.Option(names = {"--outputDir", "-o", "--output-dir"}, - description = "Destination where the directory will be created" + - " for the downloaded replicas and the manifest file.", - defaultValue = "/opt/hadoop") - private String outputDir; +public class Checksums implements ReplicaVerifier { private static final String JSON_PROPERTY_FILE_NAME = "filename"; private static final String JSON_PROPERTY_FILE_SIZE = "datasize"; @@ -81,70 +67,18 @@ public class ReadReplicas extends KeyHandler implements DebugSubcommand { private static final String JSON_PROPERTY_REPLICA_UUID = "uuid"; private static final String JSON_PROPERTY_REPLICA_EXCEPTION = "exception"; - @Override - protected void execute(OzoneClient client, OzoneAddress address) - throws IOException { - - address.ensureKeyAddress(); - boolean isChecksumVerifyEnabled - = getConf().getBoolean("ozone.client.verify.checksum", true); - OzoneConfiguration configuration = new OzoneConfiguration(getConf()); - configuration.setBoolean("ozone.client.verify.checksum", - !isChecksumVerifyEnabled); - - RpcClient newClient = new RpcClient(configuration, null); - try { - ClientProtocol noChecksumClient; - ClientProtocol checksumClient; - if (isChecksumVerifyEnabled) { - checksumClient = client.getObjectStore().getClientProxy(); - noChecksumClient = newClient; - } else { - checksumClient = newClient; - noChecksumClient = client.getObjectStore().getClientProxy(); - } - - String volumeName = address.getVolumeName(); - String bucketName = address.getBucketName(); - String keyName = address.getKeyName(); - // Multilevel keys will have a '/' in their names. This interferes with - // directory and file creation process. Flatten the keys to fix this. 
- String sanitizedKeyName = address.getKeyName().replace("/", "_"); - - File dir = createDirectory(volumeName, bucketName, sanitizedKeyName); - - OzoneKeyDetails keyInfoDetails - = checksumClient.getKeyDetails(volumeName, bucketName, keyName); - - Map> replicas = - checksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName); - - Map> - replicasWithoutChecksum = noChecksumClient - .getKeysEveryReplicas(volumeName, bucketName, keyName); - - ObjectNode result = JsonUtils.createObjectNode(null); - result.put(JSON_PROPERTY_FILE_NAME, - volumeName + "/" + bucketName + "/" + keyName); - result.put(JSON_PROPERTY_FILE_SIZE, keyInfoDetails.getDataSize()); - - ArrayNode blocks = JsonUtils.createArrayNode(); - downloadReplicasAndCreateManifest(sanitizedKeyName, replicas, - replicasWithoutChecksum, dir, blocks); - result.set(JSON_PROPERTY_FILE_BLOCKS, blocks); - - String prettyJson = JsonUtils.toJsonStringWithDefaultPrettyPrinter(result); - - String manifestFileName = sanitizedKeyName + "_manifest"; - System.out.println("Writing manifest file : " + manifestFileName); - File manifestFile - = new File(dir, manifestFileName); - Files.write(manifestFile.toPath(), - prettyJson.getBytes(StandardCharsets.UTF_8)); - } finally { - newClient.close(); - } + private String outputDir; + private RpcClient rpcClient = null; + private OzoneClient client; + private Logger log; + private OzoneConfiguration ozoneConfiguration; + + public Checksums(OzoneClient client, String outputDir, Logger log, OzoneConfiguration conf) { + this.client = client; + this.outputDir = outputDir; + this.log = log; + this.ozoneConfiguration = conf; } private void downloadReplicasAndCreateManifest( @@ -161,8 +95,8 @@ private void downloadReplicasAndCreateManifest( ArrayNode replicasJson = JsonUtils.createArrayNode(); blockIndex += 1; - blockJson.put(JSON_PROPERTY_BLOCK_INDEX, blockIndex); OmKeyLocationInfo locationInfo = block.getKey(); + blockJson.put(JSON_PROPERTY_BLOCK_INDEX, blockIndex); 
blockJson.put(JSON_PROPERTY_BLOCK_CONTAINERID, locationInfo.getContainerID()); blockJson.put(JSON_PROPERTY_BLOCK_LOCALID, locationInfo.getLocalID()); @@ -184,7 +118,6 @@ private void downloadReplicasAndCreateManifest( String fileName = keyName + "_block" + blockIndex + "_" + datanode.getHostName(); - System.out.println("Writing : " + fileName); Path path = new File(dir, fileName).toPath(); try (InputStream is = replica.getValue()) { @@ -204,7 +137,7 @@ private void downloadReplicasAndCreateManifest( blockJson.set(JSON_PROPERTY_BLOCK_REPLICAS, replicasJson); blocks.add(blockJson); - IOUtils.close(LOG, blockReplicasWithoutChecksum.values()); + IOUtils.close(log, blockReplicasWithoutChecksum.values()); } } @@ -236,7 +169,7 @@ private File createDirectory(String volumeName, String bucketName, System.out.println("Creating directory : " + directoryName); File dir = new File(outputDir, directoryName); if (!dir.exists()) { - if (dir.mkdir()) { + if (dir.mkdirs()) { System.out.println("Successfully created!"); } else { throw new IOException(String.format( @@ -245,4 +178,66 @@ private File createDirectory(String volumeName, String bucketName, } return dir; } + + @Override + public void verifyKey(OzoneKeyDetails keyDetails) { + String volumeName = keyDetails.getVolumeName(); + String bucketName = keyDetails.getBucketName(); + String keyName = keyDetails.getName(); + System.out.println("Processing key : " + volumeName + "/" + bucketName + "/" + keyName); + boolean isChecksumVerifyEnabled = ozoneConfiguration.getBoolean("ozone.client.verify.checksum", true); + RpcClient newClient = null; + try { + OzoneConfiguration configuration = new OzoneConfiguration(ozoneConfiguration); + configuration.setBoolean("ozone.client.verify.checksum", !isChecksumVerifyEnabled); + newClient = getClient(isChecksumVerifyEnabled); + ClientProtocol noChecksumClient; + ClientProtocol checksumClient; + if (isChecksumVerifyEnabled) { + checksumClient = client.getObjectStore().getClientProxy(); + 
noChecksumClient = newClient; + } else { + checksumClient = newClient; + noChecksumClient = client.getObjectStore().getClientProxy(); + } + + // Multilevel keys will have a '/' in their names. This interferes with + // directory and file creation process. Flatten the keys to fix this. + String sanitizedKeyName = keyName.replace("/", "_"); + + File dir = createDirectory(volumeName, bucketName, sanitizedKeyName); + OzoneKeyDetails keyInfoDetails = checksumClient.getKeyDetails(volumeName, bucketName, keyName); + Map> replicas = + checksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName); + Map> replicasWithoutChecksum = + noChecksumClient.getKeysEveryReplicas(volumeName, bucketName, keyName); + + ObjectNode result = JsonUtils.createObjectNode(null); + result.put(JSON_PROPERTY_FILE_NAME, volumeName + "/" + bucketName + "/" + keyName); + result.put(JSON_PROPERTY_FILE_SIZE, keyInfoDetails.getDataSize()); + + ArrayNode blocks = JsonUtils.createArrayNode(); + downloadReplicasAndCreateManifest(sanitizedKeyName, replicas, replicasWithoutChecksum, dir, blocks); + result.set(JSON_PROPERTY_FILE_BLOCKS, blocks); + + String prettyJson = JsonUtils.toJsonStringWithDefaultPrettyPrinter(result); + + String manifestFileName = sanitizedKeyName + "_manifest"; + File manifestFile = new File(dir, manifestFileName); + Files.write(manifestFile.toPath(), prettyJson.getBytes(StandardCharsets.UTF_8)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private RpcClient getClient(boolean isChecksumVerifyEnabled) throws IOException { + if (rpcClient != null) { + return rpcClient; + } + + OzoneConfiguration configuration = new OzoneConfiguration(ozoneConfiguration); + configuration.setBoolean("ozone.client.verify.checksum", !isChecksumVerifyEnabled); + rpcClient = new RpcClient(configuration, null); + return rpcClient; + } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java 
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java similarity index 65% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java index 29acef2b0ddc..b5c11d41d074 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FindMissingPadding.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/FindMissingPadding.java @@ -15,24 +15,24 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.debug; +package org.apache.hadoop.ozone.debug.replicas; import static java.util.Collections.emptySet; import static java.util.Comparator.comparing; import java.io.IOException; +import java.io.PrintWriter; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; -import org.apache.hadoop.hdds.cli.DebugSubcommand; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.XceiverClientFactory; @@ -48,41 +48,26 @@ import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.utils.HAUtils; -import org.apache.hadoop.ozone.client.ObjectStore; -import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.client.OzoneClientException; import org.apache.hadoop.ozone.client.OzoneKey; import 
org.apache.hadoop.ozone.client.OzoneKeyDetails; import org.apache.hadoop.ozone.client.OzoneKeyLocation; -import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.client.protocol.ClientProtocol; import org.apache.hadoop.ozone.client.rpc.RpcClient; -import org.apache.hadoop.ozone.shell.Handler; -import org.apache.hadoop.ozone.shell.OzoneAddress; -import org.apache.hadoop.ozone.shell.Shell; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.StringUtils; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; +import org.slf4j.Logger; /** * Find EC keys affected by missing padding blocks (HDDS-10681). */ -@CommandLine.Command(name = "find-missing-padding", - aliases = { "fmp" }, - description = "List all keys with any missing padding, optionally limited to a volume/bucket/key URI.") -@MetaInfServices(DebugSubcommand.class) -public class FindMissingPadding extends Handler implements DebugSubcommand { +public class FindMissingPadding implements ReplicaVerifier { - @CommandLine.Mixin + private OzoneClient ozoneClient; private ScmOption scmOption; - - @CommandLine.Parameters(arity = "0..1", - description = Shell.OZONE_URI_DESCRIPTION) - private String uri; - + private Logger log; + private PrintWriter printWriter; + private OzoneConfiguration ozoneConfiguration; /** * Keys possibly affected (those with any block under threshold size), * grouped by container ID and block (local) ID. 
@@ -91,69 +76,31 @@ public class FindMissingPadding extends Handler implements DebugSubcommand { private final Set affectedKeys = new HashSet<>(); - @Override - protected OzoneAddress getAddress() throws OzoneClientException { - return new OzoneAddress(uri); + public FindMissingPadding(OzoneClient ozoneClient, ScmOption scmOption, Logger log, + PrintWriter printWriter, OzoneConfiguration ozoneConfiguration) { + this.ozoneClient = ozoneClient; + this.scmOption = scmOption; + this.log = log; + this.printWriter = printWriter; + this.ozoneConfiguration = ozoneConfiguration; } - @Override - protected void execute(OzoneClient ozoneClient, OzoneAddress address) throws IOException { - findCandidateKeys(ozoneClient, address); - checkContainers(ozoneClient); + protected void execute() throws IOException { + checkContainers(); handleAffectedKeys(); } - private void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress address) throws IOException { - ObjectStore objectStore = ozoneClient.getObjectStore(); - ClientProtocol rpcClient = objectStore.getClientProxy(); - String volumeName = address.getVolumeName(); - String bucketName = address.getBucketName(); - String keyName = address.getKeyName(); - if (!keyName.isEmpty()) { - checkKey(rpcClient, volumeName, bucketName, keyName); - } else if (!bucketName.isEmpty()) { - OzoneVolume volume = objectStore.getVolume(volumeName); - OzoneBucket bucket = volume.getBucket(bucketName); - checkBucket(bucket, rpcClient); - } else if (!volumeName.isEmpty()) { - OzoneVolume volume = objectStore.getVolume(volumeName); - checkVolume(volume, rpcClient); - } else { - for (Iterator it = objectStore.listVolumes(null); it.hasNext();) { - checkVolume(it.next(), rpcClient); - } - } - } - - private void checkVolume(OzoneVolume volume, ClientProtocol rpcClient) throws IOException { - for (Iterator it = volume.listBuckets(null); it.hasNext();) { - OzoneBucket bucket = it.next(); - checkBucket(bucket, rpcClient); - } - } - - private void 
checkBucket(OzoneBucket bucket, ClientProtocol rpcClient) throws IOException { - String volumeName = bucket.getVolumeName(); - String bucketName = bucket.getName(); - for (Iterator it = bucket.listKeys(null); it.hasNext();) { - OzoneKey key = it.next(); - if (isEC(key)) { - checkKey(rpcClient, volumeName, bucketName, key.getName()); - } else { - LOG.trace("Key {}/{}/{} is not EC", volumeName, bucketName, key.getName()); - } - } + @Override + public void verifyKey(OzoneKeyDetails keyDetails) { + checkECKey(keyDetails); } - private void checkKey(ClientProtocol rpcClient, String volumeName, String bucketName, String keyName) - throws IOException { - OzoneKeyDetails keyDetails = rpcClient.getKeyDetails(volumeName, bucketName, keyName); - if (isEC(keyDetails)) { - checkECKey(keyDetails); + private void checkECKey(OzoneKeyDetails keyDetails) { + if (!isEC(keyDetails)) { + log.trace("Key {}/{}/{} is not EC", keyDetails.getVolumeName(), keyDetails.getBucketName(), keyDetails.getName()); + return; } - } - private void checkECKey(OzoneKeyDetails keyDetails) { List locations = keyDetails.getOzoneKeyLocations(); if (!locations.isEmpty()) { ECReplicationConfig ecConfig = (ECReplicationConfig) keyDetails.getReplicationConfig(); @@ -167,7 +114,7 @@ private void checkECKey(OzoneKeyDetails keyDetails) { } } } else { - LOG.trace("Key {}/{}/{} has no locations", + log.trace("Key {}/{}/{} has no locations", keyDetails.getVolumeName(), keyDetails.getBucketName(), keyDetails.getName()); } } @@ -176,14 +123,14 @@ private static boolean isEC(OzoneKey key) { return key.getReplicationConfig().getReplicationType() == HddsProtos.ReplicationType.EC; } - private void checkContainers(OzoneClient ozoneClient) throws IOException { + private void checkContainers() throws IOException { if (candidateKeys.isEmpty()) { return; } - SecurityConfig securityConfig = new SecurityConfig(getConf()); + SecurityConfig securityConfig = new SecurityConfig(ozoneConfiguration); final boolean tokenEnabled = 
securityConfig.isSecurityEnabled() && securityConfig.isContainerTokenEnabled(); - StorageContainerLocationProtocol scmContainerClient = HAUtils.getScmContainerClient(getConf()); + StorageContainerLocationProtocol scmContainerClient = HAUtils.getScmContainerClient(ozoneConfiguration); RpcClient rpcClient = (RpcClient) ozoneClient.getProxy(); XceiverClientFactory xceiverClientManager = rpcClient.getXceiverClientManager(); Pipeline.Builder pipelineBuilder = Pipeline.newBuilder() @@ -198,7 +145,7 @@ private void checkContainers(OzoneClient ozoneClient) throws IOException { ContainerInfo container = scmClient.getContainer(containerID); if (container.getState() != HddsProtos.LifeCycleState.CLOSED) { - LOG.trace("Skip container {} as it is not CLOSED, rather {}", containerID, container.getState()); + log.trace("Skip container {} as it is not CLOSED, rather {}", containerID, container.getState()); continue; } @@ -208,7 +155,7 @@ private void checkContainers(OzoneClient ozoneClient) throws IOException { List containerReplicas = scmClient.getContainerReplicas(containerID); - LOG.debug("Container {} replicas: {}", containerID, containerReplicas.stream() + log.debug("Container {} replicas: {}", containerID, containerReplicas.stream() .sorted(comparing(ContainerReplicaInfo::getReplicaIndex) .thenComparing(ContainerReplicaInfo::getState) .thenComparing(r -> r.getDatanodeDetails().getUuidString())) @@ -219,7 +166,7 @@ private void checkContainers(OzoneClient ozoneClient) throws IOException { for (ContainerReplicaInfo replica : containerReplicas) { if (!HddsProtos.LifeCycleState.CLOSED.name().equals(replica.getState())) { - LOG.trace("Ignore container {} replica {} at {} in {} state", + log.trace("Ignore container {} replica {} at {} in {} state", replica.getContainerID(), replica.getReplicaIndex(), replica.getDatanodeDetails(), replica.getState()); continue; } @@ -236,10 +183,10 @@ private void checkContainers(OzoneClient ozoneClient) throws IOException { 
missingBlocks.remove(blockData.getBlockID().getLocalID()); } if (missingBlocks.isEmpty()) { - LOG.debug("All {} blocks in container {} found on replica {} at {}", + log.debug("All {} blocks in container {} found on replica {} at {}", blockToKeysMap.keySet().size(), containerID, replica.getReplicaIndex(), replica.getDatanodeDetails()); } else { - LOG.info("Found {} blocks missing from container {} on replica {} at {}", + log.info("Found {} blocks missing from container {} on replica {} at {}", missingBlocks.size(), containerID, replica.getReplicaIndex(), replica.getDatanodeDetails()); missingBlocks.forEach(b -> affectedKeys.addAll(blockToKeysMap.getOrDefault(b, emptySet()))); } @@ -253,11 +200,11 @@ private void checkContainers(OzoneClient ozoneClient) throws IOException { private void handleAffectedKeys() { if (!affectedKeys.isEmpty()) { - out().println(StringUtils.join("\t", Arrays.asList( + printWriter.println(StringUtils.join("\t", Arrays.asList( "Key", "Size", "Replication" ))); for (OzoneKey key : affectedKeys) { - out().println(StringUtils.join("\t", Arrays.asList( + printWriter.println(StringUtils.join("\t", Arrays.asList( key.getVolumeName() + "/" + key.getBucketName() + "/" + key.getName(), key.getDataSize(), key.getReplicationConfig().getReplication() diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java new file mode 100644 index 000000000000..cbb5f31c981b --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicaVerifier.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.replicas; + +import org.apache.hadoop.ozone.client.OzoneKeyDetails; + +/** + * Functional interface for implementing a key verifier. + */ +@FunctionalInterface +public interface ReplicaVerifier { + void verifyKey(OzoneKeyDetails keyDetails); +} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java index 19531136f4a8..30d5d69f5f91 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasDebug.java @@ -30,7 +30,8 @@ description = "Debug commands for replica-related issues, retrieving replica information from the OM and " + "performing checks over the network against a running cluster.", subcommands = { - ChunkKeyHandler.class + ChunkKeyHandler.class, + ReplicasVerify.class } ) @MetaInfServices(DebugSubcommand.class) diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java new file mode 100644 index 000000000000..a5d474219e89 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java 
@@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.replicas; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import org.apache.hadoop.hdds.scm.cli.ScmOption; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientException; +import org.apache.hadoop.ozone.client.OzoneKey; +import org.apache.hadoop.ozone.client.OzoneKeyDetails; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.shell.Handler; +import org.apache.hadoop.ozone.shell.OzoneAddress; +import org.apache.hadoop.ozone.shell.Shell; +import picocli.CommandLine; + +/** + * Verify replicas command. 
+ */ + +@CommandLine.Command( + name = "verify", + description = "Run checks to verify data across replicas") +public class ReplicasVerify extends Handler { + @CommandLine.Mixin + private ScmOption scmOption; + + @CommandLine.Parameters(arity = "1", + description = Shell.OZONE_URI_DESCRIPTION) + private String uri; + + @CommandLine.Option(names = {"-o", "--output-dir"}, + description = "Destination directory to save the generated output.", + required = true) + private String outputDir; + + @CommandLine.ArgGroup(exclusive = false, multiplicity = "1") + private Verification verification; + + static class Verification { + @CommandLine.Option(names = "--checksums", + description = "Do client side data checksum validation of all replicas.", + // value will be true only if the "--checksums" option was specified on the CLI + defaultValue = "false") + private boolean doExecuteChecksums; + + @CommandLine.Option(names = "--padding", + description = "Check for missing padding in erasure coded replicas.", + defaultValue = "false") + private boolean doExecutePadding; + } + private FindMissingPadding findMissingPadding; + private List<ReplicaVerifier> replicaVerifiers; + + @Override + protected void execute(OzoneClient client, OzoneAddress address) throws IOException { + replicaVerifiers = new ArrayList<>(); + + if (verification.doExecuteChecksums) { + replicaVerifiers.add(new Checksums(client, outputDir, LOG, getConf())); + } + + if (verification.doExecutePadding) { + findMissingPadding = new FindMissingPadding(client, scmOption, LOG, out(), getConf()); + replicaVerifiers.add(findMissingPadding); + } + + findCandidateKeys(client, address); + + if (verification.doExecutePadding) { + findMissingPadding.execute(); + } + } + + @Override + protected OzoneAddress getAddress() throws OzoneClientException { + return new OzoneAddress(uri); + } + + void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress address) throws IOException { + ObjectStore objectStore = ozoneClient.getObjectStore(); + String
volumeName = address.getVolumeName(); + String bucketName = address.getBucketName(); + String keyName = address.getKeyName(); + if (!keyName.isEmpty()) { + OzoneKeyDetails keyDetails = ozoneClient.getProxy().getKeyDetails(volumeName, bucketName, keyName); + processKey(keyDetails); + } else if (!bucketName.isEmpty()) { + OzoneVolume volume = objectStore.getVolume(volumeName); + OzoneBucket bucket = volume.getBucket(bucketName); + checkBucket(bucket); + } else if (!volumeName.isEmpty()) { + OzoneVolume volume = objectStore.getVolume(volumeName); + checkVolume(volume); + } else { + for (Iterator<? extends OzoneVolume> it = objectStore.listVolumes(null); it.hasNext();) { + checkVolume(it.next()); + } + } + } + + void checkVolume(OzoneVolume volume) throws IOException { + for (Iterator<? extends OzoneBucket> it = volume.listBuckets(null); it.hasNext();) { + OzoneBucket bucket = it.next(); + checkBucket(bucket); + } + } + + void checkBucket(OzoneBucket bucket) throws IOException { + for (Iterator<? extends OzoneKey> it = bucket.listKeys(null); it.hasNext();) { + OzoneKey key = it.next(); + // TODO: Remove this check once HDDS-12094 is fixed + if (!key.getName().endsWith("/")) { + processKey(bucket.getKey(key.getName())); + } + } + } + + void processKey(OzoneKeyDetails keyDetails) { + replicaVerifiers.forEach(verifier -> verifier.verifyKey(keyDetails)); + } +}