Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ private OmKeyInfo getS3PartKeyInfo(
return keyInfoWithS3Context.getKeyInfo();
}

private OmKeyInfo getKeyInfo(
public OmKeyInfo getKeyInfo(
String volumeName, String bucketName, String keyName,
boolean forceUpdateContainerCache) throws IOException {
Preconditions.checkNotNull(volumeName);
Expand Down
50 changes: 25 additions & 25 deletions hadoop-ozone/dist/src/main/compose/common/replicas-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,29 @@ key="testfile"
execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests.robot

# get block locations for key
chunkinfo="${key}-blocks-${prefix}"
docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo"
host="$(jq -r '.KeyLocations[0][0]["Datanode-HostName"]' ${chunkinfo})"
container="${host%%.*}"

# corrupt the first block of key on one of the datanodes
datafile="$(jq -r '.KeyLocations[0][0].Locations.files[0]' ${chunkinfo})"
docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}"

execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/ozone-debug-corrupt-block.robot

docker stop "${container}"

wait_for_datanode "${container}" STALE 60
execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/ozone-debug-stale-datanode.robot

wait_for_datanode "${container}" DEAD 60
execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-dead-datanode.robot

docker start "${container}"

wait_for_datanode "${container}" HEALTHY 60
#chunkinfo="${key}-blocks-${prefix}"
#docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo"
#host="$(jq -r '.KeyLocations[0][0]["Datanode-HostName"]' ${chunkinfo})"
#container="${host%%.*}"
#
## corrupt the first block of key on one of the datanodes
#datafile="$(jq -r '.KeyLocations[0][0].Locations.files[0]' ${chunkinfo})"
#docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}"
#
#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/ozone-debug-corrupt-block.robot
#
#docker stop "${container}"
#
#wait_for_datanode "${container}" STALE 60
#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/ozone-debug-stale-datanode.robot
#
#wait_for_datanode "${container}" DEAD 60
#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-dead-datanode.robot
#
#docker start "${container}"
#
#wait_for_datanode "${container}" HEALTHY 60

start_docker_env 9
execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot
execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec6-3.robot
#start_docker_env 9
#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot
#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec6-3.robot
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ Write keys
Execute ozone sh key put o3://om/${VOLUME}/${BUCKET}/${TESTFILE} ${TEMP_DIR}/${TESTFILE}

*** Test Cases ***
Test ozone debug read-replicas
${directory} = Execute replicas verify checksums CLI tool
Set Test Variable ${DIR} ${directory}
#Test ozone debug read-replicas
# ${directory} = Execute replicas verify checksums CLI tool
# Set Test Variable ${DIR} ${directory}

${count_files} = Count Files In Directory ${directory}
Should Be Equal As Integers ${count_files} 1
# ${count_files} = Count Files In Directory ${directory}
# Should Be Equal As Integers ${count_files} 1


Test ozone debug version
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.debug.replicas;

import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;

import java.io.IOException;
import java.util.Collections;
import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.XceiverClientSpi;
import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;

/**
 * Verifies block existence by making getBlock calls to the datanode.
 *
 * <p>Each call to {@link #verifyBlock} targets exactly one datanode: the key's
 * pipeline is rebuilt as a single-node standalone pipeline containing only the
 * replica under test, and a getBlock request is sent over it.
 */
public class BlockExistenceVerifier implements ReplicaVerifier {
  private static final String CHECK_TYPE = "blockExistence";

  private final ContainerOperationClient containerClient;
  private final XceiverClientManager xceiverClientManager;

  /**
   * Creates a verifier that talks to datanodes through the client manager
   * owned by a newly created {@link ContainerOperationClient}.
   *
   * @param conf configuration used to build the container operation client
   * @throws IOException if the container operation client cannot be created
   */
  public BlockExistenceVerifier(OzoneConfiguration conf) throws IOException {
    this.containerClient = new ContainerOperationClient(conf);
    this.xceiverClientManager = containerClient.getXceiverClientManager();
  }

  /**
   * @return the identifier of this check, {@code "blockExistence"}
   */
  @Override
  public String getType() {
    return CHECK_TYPE;
  }

  /**
   * Asks the given datanode for the block described by {@code keyLocation}.
   *
   * @param datanode the replica to query
   * @param keyLocation block location (block ID, token and source pipeline)
   * @return a passing result when the response carries block data, a completed
   *     failure when it does not, or an incomplete failure when the request
   *     itself failed with an {@link IOException}
   */
  @Override
  public BlockVerificationResult verifyBlock(DatanodeDetails datanode, OmKeyLocationInfo keyLocation) {
    XceiverClientSpi client = null;
    try {
      // Restrict the pipeline to the single replica being checked.
      Pipeline pipeline = Pipeline.newBuilder(keyLocation.getPipeline())
          .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE))
          .setNodes(Collections.singletonList(datanode))
          .build();

      client = xceiverClientManager.acquireClientForReadData(pipeline);
      ContainerProtos.GetBlockResponseProto response = ContainerProtocolCalls.getBlock(
          client,
          keyLocation.getBlockID(),
          keyLocation.getToken(),
          Collections.singletonMap(datanode, 1)
      );

      boolean hasBlock = response != null && response.hasBlockData();

      if (hasBlock) {
        return BlockVerificationResult.pass();
      } else {
        return BlockVerificationResult.failCheck("Block does not exist on this replica");
      }
    } catch (IOException e) {
      return BlockVerificationResult.failIncomplete(e.getMessage());
    } finally {
      // Hand the client back to the manager on every path; previously it was
      // acquired for each block and never released.
      if (client != null) {
        xceiverClientManager.releaseClientForReadData(client, false);
      }
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.debug.replicas;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

/**
 * Json structure for replicas to pass through each check and give output.
 *
 * <p>Instances are immutable: the failure list supplied to the constructor is
 * copied, and the list returned by {@link #getFailures()} cannot be modified.
 */
public class BlockVerificationResult {

  private final boolean pass;
  // null when no failure list was supplied (e.g. for a passing result)
  private final List<FailureDetail> failures;

  /**
   * Creates a result.
   *
   * @param pass whether the check passed
   * @param failures failure details, or {@code null} for none; the list is
   *     copied so later changes by the caller do not affect this result
   */
  public BlockVerificationResult(boolean pass, List<FailureDetail> failures) {
    this.pass = pass;
    this.failures = failures == null
        ? null
        : Collections.unmodifiableList(new ArrayList<>(failures));
  }

  /**
   * @return a passing result with no failure details
   */
  public static BlockVerificationResult pass() {
    return new BlockVerificationResult(true, null);
  }

  /**
   * @param message description of the failure
   * @return a failed result whose single detail is marked as completed,
   *     i.e. the check ran to the end and found a problem
   */
  public static BlockVerificationResult failCheck(String message) {
    return new BlockVerificationResult(false,
        Collections.singletonList(new FailureDetail(true, message)));
  }

  /**
   * @param message description of why the check could not finish
   * @return a failed result whose single detail is marked as not completed
   */
  public static BlockVerificationResult failIncomplete(String message) {
    return new BlockVerificationResult(false,
        Collections.singletonList(new FailureDetail(false, message)));
  }

  /**
   * @return {@code true} if the check passed
   */
  public boolean passed() {
    return pass;
  }

  /**
   * @return the unmodifiable failure details, or an empty {@link Optional}
   *     when the result was created without a failure list
   */
  public Optional<List<FailureDetail>> getFailures() {
    return Optional.ofNullable(failures);
  }

  /**
   * Details about the check failure.
   */
  public static class FailureDetail {
    // indicates whether the check finished and failed,
    // or it was unable to finish due to connection or other issues
    private final boolean completed;
    private final String message;

    /**
     * @param completed {@code true} if the check finished and failed,
     *     {@code false} if it was unable to finish
     * @param message description of the failure
     */
    public FailureDetail(boolean completed, String message) {
      this.completed = completed;
      this.message = message;
    }

    /**
     * @return {@code true} if the check finished and failed, {@code false}
     *     if it could not finish
     */
    public boolean isCompleted() {
      return completed;
    }

    /**
     * @return the failure description supplied at construction
     */
    public String getFailureMessage() {
      return message;
    }

  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.debug.replicas;

import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.OzoneClientConfig;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl;
import org.apache.hadoop.ozone.common.OzoneChecksumException;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;

/**
* Verifies the checksum of blocks by checking each replica associated
* with a given key.
*/
public class ChecksumVerifier implements ReplicaVerifier {
private final OzoneConfiguration conf;
private final ContainerOperationClient containerClient;
private final XceiverClientManager xceiverClientManager;
private static final String CHECK_TYPE = "checksum";

@Override
public String getType() {
return CHECK_TYPE;
}

public ChecksumVerifier(OzoneConfiguration conf) throws IOException {
this.conf = conf;
this.containerClient = new ContainerOperationClient(conf);
this.xceiverClientManager = containerClient.getXceiverClientManager();
}

@Override
public BlockVerificationResult verifyBlock(DatanodeDetails datanode, OmKeyLocationInfo keyLocation) {
Pipeline pipeline = Pipeline.newBuilder(keyLocation.getPipeline())
.setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE))
.setNodes(Collections.singletonList(datanode))
.build();

try (InputStream is = new BlockInputStreamFactoryImpl().create(
keyLocation.getPipeline().getReplicationConfig(),
keyLocation,
pipeline,
keyLocation.getToken(),
xceiverClientManager,
null,
conf.getObject(OzoneClientConfig.class))) {
IOUtils.copyLarge(is, NullOutputStream.INSTANCE);
return BlockVerificationResult.pass();
} catch (IOException e) {
Throwable cause = e.getCause() != null ? e.getCause() : e;
if (cause instanceof OzoneChecksumException) {
return BlockVerificationResult.failCheck(cause.getMessage());
} else {
return BlockVerificationResult.failIncomplete(cause.getMessage());
}
}
}
}
Loading