Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e294940
HDDS-11763. Implement container repair logic within datanodes
aswinshakil Nov 22, 2024
99de480
HDDS-11763. Added test cases and fixed bugs.
aswinshakil Jan 6, 2025
f8606fe
HDDS-11763. Fix tests.
aswinshakil Jan 8, 2025
52bde04
Merge branch 'HDDS-10239-container-reconciliation' of https://github.…
aswinshakil Jan 24, 2025
385e8c1
Address partial review comments.
aswinshakil Jan 25, 2025
04aa8a0
Address review comments.
aswinshakil Jan 31, 2025
4f291a0
Merge branch 'HDDS-10239-container-reconciliation' into HDDS-11763-re…
aswinshakil Feb 10, 2025
112762d
Address review.
aswinshakil Feb 18, 2025
07c40e4
Fix tests.
aswinshakil Feb 19, 2025
0350c29
Merge branch 'HDDS-10239-container-reconciliation' of https://github.…
aswinshakil Feb 19, 2025
aa34c57
Address Review Comments
aswinshakil Feb 21, 2025
1e5685e
Merge branch 'HDDS-10239-container-reconciliation' of https://github.…
aswinshakil Mar 7, 2025
546bd6f
Add unit test suite
aswinshakil Mar 10, 2025
89a9848
Fix findbugs.
aswinshakil Mar 10, 2025
2b272e7
Fix findbugs.
aswinshakil Mar 10, 2025
7570005
Address review comments.
aswinshakil Mar 18, 2025
47b5fef
Use BlockInputStream to read data.
aswinshakil Mar 20, 2025
caffe21
Fix findbugs
aswinshakil Mar 20, 2025
12b9443
Use existing blockData from BlockInputStream
aswinshakil Mar 20, 2025
d03e4d9
Use ByteBuffer instead of byte array.
aswinshakil Mar 24, 2025
369b24d
Address review comments.
aswinshakil Apr 4, 2025
f062bed
Address review comments.
aswinshakil Apr 4, 2025
a5796a1
Update tests and address review comments.
aswinshakil Apr 8, 2025
d578f3c
Address review comments.
aswinshakil Apr 10, 2025
130d57a
Address review comments.
aswinshakil Apr 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,11 @@ public static HddsProtos.UUID toProtobuf(UUID uuid) {
: null;
}

/**
 * Renders a checksum value as text.
 *
 * @param value the checksum to format
 * @return {@code value} as an unsigned, lowercase base-16 string without leading zeros
 */
public static String checksumToString(long value) {
  final int hexRadix = 16;
  return Long.toUnsignedString(value, hexRadix);
}

/**
* Logs a warning to report that the class is not closed properly.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.hadoop.hdds.scm.container;

import static org.apache.hadoop.hdds.HddsUtils.checksumToString;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
Expand All @@ -26,7 +28,6 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;


/**
* Class which stores ContainerReplica details on the client.
*/
Expand Down Expand Up @@ -102,7 +103,7 @@ public long getDataChecksum() {
/**
 * JSON serializer that emits a {@code Long} as a hexadecimal string instead of a JSON number.
 */
private static class LongToHexJsonSerializer extends JsonSerializer<Long> {
@Override
public void serialize(Long value, JsonGenerator gen, SerializerProvider provider) throws IOException {
// NOTE(review): two writeString calls appear back to back. This looks like a diff rendering in which
// the Long.toHexString line is the pre-change line and the checksumToString line is its replacement;
// confirm that only one of them exists in the real file.
gen.writeString(Long.toHexString(value));
gen.writeString(checksumToString(value));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ private Lock getLock(long containerID) {
* Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically
* swapped into place.
*/
private Optional<ContainerProtos.ContainerChecksumInfo> read(ContainerData data) throws IOException {
public Optional<ContainerProtos.ContainerChecksumInfo> read(ContainerData data) throws IOException {
long containerID = data.getContainerID();
File checksumFile = getContainerChecksumFile(data);
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ public XceiverClientManager getXceiverClientManager() {
return xceiverClientManager;
}

/**
 * @return the {@code tokenHelper} held by this object
 */
public TokenHelper getTokenHelper() {
  return this.tokenHelper;
}

/**
* Reads {@link ContainerProtos.ContainerChecksumInfo} for a specified container for the specified datanode.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ public class ContainerMetrics implements Closeable {
@Metric private MutableCounterLong containerForceDelete;
@Metric private MutableCounterLong numReadStateMachine;
@Metric private MutableCounterLong bytesReadStateMachine;
@Metric private MutableCounterLong numContainerReconciledWithoutChanges;
@Metric private MutableCounterLong numContainerReconciledWithChanges;


private final EnumMap<ContainerProtos.Type, MutableCounterLong> numOpsArray;
Expand Down Expand Up @@ -172,4 +174,12 @@ public void incBytesReadStateMachine(long bytes) {
/**
 * @return the current value of the {@code bytesReadStateMachine} counter
 */
public long getBytesReadStateMachine() {
  final long bytesRead = this.bytesReadStateMachine.value();
  return bytesRead;
}

/**
 * Increments the {@code numContainerReconciledWithoutChanges} counter.
 */
public void incContainerReconciledWithoutChanges() {
  this.numContainerReconciledWithoutChanges.incr();
}

/**
 * Increments the {@code numContainerReconciledWithChanges} counter.
 */
public void incContainerReconciledWithChanges() {
  this.numContainerReconciledWithChanges.incr();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public class DatanodeStateMachine implements Closeable {

private final DatanodeQueueMetrics queueMetrics;
private final ReconfigurationHandler reconfigurationHandler;
private final DNContainerOperationClient dnClient;
/**
* Constructs a datanode state machine.
* @param datanodeDetails - DatanodeDetails used to identify a datanode
Expand Down Expand Up @@ -229,7 +230,7 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService,

// TODO HDDS-11218 combine the clients used for reconstruction and reconciliation so they share the same cache of
// datanode clients.
DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient);
dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient);

ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat(threadNamePrefix + "PipelineCommandHandlerThread-%d")
Expand Down Expand Up @@ -752,4 +753,9 @@ public DatanodeQueueMetrics getQueueMetrics() {
/**
 * @return the {@code reconfigurationHandler} held by this state machine
 */
public ReconfigurationHandler getReconfigurationHandler() {
  return this.reconfigurationHandler;
}

/**
 * Exposes the {@link DNContainerOperationClient} stored in the {@code dnClient} field.
 * Annotated as visible for testing.
 *
 * @return the {@code dnClient} field
 */
@VisibleForTesting
public DNContainerOperationClient getDnContainerOperationClientClient() {
  return this.dnClient;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.hadoop.ozone.container.common.utils;

import static org.apache.hadoop.hdds.HddsUtils.checksumToString;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
Expand Down Expand Up @@ -145,6 +147,16 @@ public static void logRecovered(ContainerData containerData) {
LOG.info(getMessage(containerData));
}

/**
 * Writes an INFO-level log entry for a container that has been reconciled.
 *
 * @param containerData The container that was reconciled on this datanode.
 * @param oldDataChecksum The old data checksum; its hex form is appended to the log message.
 */
public static void logReconciled(ContainerData containerData, long oldDataChecksum) {
  final String previousChecksum = checksumToString(oldDataChecksum);
  final String detail = "Container reconciled. Old checksum is " + previousChecksum;
  LOG.info(getMessage(containerData, detail));
}

private static String getMessage(ContainerData containerData,
String message) {
return String.join(FIELD_SEPARATOR, getMessage(containerData), message);
Expand All @@ -155,6 +167,7 @@ private static String getMessage(ContainerData containerData) {
"ID=" + containerData.getContainerID(),
"Index=" + containerData.getReplicaIndex(),
"BCSID=" + containerData.getBlockCommitSequenceId(),
"State=" + containerData.getState());
"State=" + containerData.getState(),
"DataChecksum=" + checksumToString(containerData.getDataChecksum()));
}
}
Loading