From f806e8d419236d4c81f5a8f9f88f75d93505a823 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 8 Mar 2025 17:31:18 -0800 Subject: [PATCH 1/4] HDDS-12529. Clean up code in AbstractContainerReportHandler. --- .../protocol/commands/CommandForDatanode.java | 5 + .../AbstractContainerReportHandler.java | 283 ++++++++---------- .../scm/container/ContainerReportHandler.java | 15 +- .../IncrementalContainerReportHandler.java | 4 +- 4 files changed, 132 insertions(+), 175 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandForDatanode.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandForDatanode.java index 2a974ce21e79..fa2173218e48 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandForDatanode.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/CommandForDatanode.java @@ -19,6 +19,7 @@ import com.google.protobuf.Message; import java.util.UUID; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.server.events.IdentifiableEventPayload; /** @@ -31,6 +32,10 @@ public class CommandForDatanode implements private final SCMCommand command; + public CommandForDatanode(DatanodeDetails datanode, SCMCommand command) { + this(datanode.getUuid(), command); + } + // TODO: Command for datanode should take DatanodeDetails as parameter. public CommandForDatanode(UUID datanodeId, SCMCommand command) { this.datanodeId = datanodeId; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java index cf204f2392f8..5be288e1a01d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java @@ -17,14 +17,13 @@ package org.apache.hadoop.hdds.scm.container; -import com.google.common.base.Preconditions; import com.google.protobuf.TextFormat; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.UUID; -import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import org.apache.hadoop.hdds.client.ECReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -40,6 +39,8 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.apache.ratis.util.MemoizedSupplier; +import org.apache.ratis.util.Preconditions; import org.slf4j.Logger; /** @@ -61,12 +62,46 @@ public class AbstractContainerReportHandler { AbstractContainerReportHandler(final ContainerManager containerManager, final SCMContext scmContext, final Logger logger) { - Preconditions.checkNotNull(containerManager); - Preconditions.checkNotNull(scmContext); - Preconditions.checkNotNull(logger); - this.containerManager = containerManager; - this.scmContext = scmContext; - this.logger = logger; + this.containerManager = Objects.requireNonNull(containerManager, "containerManager == null"); + this.scmContext = Objects.requireNonNull(scmContext, "scmContext == null"); + this.logger = Objects.requireNonNull(logger, "logger == null"); + } + + public Logger getLogger() { + return logger; + } + + /** @return the container in SCM and the replica from a datanode details for logging. */ + static Object getDetailsForLogging(ContainerInfo container, ContainerReplicaProto replica, DatanodeDetails datanode) { + Objects.requireNonNull(replica, "replica == null"); + Objects.requireNonNull(datanode, "datanode == null"); + if (container != null) { + Preconditions.assertSame(container.getContainerID(), replica.getContainerID(), "Container ID"); + } + + final Supplier details = MemoizedSupplier.valueOf(() -> { + final StringBuilder b = new StringBuilder(); + if (container == null) { + b.append("Container #").append(replica.getContainerID()).append(" (NOT_FOUND"); + } else { + b.append("Container ").append(container.containerID()) + .append(" (").append(container.getState()).append(", sid=").append(container.getSequenceId()); + } + return b.append(") r").append(replica.getReplicaIndex()) + .append(" (").append(replica.getState()) + .append(", bcsid=").append(replica.getBlockCommitSequenceId()) + .append(", origin=").append(replica.getOriginNodeId()) + .append(", ").append(replica.hasIsEmpty() && replica.getIsEmpty() ? "empty" : "non-empty") + .append(") from dn ").append(datanode) + .toString(); + }); + + return new Object() { + @Override + public String toString() { + return details.get(); + } + }; } /** @@ -75,13 +110,10 @@ public class AbstractContainerReportHandler { * @param datanodeDetails DatanodeDetails for the DN * @param replicaProto Protobuf representing the replicas * @param publisher EventPublisher instance - * @throws IOException - * @throws InvalidStateTransitionException - * @throws TimeoutException */ protected void processContainerReplica(final DatanodeDetails datanodeDetails, final ContainerReplicaProto replicaProto, final EventPublisher publisher) - throws IOException, InvalidStateTransitionException, TimeoutException { + throws IOException, InvalidStateTransitionException { ContainerInfo container = getContainerManager().getContainer( ContainerID.valueOf(replicaProto.getContainerID())); processContainerReplica( @@ -96,26 +128,20 @@ protected void processContainerReplica(final DatanodeDetails datanodeDetails, * @param containerInfo ContainerInfo represending the container * @param replicaProto ContainerReplica * @param publisher EventPublisher instance - * - * @throws IOException In case of any Exception while processing the report - * @throws TimeoutException In case of timeout while updating container state */ protected void processContainerReplica(final DatanodeDetails datanodeDetails, final ContainerInfo containerInfo, final ContainerReplicaProto replicaProto, final EventPublisher publisher) - throws IOException, InvalidStateTransitionException, TimeoutException { + throws IOException, InvalidStateTransitionException { final ContainerID containerId = containerInfo.containerID(); + final Object detailsForLogging = getDetailsForLogging(containerInfo, replicaProto, datanodeDetails); - if (logger.isDebugEnabled()) { - logger.debug("Processing replica of container {} from datanode {}", - containerId, datanodeDetails); - } + getLogger().debug("Processing replica {}", detailsForLogging); // Synchronized block should be replaced by container lock, // once we have introduced lock inside ContainerInfo. synchronized (containerInfo) { - updateContainerStats(datanodeDetails, containerInfo, replicaProto); - if (!updateContainerState(datanodeDetails, containerInfo, replicaProto, - publisher)) { + updateContainerStats(datanodeDetails, containerInfo, replicaProto, detailsForLogging); + if (!updateContainerState(datanodeDetails, containerInfo, replicaProto, publisher, detailsForLogging)) { updateContainerReplica(datanodeDetails, containerId, replicaProto); } } @@ -131,18 +157,15 @@ protected void processContainerReplica(final DatanodeDetails datanodeDetails, */ private void updateContainerStats(final DatanodeDetails datanodeDetails, final ContainerInfo containerInfo, - final ContainerReplicaProto replicaProto) - throws ContainerNotFoundException { + final ContainerReplicaProto replicaProto, + Object detailsForLogging) throws ContainerNotFoundException { if (containerInfo.getState() == HddsProtos.LifeCycleState.CLOSED && containerInfo.getSequenceId() < replicaProto.getBlockCommitSequenceId()) { - logger.error( - "There is a CLOSED container with lower sequence ID than a replica. Container: {}, Container's " + - "sequence ID: {}, Replica: {}, Replica's sequence ID: {}, Datanode: {}.", containerInfo, - containerInfo.getSequenceId(), TextFormat.shortDebugString(replicaProto), - replicaProto.getBlockCommitSequenceId(), datanodeDetails); + getLogger().error("Container CLOSED with sequence ID lower than a replica: {}, proto={}", + detailsForLogging, TextFormat.shortDebugString(replicaProto)); } - if (isHealthy(replicaProto::getState)) { + if (isHealthy(replicaProto.getState())) { if (containerInfo.getSequenceId() < replicaProto.getBlockCommitSequenceId()) { containerInfo.updateSequenceId( @@ -200,8 +223,7 @@ private void updateECContainerStats(ContainerInfo containerInfo, } } - private long calculateUsage(ContainerInfo containerInfo, long lastValue, - long thisValue) { + private static long calculateUsage(ContainerInfo containerInfo, long lastValue, long thisValue) { if (containerInfo.getState().equals(HddsProtos.LifeCycleState.OPEN)) { // Open containers are generally growing in key count and size, the // overall size should be the min of all reported replicas. @@ -213,8 +235,7 @@ private long calculateUsage(ContainerInfo containerInfo, long lastValue, } } - private void updateContainerUsedAndKeys(ContainerInfo containerInfo, - long usedBytes, long keyCount) { + private static void updateContainerUsedAndKeys(ContainerInfo containerInfo, long usedBytes, long keyCount) { if (containerInfo.getUsedBytes() != usedBytes) { containerInfo.setUsedBytes(usedBytes); } @@ -242,59 +263,39 @@ private List getOtherReplicas(ContainerID containerId, * @param datanode Datanode from which the report is received * @param container ContainerInfo representing the the container * @param replica ContainerReplica - * @boolean true - replica should be ignored in the next process + * @return true iff replica must be ignored in the next process * @throws IOException In case of Exception - * @throws TimeoutException In case of timeout while updating container state */ private boolean updateContainerState(final DatanodeDetails datanode, final ContainerInfo container, final ContainerReplicaProto replica, - final EventPublisher publisher) - throws IOException, InvalidStateTransitionException, TimeoutException { + final EventPublisher publisher, + Object detailsForLogging) throws IOException, InvalidStateTransitionException { final ContainerID containerId = container.containerID(); - boolean ignored = false; boolean replicaIsEmpty = replica.hasIsEmpty() && replica.getIsEmpty(); + switch (container.getState()) { case OPEN: - /* - * If the state of a container is OPEN, datanodes cannot report - * any other state. - */ + // If the state of a container is OPEN and a replica is in different state, finalize the container. if (replica.getState() != State.OPEN) { - logger.info("Moving OPEN container {} to CLOSING state, datanode {} " + - "reported {} replica with index {}.", containerId, datanode, - replica.getState(), replica.getReplicaIndex()); - containerManager.updateContainerState(containerId, - LifeCycleEvent.FINALIZE); + getLogger().info("FINALIZE (i.e. CLOSING) {}", detailsForLogging); + containerManager.updateContainerState(containerId, LifeCycleEvent.FINALIZE); } - break; + return false; case CLOSING: - /* - * When the container is in CLOSING state the replicas can be in any - * of the following states: - * - * - OPEN - * - CLOSING - * - QUASI_CLOSED - * - CLOSED - * - * If all the replica are either in OPEN or CLOSING state, do nothing. - * - * If the replica is in QUASI_CLOSED state, move the container to - * QUASI_CLOSED state. - * - * If the replica is in CLOSED state, mark the container as CLOSED. - * - */ + // When the container is in CLOSING state, a replica can be either OPEN, CLOSING, QUASI_CLOSED or CLOSED + // If the replica are either in OPEN or CLOSING state, do nothing. + + // If the replica is in QUASI_CLOSED state, move the container to QUASI_CLOSED state. if (replica.getState() == State.QUASI_CLOSED) { - logger.info("Moving container {} to QUASI_CLOSED state, datanode {} " + - "reported QUASI_CLOSED replica.", containerId, datanode); - containerManager.updateContainerState(containerId, - LifeCycleEvent.QUASI_CLOSE); + getLogger().info("QUASI_CLOSE {}", detailsForLogging); + containerManager.updateContainerState(containerId, LifeCycleEvent.QUASI_CLOSE); + return false; } + // If the replica is in CLOSED state, mark the container as CLOSED. if (replica.getState() == State.CLOSED) { /* For an EC container, only the first index and the parity indexes are @@ -307,114 +308,71 @@ private boolean updateContainerState(final DatanodeDetails datanode, int dataNum = ((ECReplicationConfig)container.getReplicationConfig()).getData(); if (replicaIndex != 1 && replicaIndex <= dataNum) { - break; + return false; } } - if (!verifyBcsId(replica.getBlockCommitSequenceId(), container.getSequenceId(), datanode, containerId)) { - logger.warn("Ignored moving container {} from CLOSING to CLOSED state because replica bcsId ({}) " + - "reported by datanode {} does not match sequenceId ({}).", - containerId, replica.getBlockCommitSequenceId(), datanode, container.getSequenceId()); + if (bcsidMismatched(container, replica, detailsForLogging)) { return true; } - logger.info("Moving container {} from CLOSING to CLOSED state, datanode {} " + - "reported CLOSED replica with index {}.", containerId, datanode, - replica.getReplicaIndex()); - containerManager.updateContainerState(containerId, - LifeCycleEvent.CLOSE); + getLogger().info("CLOSE {}", detailsForLogging); + containerManager.updateContainerState(containerId, LifeCycleEvent.CLOSE); } - - break; + return false; case QUASI_CLOSED: - /* - * The container is in QUASI_CLOSED state, this means that at least - * one of the replica was QUASI_CLOSED. - * - * Now replicas can be in any of the following state. - * - * 1. OPEN - * 2. CLOSING - * 3. QUASI_CLOSED - * 4. CLOSED - * - * If at least one of the replica is in CLOSED state, mark the - * container as CLOSED. - * - */ + // The container is QUASI_CLOSED, this means that at least one of the replicas was QUASI_CLOSED. + // Now replicas can be in either OPEN, CLOSING, QUASI_CLOSED or CLOSED + + // If one of the replica is in CLOSED state, mark the container as CLOSED. if (replica.getState() == State.CLOSED) { - if (!verifyBcsId(replica.getBlockCommitSequenceId(), container.getSequenceId(), datanode, containerId)) { - logger.warn("Ignored moving container {} from QUASI_CLOSED to CLOSED state because replica bcsId ({}) " + - "reported by datanode {} does not match sequenceId ({}).", - containerId, replica.getBlockCommitSequenceId(), datanode, container.getSequenceId()); + if (bcsidMismatched(container, replica, detailsForLogging)) { return true; } - logger.info("Moving container {} from QUASI_CLOSED to CLOSED state, datanode {} " + - "reported CLOSED replica with index {}.", containerId, datanode, - replica.getReplicaIndex()); - containerManager.updateContainerState(containerId, - LifeCycleEvent.FORCE_CLOSE); + getLogger().info("FORCE_CLOSE for {}", detailsForLogging); + containerManager.updateContainerState(containerId, LifeCycleEvent.FORCE_CLOSE); } - break; + return false; case CLOSED: - /* - * The container is already in closed state. do nothing. - */ - break; + // The container is already in closed state. do nothing. + return false; case DELETED: // If container is in DELETED state and the reported replica is empty, delete the empty replica. // We should also do this for DELETING containers and currently DeletingContainerHandler does that if (replicaIsEmpty) { - deleteReplica(containerId, datanode, publisher, "DELETED", false); - break; + deleteReplica(containerId, datanode, publisher, "DELETED", false, detailsForLogging); + return false; } + // HDDS-12421: fall-through to case DELETING case DELETING: - /* - * HDDS-11136: If a DELETING container has a non-empty CLOSED replica, the container should be moved back to - * CLOSED state. - * - * HDDS-12421: If a DELETING or DELETED container has a non-empty replica, the container should also be moved - * back to CLOSED state. - */ + // HDDS-11136: If a DELETING container has a non-empty CLOSED replica, transition the container to CLOSED + // HDDS-12421: If a DELETING or DELETED container has a non-empty replica, transition the container to CLOSED boolean replicaStateAllowed = (replica.getState() != State.INVALID && replica.getState() != State.DELETED); if (!replicaIsEmpty && replicaStateAllowed) { - logger.info("Moving container {} from {} to CLOSED state, datanode {} reported replica with state={}, " + - "isEmpty={}, bcsId={}, keyCount={}, and origin={}", - container, container.getState(), datanode.getHostName(), replica.getState(), - replica.getIsEmpty(), replica.getBlockCommitSequenceId(), replica.getKeyCount(), replica.getOriginNodeId()); + getLogger().info("transitionDeletingToClosed for non-empty CLOSED replica (keyCount={}) for {}", + replica.getKeyCount(), detailsForLogging); containerManager.transitionDeletingOrDeletedToClosedState(containerId); } - break; + return false; default: - break; + getLogger().warn("Unprocessed state {}", container.getState()); + return false; } - - return ignored; } /** * Helper method to verify that the replica's bcsId matches the container's in SCM. - * Throws IOException if the bcsIds do not match. - *

- * @param replicaBcsId Replica bcsId - * @param containerBcsId Container bcsId in SCM - * @param datanode DatanodeDetails for logging - * @param containerId ContainerID for logging - * @return true if verification has passed, false otherwise + * + * @param replica Replica reported from a datanode + * @param container Container in SCM + * @param detailsForLogging The detail information of the container in SCM and the replica from a datanode + * @return true iff the BCSIDs are mismatched */ - private boolean verifyBcsId(long replicaBcsId, long containerBcsId, - DatanodeDetails datanode, ContainerID containerId) { - - if (replicaBcsId != containerBcsId) { - final String errMsg = "Unexpected bcsId for container " + containerId + - " from datanode " + datanode + ". replica's: " + replicaBcsId + - ", SCM's: " + containerBcsId + - ". Ignoring container report for " + containerId; - - logger.error(errMsg); + private boolean bcsidMismatched(ContainerInfo container, ContainerReplicaProto replica, Object detailsForLogging) { + if (replica.getBlockCommitSequenceId() == container.getSequenceId()) { return false; - } else { - return true; } + getLogger().warn("Replica BCSID mismatched for {} ", detailsForLogging); + return true; } private void updateContainerReplica(final DatanodeDetails datanodeDetails, @@ -449,10 +407,10 @@ private void updateContainerReplica(final DatanodeDetails datanodeDetails, * @param replicaState State of the container replica. * @return true if healthy, false otherwise */ - private boolean isHealthy(final Supplier replicaState) { - return replicaState.get() != State.UNHEALTHY - && replicaState.get() != State.INVALID - && replicaState.get() != State.DELETED; + private boolean isHealthy(final State replicaState) { + return replicaState != State.UNHEALTHY + && replicaState != State.INVALID + && replicaState != State.DELETED; } /** @@ -464,19 +422,20 @@ protected ContainerManager getContainerManager() { } protected void deleteReplica(ContainerID containerID, DatanodeDetails dn, - EventPublisher publisher, String reason, boolean force) { - SCMCommand command = new DeleteContainerCommand( - containerID.getId(), force); + EventPublisher publisher, String reason, boolean force, Object detailsForLogging) { + final long term; try { - command.setTerm(scmContext.getTermOfLeader()); + term = scmContext.getTermOfLeader(); } catch (NotLeaderException nle) { - logger.warn("Skip sending delete container command," + - " since not leader SCM", nle); + final String message = "Skip sending DeleteContainerCommand for " + detailsForLogging + ": " + nle; + getLogger().warn(message); return; } - publisher.fireEvent(SCMEvents.DATANODE_COMMAND, - new CommandForDatanode<>(dn.getUuid(), command)); - logger.info("Sending delete container command for " + reason + - " container {} to datanode {}", containerID.getId(), dn); + + final SCMCommand command = new DeleteContainerCommand(containerID, force); + command.setTerm(term); + publisher.fireEvent(SCMEvents.DATANODE_COMMAND, new CommandForDatanode<>(dn, command)); + getLogger().info("Sending {}DeleteContainerCommand due to {} for {}", + force ? "force" : "", reason, detailsForLogging); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java index 484f84576e32..a327f7c545c3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.List; import java.util.Set; -import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; @@ -224,28 +223,24 @@ public void onMessage(final ContainerReportFromDatanode reportFromDatanode, private void processSingleReplica(final DatanodeDetails datanodeDetails, final ContainerInfo container, final ContainerReplicaProto replicaProto, final EventPublisher publisher) { + final Object detailsForLogging = getDetailsForLogging(container, replicaProto, datanodeDetails); if (container == null) { if (unknownContainerHandleAction.equals( UNKNOWN_CONTAINER_ACTION_WARN)) { - LOG.error("Received container report for an unknown container" + - " {} from datanode {}.", replicaProto.getContainerID(), - datanodeDetails); + getLogger().error("Received report for {}", detailsForLogging); } else if (unknownContainerHandleAction.equals( UNKNOWN_CONTAINER_ACTION_DELETE)) { final ContainerID containerId = ContainerID .valueOf(replicaProto.getContainerID()); - deleteReplica(containerId, datanodeDetails, publisher, "unknown", true); + deleteReplica(containerId, datanodeDetails, publisher, "CONTAINER_NOT_FOUND", true, detailsForLogging); } return; } try { processContainerReplica( datanodeDetails, container, replicaProto, publisher); - } catch (IOException | InvalidStateTransitionException | - TimeoutException e) { - LOG.error("Exception while processing container report for container" + - " {} from datanode {}.", replicaProto.getContainerID(), - datanodeDetails, e); + } catch (IOException | InvalidStateTransitionException e) { + getLogger().error("Failed to process report for {}", detailsForLogging, e); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/IncrementalContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/IncrementalContainerReportHandler.java index c3aff0ce0f40..7f8c8162d1f3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/IncrementalContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/IncrementalContainerReportHandler.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdds.scm.container; import java.io.IOException; -import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.report.ContainerReportValidator; @@ -112,8 +111,7 @@ public void onMessage(final IncrementalContainerReportFromDatanode report, LOG.error("Exception while processing ICR for container {}", replicaProto.getContainerID(), ex); } - } catch (IOException | InvalidStateTransitionException | - TimeoutException e) { + } catch (IOException | InvalidStateTransitionException e) { LOG.error("Exception while processing ICR for container {}", replicaProto.getContainerID(), e); } From 6045859d46f265a6d3154998b24e43df884d4d4b Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 8 Mar 2025 17:47:53 -0800 Subject: [PATCH 2/4] Fix javac error --- .../hdds/scm/container/AbstractContainerReportHandler.java | 5 +++-- .../hadoop/hdds/scm/container/ContainerReportHandler.java | 4 ++-- .../recon/scm/ReconIncrementalContainerReportHandler.java | 4 +--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java index 5be288e1a01d..ede268d5198e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java @@ -348,13 +348,14 @@ private boolean updateContainerState(final DatanodeDetails datanode, // HDDS-12421: If a DELETING or DELETED container has a non-empty replica, transition the container to CLOSED boolean replicaStateAllowed = (replica.getState() != State.INVALID && replica.getState() != State.DELETED); if (!replicaIsEmpty && replicaStateAllowed) { - getLogger().info("transitionDeletingToClosed for non-empty CLOSED replica (keyCount={}) for {}", + getLogger().info("transitionDeletingToClosed due to non-empty CLOSED replica (keyCount={}) for {}", replica.getKeyCount(), detailsForLogging); containerManager.transitionDeletingOrDeletedToClosedState(containerId); } return false; default: - getLogger().warn("Unprocessed state {}", container.getState()); + getLogger().error("Replica not processed due to container state {}: {}", + container.getState(), detailsForLogging); return false; } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java index a327f7c545c3..130a11b3c646 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java @@ -227,7 +227,7 @@ private void processSingleReplica(final DatanodeDetails datanodeDetails, if (container == null) { if (unknownContainerHandleAction.equals( UNKNOWN_CONTAINER_ACTION_WARN)) { - getLogger().error("Received report for {}", detailsForLogging); + getLogger().error("CONTAINER_NOT_FOUND for {}", detailsForLogging); } else if (unknownContainerHandleAction.equals( UNKNOWN_CONTAINER_ACTION_DELETE)) { final ContainerID containerId = ContainerID @@ -240,7 +240,7 @@ private void processSingleReplica(final DatanodeDetails datanodeDetails, processContainerReplica( datanodeDetails, container, replicaProto, publisher); } catch (IOException | InvalidStateTransitionException e) { - getLogger().error("Failed to process report for {}", detailsForLogging, e); + getLogger().error("Failed to process {}", detailsForLogging, e); } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java index 00bae5e1a3fd..97636e92fc08 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java @@ -18,7 +18,6 @@ package org.apache.hadoop.ozone.recon.scm; import java.io.IOException; -import java.util.concurrent.TimeoutException; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -98,8 +97,7 @@ public void onMessage(final IncrementalContainerReportFromDatanode report, success = false; LOG.error("Received ICR from unknown datanode {}.", report.getDatanodeDetails(), ex); - } catch (IOException | InvalidStateTransitionException | - TimeoutException e) { + } catch (IOException | InvalidStateTransitionException e) { success = false; LOG.error("Exception while processing ICR for container {}", replicaProto.getContainerID()); From 692402340ff6b335e4bd4e95dc3f69e390213ff1 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Tue, 25 Mar 2025 15:12:03 -0700 Subject: [PATCH 3/4] Update hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java Co-authored-by: Nandakumar Vadivelu --- .../hdds/scm/container/AbstractContainerReportHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java index ede268d5198e..6f17941794bf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java @@ -67,7 +67,7 @@ public class AbstractContainerReportHandler { this.logger = Objects.requireNonNull(logger, "logger == null"); } - public Logger getLogger() { + protected Logger getLogger() { return logger; } From 700f9a4214e118ee38ed1a53a76f7ec674dd0179 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Tue, 25 Mar 2025 15:12:12 -0700 Subject: [PATCH 4/4] Update hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java Co-authored-by: Nandakumar Vadivelu --- .../hdds/scm/container/AbstractContainerReportHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java index 6f17941794bf..17029a915e63 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java @@ -261,7 +261,7 @@ private List getOtherReplicas(ContainerID containerId, * Updates the container state based on the given replica state. * * @param datanode Datanode from which the report is received - * @param container ContainerInfo representing the the container + * @param container ContainerInfo representing the container * @param replica ContainerReplica * @return true iff replica must be ignored in the next process * @throws IOException In case of Exception