Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsInfo;
Expand Down Expand Up @@ -544,7 +546,7 @@ private void handleUnderReplicatedContainer(final ContainerInfo container,
// maintenance nodes, as the replicas will remain present in the
// container manager, even when they go dead.
.filter(r ->
nodeManager.getNodeStatus(r.getDatanodeDetails()).isHealthy())
getNodeStatus(r.getDatanodeDetails()).isHealthy())
.filter(r -> !deletionInFlight.contains(r.getDatanodeDetails()))
.sorted((r1, r2) -> r2.getSequenceId().compareTo(r1.getSequenceId()))
.map(ContainerReplica::getDatanodeDetails)
Expand Down Expand Up @@ -574,7 +576,7 @@ private void handleUnderReplicatedContainer(final ContainerInfo container,
LOG.warn("Cannot replicate container {}, no healthy replica found.",
container.containerID());
}
} catch (IOException ex) {
} catch (IOException | IllegalStateException ex) {
LOG.warn("Exception while replicating container {}.",
container.getContainerID(), ex);
}
Expand Down Expand Up @@ -785,6 +787,20 @@ private <T extends GeneratedMessage> void sendAndTrackDatanodeCommand(
tracker.accept(new InflightAction(datanode, Time.monotonicNow()));
}

/**
* Wrap the call to nodeManager.getNodeStatus, catching any
* NodeNotFoundException and instead throwing an IllegalStateException.
* @param dn The datanodeDetails to obtain the NodeStatus for
* @return NodeStatus corresponding to the given Datanode.
*/
private NodeStatus getNodeStatus(DatanodeDetails dn) {
try {
return nodeManager.getNodeStatus(dn);
} catch (NodeNotFoundException e) {
throw new IllegalStateException("Unable to find NodeStatus for "+dn, e);
}
}

/**
* Compares the container state with the replica state.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
public interface DatanodeAdminMonitor extends Runnable {

void startMonitoring(DatanodeDetails dn, int endInHours);

void stopMonitoring(DatanodeDetails dn);

}
Original file line number Diff line number Diff line change
Expand Up @@ -363,16 +363,9 @@ private void setNodeOpState(DatanodeAdminNodeDetails dn,
nodeManager.setNodeOperationalState(dn.getDatanodeDetails(), state);
}

// TODO - The nodeManager.getNodeStatus call should really throw
// NodeNotFoundException rather than having to handle it here as all
// registered nodes must have a status.
private NodeStatus getNodeStatus(DatanodeDetails dnd)
throws NodeNotFoundException {
NodeStatus nodeStatus = nodeManager.getNodeStatus(dnd);
if (nodeStatus == null) {
throw new NodeNotFoundException("Unable to retrieve the nodeStatus");
}
return nodeStatus;
return nodeManager.getNodeStatus(dnd);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,7 @@ public synchronized void startMaintenance(DatanodeDetails dn, int endInHours)

private NodeStatus getNodeStatus(DatanodeDetails dn)
throws NodeNotFoundException {
NodeStatus nodeStatus = nodeManager.getNodeStatus(dn);
if (nodeStatus == null) {
throw new NodeNotFoundException();
}
return nodeStatus;
return nodeManager.getNodeStatus(dn);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,10 @@ int getNodeCount(
* Returns the node status of a specific node.
* @param datanodeDetails DatanodeDetails
* @return NodeStatus for the node
* @throws NodeNotFoundException if the node does not exist
*/
NodeStatus getNodeStatus(DatanodeDetails datanodeDetails);
NodeStatus getNodeStatus(DatanodeDetails datanodeDetails)
throws NodeNotFoundException;

/**
* Set the operation state of a node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,9 @@ public int getNodeCount(NodeOperationalState nodeOpState, NodeState health) {
* @return NodeStatus for the node
*/
@Override
public NodeStatus getNodeStatus(DatanodeDetails datanodeDetails) {
try {
return nodeStateManager.getNodeStatus(datanodeDetails);
} catch (NodeNotFoundException e) {
// TODO: should we throw NodeNotFoundException?
return null;
}
public NodeStatus getNodeStatus(DatanodeDetails datanodeDetails)
throws NodeNotFoundException {
return nodeStateManager.getNodeStatus(datanodeDetails);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hdds.scm.ScmUtils;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
import org.apache.hadoop.hdds.scm.safemode.SafeModePrecheck;
import org.apache.hadoop.hdds.scm.container.ContainerID;
Expand Down Expand Up @@ -357,15 +358,19 @@ public List<HddsProtos.Node> queryNode(
}

List<HddsProtos.Node> result = new ArrayList<>();
queryNode(opState, state)
.forEach(node -> {
NodeStatus ns = scm.getScmNodeManager().getNodeStatus(node);
result.add(HddsProtos.Node.newBuilder()
.setNodeID(node.getProtoBufMessage())
.addNodeStates(ns.getHealth())
.addNodeOperationalStates(ns.getOperationalState())
.build());
});
for (DatanodeDetails node : queryNode(opState, state)) {
try {
NodeStatus ns = scm.getScmNodeManager().getNodeStatus(node);
result.add(HddsProtos.Node.newBuilder()
.setNodeID(node.getProtoBufMessage())
.addNodeStates(ns.getHealth())
.addNodeOperationalStates(ns.getOperationalState())
.build());
} catch (NodeNotFoundException e) {
throw new IOException(
"An unexpected error occurred querying the NodeStatus", e);
}
}
return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ public SCMNodeMetric getNodeStat(DatanodeDetails datanodeDetails) {
* @return Healthy/Stale/Dead.
*/
@Override
public NodeStatus getNodeStatus(DatanodeDetails dd) {
public NodeStatus getNodeStatus(DatanodeDetails dd)
throws NodeNotFoundException {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ public void setPipelines(DatanodeDetails dd, int count) {
* Inservice and Healthy NodeStatus.
*/
@Override
public NodeStatus getNodeStatus(DatanodeDetails datanodeDetails) {
public NodeStatus getNodeStatus(DatanodeDetails datanodeDetails)
throws NodeNotFoundException {
Comment on lines +90 to +91
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this method really throw NodeNotFoundException if dni is not found, instead of returning a healthy status, to more closely reflect actual NodeManager behavior?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea it would make more sense for this method do thow NodeNotFoundException, however, I cheated somewhat when using this class in an earlier patch within TestReplicationManager. Replication manager only calls the "getNodeStatus()" method of the NodeManager, and to avoid having to change too many existing tests I defaulted this to return "Healthy + Inservice" for any node not registered so the existing tests passed without modification.

I have an existing Jira HDDS-2673 to refactor and merge MockNodeManager and SimpleMockNodeManager into one, so perhaps we could improve this issue as part of that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we could improve this issue as part of that?

Sure, that's fine, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment to HDDS-2673 to remind about this when I get to working on it.

DatanodeInfo dni = nodeMap.get(datanodeDetails.getUuid());
if (dni != null) {
return dni.getNodeStatus();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ public void testNodeCanBeQueuedAndCancelled() {
* must wait until the pipelines have closed before completing the flow.
*/
@Test
public void testClosePipelinesEventFiredWhenAdminStarted() {
public void testClosePipelinesEventFiredWhenAdminStarted()
throws NodeNotFoundException{
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
Expand Down Expand Up @@ -136,7 +137,8 @@ public void testClosePipelinesEventFiredWhenAdminStarted() {
* state.
*/
@Test
public void testDecommissionNodeTransitionsToCompleteWhenNoContainers() {
public void testDecommissionNodeTransitionsToCompleteWhenNoContainers()
throws NodeNotFoundException {
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
Expand Down Expand Up @@ -280,7 +282,8 @@ public void testDecommissionAbortedWhenNodeGoesDead()
}

@Test
public void testMaintenanceWaitsForMaintenanceToComplete() {
public void testMaintenanceWaitsForMaintenanceToComplete()
throws NodeNotFoundException {
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
Expand Down Expand Up @@ -310,7 +313,8 @@ public void testMaintenanceWaitsForMaintenanceToComplete() {
}

@Test
public void testMaintenanceEndsClosingPipelines() {
public void testMaintenanceEndsClosingPipelines()
throws NodeNotFoundException {
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
Expand Down Expand Up @@ -366,7 +370,8 @@ public void testMaintenanceEndsWhileReplicatingContainers()
}

@Test
public void testDeadMaintenanceNodeDoesNotAbortWorkflow() {
public void testDeadMaintenanceNodeDoesNotAbortWorkflow()
throws NodeNotFoundException {
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
Expand All @@ -393,7 +398,8 @@ public void testDeadMaintenanceNodeDoesNotAbortWorkflow() {
}

@Test
public void testCancelledNodesMovedToInService() {
public void testCancelledNodesMovedToInService()
throws NodeNotFoundException {
DatanodeDetails dn1 = TestUtils.randomDatanodeDetails();
nodeManager.register(dn1,
new NodeStatus(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.TestUtils;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.test.GenericTestUtils;
Expand Down Expand Up @@ -134,7 +135,7 @@ public void testAnyInvalidHostThrowsException()

@Test
public void testNodesCanBeDecommissionedAndRecommissioned()
throws InvalidHostStringException {
throws InvalidHostStringException, NodeNotFoundException {
List<DatanodeDetails> dns = generateDatanodes();

// Decommission 2 valid nodes
Expand Down Expand Up @@ -173,7 +174,7 @@ public void testNodesCanBeDecommissionedAndRecommissioned()

@Test
public void testNodesCanBePutIntoMaintenanceAndRecommissioned()
throws InvalidHostStringException {
throws InvalidHostStringException, NodeNotFoundException {
List<DatanodeDetails> dns = generateDatanodes();

// Put 2 valid nodes into maintenance
Expand Down