Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,12 @@ private synchronized boolean checkIfDecommissionPossible(List<DatanodeDetails> d
if (opState != NodeOperationalState.IN_SERVICE) {
numDecom--;
validDns.remove(dn);
LOG.warn("Cannot decommission {} because it is not IN-SERVICE", dn.getHostName());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this message (and the similar one for maintenance) could be improved by including the actual opState the node is in.

Also, please consider replacing dn.getHostName() with dn, so that the log message relies on its toString():

/**
 * Builds the textual form of this object: uuidString followed by
 * hostName and ipAddress in parentheses, separated by a slash,
 * i.e. "uuidString(hostName/ipAddress)".
 */
public String toString() {
return uuidString + "(" + hostName + "/" + ipAddress + ")";
}

This adds the UUID and IP address to the log output, making the node uniquely identifiable while keeping the hostname for convenience. (This applies to all similar log statements.)

}
} catch (NodeNotFoundException ex) {
numDecom--;
validDns.remove(dn);
LOG.warn("Cannot decommission {} because it is not found in SCM", dn.getHostName());
}
}

Expand Down Expand Up @@ -430,9 +432,11 @@ private synchronized boolean checkIfDecommissionPossible(List<DatanodeDetails> d
}
int reqNodes = cif.getReplicationConfig().getRequiredNodes();
if ((inServiceTotal - numDecom) < reqNodes) {
int unHealthyTotal = nodeManager.getAllNodes().size() - inServiceTotal;
String errorMsg = "Insufficient nodes. Tried to decommission " + dns.size() +
" nodes of which " + numDecom + " nodes were valid. Cluster has " + inServiceTotal +
" IN-SERVICE nodes, " + reqNodes + " of which are required for minimum replication. ";
" nodes out of " + inServiceTotal + " IN-SERVICE HEALTHY and " + unHealthyTotal +
" not IN-SERVICE or not HEALTHY nodes. Cannot decommission as a minimum of " + reqNodes +
" IN-SERVICE HEALTHY nodes are required to maintain replication after decommission. ";
LOG.info(errorMsg + "Failing due to datanode : {}, container : {}", dn, cid);
errors.add(new DatanodeAdminError("AllHosts", errorMsg));
return false;
Expand Down Expand Up @@ -552,10 +556,12 @@ private synchronized boolean checkIfMaintenancePossible(List<DatanodeDetails> dn
if (opState != NodeOperationalState.IN_SERVICE) {
numMaintenance--;
validDns.remove(dn);
LOG.warn("{} cannot enter maintenance because it is not IN-SERVICE", dn.getHostName());
}
} catch (NodeNotFoundException ex) {
numMaintenance--;
validDns.remove(dn);
LOG.warn("{} cannot enter maintenance because it is not found in SCM", dn.getHostName());
}
}

Expand Down Expand Up @@ -594,9 +600,11 @@ private synchronized boolean checkIfMaintenancePossible(List<DatanodeDetails> dn
minInService = maintenanceReplicaMinimum;
}
if ((inServiceTotal - numMaintenance) < minInService) {
int unHealthyTotal = nodeManager.getAllNodes().size() - inServiceTotal;
String errorMsg = "Insufficient nodes. Tried to start maintenance for " + dns.size() +
" nodes of which " + numMaintenance + " nodes were valid. Cluster has " + inServiceTotal +
" IN-SERVICE nodes, " + minInService + " of which are required for minimum replication. ";
" nodes out of " + inServiceTotal + " IN-SERVICE HEALTHY and " + unHealthyTotal +
" not IN-SERVICE or not HEALTHY nodes. Cannot enter maintenance mode as a minimum of " + minInService +
" IN-SERVICE HEALTHY nodes are required to maintain replication after maintenance. ";
LOG.info(errorMsg + "Failing due to datanode : {}, container : {}", dn, cid);
errors.add(new DatanodeAdminError("AllHosts", errorMsg));
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,10 @@ public void testInsufficientNodeDecommissionThrowsExceptionForRatis() throws
error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress(), dns.get(3).getIpAddress(), dns.get(4).getIpAddress()), false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot decommission as a minimum of %d IN-SERVICE HEALTHY nodes are required", 3);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
Expand Down Expand Up @@ -489,6 +493,10 @@ public void testInsufficientNodeDecommissionThrowsExceptionForEc() throws

error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress()), false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot decommission as a minimum of %d IN-SERVICE HEALTHY nodes are required", 5);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress()), true);
Expand Down Expand Up @@ -537,6 +545,10 @@ public void testInsufficientNodeDecommissionThrowsExceptionRatisAndEc() throws

error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress()), false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot decommission as a minimum of %d IN-SERVICE HEALTHY nodes are required", 5);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
error = decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress()), true);
Expand Down Expand Up @@ -637,6 +649,7 @@ public void testInsufficientNodeDecommissionChecksForNNF() throws
error = decom.decommissionNodes(Arrays.asList(dns.get(0).getIpAddress(),
dns.get(1).getIpAddress(), dns.get(2).getIpAddress()), false);
assertFalse(error.get(0).getHostname().contains("AllHosts"));
assertTrue(error.get(0).getError().contains("The host was not found in SCM"));
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
Expand Down Expand Up @@ -673,6 +686,11 @@ public void testInsufficientNodeMaintenanceThrowsExceptionForRatis() throws
error = decom.startMaintenanceNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress(), dns.get(3).getIpAddress(), dns.get(4).getIpAddress()), 100, false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot enter maintenance mode as a minimum of %d IN-SERVICE HEALTHY nodes are required",
2);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
Expand Down Expand Up @@ -768,6 +786,11 @@ public void testInsufficientNodeMaintenanceThrowsExceptionForEc() throws
error = decom.startMaintenanceNodes(Arrays.asList(dns.get(1).getIpAddress(), dns.get(2).getIpAddress()),
100, false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot enter maintenance mode as a minimum of %d IN-SERVICE HEALTHY nodes are required",
4);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
Expand Down Expand Up @@ -869,6 +892,11 @@ public void testInsufficientNodeMaintenanceThrowsExceptionForRatisAndEc() throws
// it should not be allowed as for EC, maintenance.remaining.redundancy is 2 => 3+2=5 DNs are required
error = decom.startMaintenanceNodes(Arrays.asList(dns.get(1).getIpAddress()), 100, false);
assertTrue(error.get(0).getHostname().contains("AllHosts"));
String errorMsg = String.format("%d IN-SERVICE HEALTHY and %d not IN-SERVICE or not HEALTHY nodes.", 5, 0);
assertTrue(error.get(0).getError().contains(errorMsg));
errorMsg = String.format("Cannot enter maintenance mode as a minimum of %d IN-SERVICE HEALTHY nodes are required",
5);
assertTrue(error.get(0).getError().contains(errorMsg));
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());

Expand Down