-
Notifications
You must be signed in to change notification settings - Fork 588
HDDS-4023. Delete closed container after all blocks have been deleted. #1338
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
1f165c2
6d0a84a
897a4b9
9f4c625
d434ed4
fb9afc9
cd93423
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ | |
| import org.apache.hadoop.hdds.conf.ConfigGroup; | ||
| import org.apache.hadoop.hdds.conf.ConfigType; | ||
| import org.apache.hadoop.hdds.protocol.DatanodeDetails; | ||
| import org.apache.hadoop.hdds.protocol.proto.HddsProtos; | ||
| import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; | ||
| import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; | ||
| import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; | ||
|
|
@@ -79,7 +80,7 @@ | |
| public class ReplicationManager | ||
| implements MetricsSource, EventHandler<SafeModeStatus> { | ||
|
|
||
| private static final Logger LOG = | ||
| public static final Logger LOG = | ||
| LoggerFactory.getLogger(ReplicationManager.class); | ||
|
|
||
| public static final String METRICS_SOURCE_NAME = "SCMReplicationManager"; | ||
|
|
@@ -312,6 +313,16 @@ private void processContainer(ContainerID id) { | |
| action -> replicas.stream() | ||
| .noneMatch(r -> r.getDatanodeDetails().equals(action.datanode))); | ||
|
|
||
| /* | ||
| * If container is under deleting and all it's replicas are deleted, then | ||
| * make the container as CLEANED, or resend the delete replica command if | ||
| * needed. | ||
| */ | ||
| if (state == LifeCycleState.DELETING) { | ||
| handleContainerUnderDelete(container, replicas); | ||
| return; | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add another check here: This will avoid doing any further processing on a container which is expected to have zero replicas? |
||
| /* | ||
| * We don't have to take any action if the container is healthy. | ||
| * | ||
|
|
@@ -320,6 +331,12 @@ private void processContainer(ContainerID id) { | |
| * exact number of replicas in the same state. | ||
| */ | ||
| if (isContainerHealthy(container, replicas)) { | ||
| /* | ||
| * If container is empty, schedule task to delete the container. | ||
| */ | ||
| if (isContainerEmpty(container, replicas)) { | ||
| deleteContainerReplicas(container, replicas); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ChenSammi , Is there any specific reason that we let ReplicationManager to help clean empty containers? After this, ReplicaManager will do additionally container empty check for all healthy containers. Not sure if this is an efficiency way to put logic here.
Actually I prefer this way as @sodonnel mentioned.
We could also get all replica info and check state in ContainerReportHandler, then send delete container command I'm okay for current way but just share my thought for this. |
||
| return; | ||
| } | ||
|
|
||
|
|
@@ -400,6 +417,21 @@ private boolean isContainerHealthy(final ContainerInfo container, | |
| r -> compareState(container.getState(), r.getState())); | ||
| } | ||
|
|
||
| /** | ||
| * Returns true if the container is empty and CLOSED. | ||
| * | ||
| * @param container Container to check | ||
| * @param replicas Set of ContainerReplicas | ||
| * @return true if the container is empty, false otherwise | ||
| */ | ||
| private boolean isContainerEmpty(final ContainerInfo container, | ||
| final Set<ContainerReplica> replicas) { | ||
| return container.getState() == LifeCycleState.CLOSED && | ||
| (container.getUsedBytes() == 0 && container.getNumberOfKeys() == 0) && | ||
| replicas.stream().allMatch(r -> r.getState() == State.CLOSED && | ||
| r.getBytesUsed() == 0 && r.getKeyCount() == 0); | ||
| } | ||
|
|
||
| /** | ||
| * Checks if the container is under replicated or not. | ||
| * | ||
|
|
@@ -409,6 +441,10 @@ private boolean isContainerHealthy(final ContainerInfo container, | |
| */ | ||
| private boolean isContainerUnderReplicated(final ContainerInfo container, | ||
| final Set<ContainerReplica> replicas) { | ||
| if (container.getState() != LifeCycleState.CLOSED && | ||
| container.getState() != LifeCycleState.QUASI_CLOSED) { | ||
| return false; | ||
| } | ||
| boolean misReplicated = !getPlacementStatus( | ||
| replicas, container.getReplicationFactor().getNumber()) | ||
| .isPolicySatisfied(); | ||
|
|
@@ -465,6 +501,64 @@ private boolean canForceCloseContainer(final ContainerInfo container, | |
| return uniqueQuasiClosedReplicaCount > (replicationFactor / 2); | ||
| } | ||
|
|
||
| /** | ||
| * Delete the container and its replicas. | ||
| * | ||
| * @param container ContainerInfo | ||
| * @param replicas Set of ContainerReplicas | ||
| */ | ||
| private void deleteContainerReplicas(final ContainerInfo container, | ||
| final Set<ContainerReplica> replicas) throws IOException { | ||
| Preconditions.assertTrue(container.getState() == | ||
| LifeCycleState.CLOSED); | ||
| Preconditions.assertTrue(container.getNumberOfKeys() == 0); | ||
| Preconditions.assertTrue(container.getUsedBytes() == 0); | ||
|
|
||
| replicas.stream().forEach(rp -> { | ||
| Preconditions.assertTrue(rp.getState() == State.CLOSED); | ||
| Preconditions.assertTrue(rp.getBytesUsed() == 0); | ||
| Preconditions.assertTrue(rp.getKeyCount() == 0); | ||
| sendDeleteCommand(container, rp.getDatanodeDetails(), false); | ||
| }); | ||
| containerManager.updateContainerState(container.containerID(), | ||
| HddsProtos.LifeCycleEvent.DELETE); | ||
| LOG.debug("Deleting empty container {} replicas,", container.containerID()); | ||
| } | ||
|
|
||
| /** | ||
| * Handle the container which is under delete. | ||
| * | ||
| * @param container ContainerInfo | ||
| * @param replicas Set of ContainerReplicas | ||
| */ | ||
| private void handleContainerUnderDelete(final ContainerInfo container, | ||
| final Set<ContainerReplica> replicas) throws IOException { | ||
| if (replicas.size() == 0) { | ||
| containerManager.updateContainerState(container.containerID(), | ||
| HddsProtos.LifeCycleEvent.CLEANUP); | ||
| LOG.debug("Container {} state changes to DELETED", | ||
| container.containerID()); | ||
| } else { | ||
| // Check whether to resend the delete replica command | ||
| final List<DatanodeDetails> deletionInFlight = inflightDeletion | ||
| .getOrDefault(container.containerID(), Collections.emptyList()) | ||
| .stream() | ||
| .map(action -> action.datanode) | ||
| .collect(Collectors.toList()); | ||
| Set<ContainerReplica> filteredReplicas = replicas.stream().filter( | ||
| r -> !deletionInFlight.contains(r.getDatanodeDetails())) | ||
| .collect(Collectors.toSet()); | ||
| // Resend the delete command | ||
| if (filteredReplicas.size() > 0) { | ||
| filteredReplicas.stream().forEach(rp -> { | ||
| sendDeleteCommand(container, rp.getDatanodeDetails(), false); | ||
| }); | ||
| LOG.debug("Resend delete Container {} command", | ||
| container.containerID()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Force close the container replica(s) with highest sequence Id. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't seem correct to put the logic to delete the replica for the DELETED container inside
updateContainerStats.In
ContainerReportHandler#processContainerReplicas(..)there is logic to delete an unknown container in the exception handler.Could we extract this into a new method, which is called from the exception handler. Then in
AbstractContainerReportHandler#updateContainerState(...)handle the containers which should be deleted in the "case DELETED" branch of the swith statement. It could call that same extracted method - that way the logic to form the DeleteContainer command will be the same for both? It also seems more logical to put the delete inside UpdateContainerState rather than UpdateContainerStats.