-
Notifications
You must be signed in to change notification settings - Fork 588
HDDS-13045. Implement Immediate Triggering of Heartbeat when Volume Full #8492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
2b5d7b9
865689f
0600aa4
e5743d2
2437d5e
4fd1252
31bdb20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
|
|
||
| package org.apache.hadoop.ozone.container.common.impl; | ||
|
|
||
| import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL; | ||
| import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL_DEFAULT; | ||
| import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.malformedRequest; | ||
| import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.unsupportedRequest; | ||
| import static org.apache.hadoop.ozone.audit.AuditLogger.PerformanceStringBuilder; | ||
|
|
@@ -32,6 +34,7 @@ | |
| import java.util.Set; | ||
| import java.util.TreeMap; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
| import org.apache.hadoop.hdds.HddsConfigKeys; | ||
| import org.apache.hadoop.hdds.HddsUtils; | ||
| import org.apache.hadoop.hdds.client.BlockID; | ||
|
|
@@ -44,6 +47,7 @@ | |
| import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; | ||
| import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result; | ||
| import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; | ||
| import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; | ||
| import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction; | ||
| import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException; | ||
| import org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException; | ||
|
|
@@ -110,6 +114,8 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor { | |
| private ContainerMetrics metrics; | ||
| private final TokenVerifier tokenVerifier; | ||
| private long slowOpThresholdNs; | ||
| private AtomicLong fullVolumeLastHeartbeatTriggerMs; | ||
| private long fullVolumeHeartbeatThrottleIntervalMs; | ||
|
|
||
| /** | ||
| * Constructs an OzoneContainer that receives calls from | ||
|
|
@@ -130,6 +136,10 @@ public HddsDispatcher(ConfigurationSource config, ContainerSet contSet, | |
| this.tokenVerifier = tokenVerifier != null ? tokenVerifier | ||
| : new NoopTokenVerifier(); | ||
| this.slowOpThresholdNs = getSlowOpThresholdMs(conf) * 1000000; | ||
| fullVolumeLastHeartbeatTriggerMs = new AtomicLong(-1); | ||
| long nodeReportInterval = conf.getTimeDuration(HDDS_NODE_REPORT_INTERVAL, HDDS_NODE_REPORT_INTERVAL_DEFAULT, | ||
| TimeUnit.MILLISECONDS); | ||
| fullVolumeHeartbeatThrottleIntervalMs = Math.min(nodeReportInterval, 60000); // min of interval and 1 minute | ||
|
|
||
| protocolMetrics = | ||
| new ProtocolMessageMetrics<>( | ||
|
|
@@ -335,7 +345,15 @@ && getMissingContainerSet().contains(containerID)) { | |
| // Small performance optimization. We check if the operation is of type | ||
| // write before trying to send CloseContainerAction. | ||
| if (!HddsUtils.isReadOnly(msg)) { | ||
| sendCloseContainerActionIfNeeded(container); | ||
| boolean isFull = isVolumeFull(container); | ||
| sendCloseContainerActionIfNeeded(container, isFull); | ||
| if (isFull) { | ||
| try { | ||
| handleFullVolume(container.getContainerData().getVolume()); | ||
| } catch (StorageContainerException e) { | ||
| LOG.warn("Failed to handle full volume while handling request: {}", msg, e); | ||
| } | ||
| } | ||
| } | ||
| Handler handler = getHandler(containerType); | ||
| if (handler == null) { | ||
|
|
@@ -403,7 +421,7 @@ && getMissingContainerSet().contains(containerID)) { | |
| // in any case, the in memory state of the container should be unhealthy | ||
| Preconditions.checkArgument( | ||
| container.getContainerData().getState() == State.UNHEALTHY); | ||
| sendCloseContainerActionIfNeeded(container); | ||
| sendCloseContainerActionIfNeeded(container, isVolumeFull(container)); | ||
| } | ||
| if (cmdType == Type.CreateContainer | ||
| && result == Result.SUCCESS && dispatcherContext != null) { | ||
|
|
@@ -435,6 +453,37 @@ && getMissingContainerSet().contains(containerID)) { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * If the volume is full, we need to inform SCM about the latest volume usage stats and send the close container | ||
| * action for this container immediately. {@link HddsDispatcher#sendCloseContainerActionIfNeeded(Container, boolean)} | ||
| * just adds the action to the heartbeat. Here, we get the latest storage statistics for this node, add them to the | ||
| * heartbeat, and then send the heartbeat (including container close action) immediately. | ||
| * @param volume the volume being written to | ||
| */ | ||
| private void handleFullVolume(HddsVolume volume) throws StorageContainerException { | ||
| long current = System.currentTimeMillis(); | ||
| long last = fullVolumeLastHeartbeatTriggerMs.get(); | ||
| boolean isFirstTrigger = last == -1; | ||
peterxcli marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| boolean allowedToTrigger = (current - fullVolumeHeartbeatThrottleIntervalMs) >= last; | ||
| if (isFirstTrigger || allowedToTrigger) { | ||
| if (fullVolumeLastHeartbeatTriggerMs.compareAndSet(last, current)) { | ||
| StorageContainerDatanodeProtocolProtos.NodeReportProto nodeReport; | ||
| try { | ||
| nodeReport = context.getParent().getContainer().getNodeReport(); | ||
sumitagrawl marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| context.refreshFullReport(nodeReport); | ||
| context.getParent().triggerHeartbeat(); | ||
| LOG.info("Triggering heartbeat for full volume {}, with node report: {}.", volume, nodeReport); | ||
|
||
| } catch (IOException e) { | ||
| String volumePath = volume.getVolumeRootDir(); | ||
| StorageLocationReport volumeReport = volume.getReport(); | ||
| String error = String.format( | ||
| "Failed to create node report when handling full volume %s. Volume Report: %s", volumePath, volumeReport); | ||
| throw new StorageContainerException(error, e, Result.IO_EXCEPTION); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private long getSlowOpThresholdMs(ConfigurationSource config) { | ||
| return config.getTimeDuration( | ||
| HddsConfigKeys.HDDS_DATANODE_SLOW_OP_WARNING_THRESHOLD_KEY, | ||
|
|
@@ -578,9 +627,9 @@ public void validateContainerCommand( | |
| * marked unhealthy we send Close ContainerAction to SCM. | ||
| * @param container current state of container | ||
| */ | ||
| private void sendCloseContainerActionIfNeeded(Container container) { | ||
| private void sendCloseContainerActionIfNeeded(Container container, boolean isVolumeFull) { | ||
| // We have to find a more efficient way to close a container. | ||
| boolean isSpaceFull = isContainerFull(container) || isVolumeFull(container); | ||
| boolean isSpaceFull = isContainerFull(container) || isVolumeFull; | ||
| boolean shouldClose = isSpaceFull || isContainerUnhealthy(container); | ||
| if (shouldClose) { | ||
| ContainerData containerData = container.getContainerData(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider the case where different volumes get full; for example, at P0 /data1 gets full, and at P1 /data2 gets full,
with (P1 - P0) < interval. Do we expect two immediate container reports, or one report?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently we will only send one report. I think this is fine because in the report we send info about all the volumes. However there's a discussion going on here #8460 (comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a good answer for this after thinking about it for a while. Ideally, if we want to send an immediate heartbeat when one volume is full, we should respect each volume and send a heartbeat for each volume when it becomes full; but considering the complexity introduced to achieve that, I doubt whether it is worth doing.
Besides the heartbeat sent here, there are regular node reports with storage info sent every 60s. If we only send one report regardless of which volume, then probably we only need to send the first one, and let the regular periodic node reports do the rest.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, let's stick to the current implementation then. I'll change the interval to node report interval instead of heartbeat interval.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the purpose of sending a full-volume report is to avoid pipeline and container creation. Now the node report is throttled, and hence close-container is throttled implicitly. The initial purpose was to close the container immediately, to avoid new block allocation during the HB interval (i.e. 30 seconds).
This may be similar to sending a DN HB; the only advantage here is that the first failure within 1 minute is handled immediately, but all later failures are throttled.
For node reports, there is a newly discovered configuration at SCM to avoid new container allocation,
"hdds.datanode.storage.utilization.critical.threshold". We need to recheck the overall target of the problem to solve, and optimize the configuration / fix the inconsistency. cc: @ChenSammi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed, this is dead code in Ozone and is not used anywhere.