HDDS-13045. Implement Immediate Triggering of Heartbeat when Volume Full #8492
@@ -32,6 +32,7 @@
   import java.util.Set;
   import java.util.TreeMap;
   import java.util.concurrent.TimeUnit;
+  import java.util.concurrent.atomic.AtomicLong;
   import org.apache.hadoop.hdds.HddsConfigKeys;
   import org.apache.hadoop.hdds.HddsUtils;
   import org.apache.hadoop.hdds.client.BlockID;

@@ -44,6 +45,7 @@
   import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType;
   import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result;
   import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type;
+  import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
   import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction;
   import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
   import org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException;

@@ -110,6 +112,8 @@ public class HddsDispatcher implements ContainerDispatcher, Auditor {
   private ContainerMetrics metrics;
   private final TokenVerifier tokenVerifier;
   private long slowOpThresholdNs;
+  private AtomicLong fullVolumeLastHeartbeatTriggerMs;
+  private long fullVolumeHeartbeatThrottleIntervalMs;

   /**
    * Constructs an OzoneContainer that receives calls from

@@ -130,6 +134,10 @@ public HddsDispatcher(ConfigurationSource config, ContainerSet contSet,
     this.tokenVerifier = tokenVerifier != null ? tokenVerifier
         : new NoopTokenVerifier();
     this.slowOpThresholdNs = getSlowOpThresholdMs(conf) * 1000000;
+    fullVolumeLastHeartbeatTriggerMs = new AtomicLong(-1);
+    long heartbeatInterval =
+        config.getTimeDuration("hdds.heartbeat.interval", 30000, TimeUnit.MILLISECONDS);
+    fullVolumeHeartbeatThrottleIntervalMs = Math.min(heartbeatInterval, 30000);

     protocolMetrics =
         new ProtocolMessageMetrics<>(

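For reference, a minimal standalone sketch (not part of the patch) of how the throttle interval computed above behaves: the configured heartbeat interval is capped at 30 seconds, so a long heartbeat interval cannot delay the full-volume heartbeat for more than that. The class and method names below are hypothetical.

    import java.util.concurrent.TimeUnit;

    // Hypothetical demo; mirrors fullVolumeHeartbeatThrottleIntervalMs = Math.min(heartbeatInterval, 30000).
    public class ThrottleIntervalDemo {
      static long throttleIntervalMs(long heartbeatInterval, TimeUnit unit) {
        // Cap the throttle window at 30 seconds regardless of the configured heartbeat interval.
        return Math.min(unit.toMillis(heartbeatInterval), 30_000L);
      }

      public static void main(String[] args) {
        System.out.println(throttleIntervalMs(30, TimeUnit.SECONDS)); // 30000 ms (default interval)
        System.out.println(throttleIntervalMs(60, TimeUnit.SECONDS)); // 30000 ms (capped)
        System.out.println(throttleIntervalMs(5, TimeUnit.SECONDS));  // 5000 ms (short test interval)
      }
    }
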
@@ -335,7 +343,15 @@ && getMissingContainerSet().contains(containerID)) {
     // Small performance optimization. We check if the operation is of type
     // write before trying to send CloseContainerAction.
     if (!HddsUtils.isReadOnly(msg)) {
-      sendCloseContainerActionIfNeeded(container);
+      boolean isFull = isVolumeFull(container);
+      sendCloseContainerActionIfNeeded(container, isFull);
+      if (isFull) {
+        try {
+          handleFullVolume(container.getContainerData().getVolume());
+        } catch (StorageContainerException e) {
+          ContainerUtils.logAndReturnError(LOG, e, msg);
+        }
+      }
     }
     Handler handler = getHandler(containerType);
     if (handler == null) {

@@ -403,7 +419,7 @@ && getMissingContainerSet().contains(containerID)) {
       // in any case, the in memory state of the container should be unhealthy
       Preconditions.checkArgument(
           container.getContainerData().getState() == State.UNHEALTHY);
-      sendCloseContainerActionIfNeeded(container);
+      sendCloseContainerActionIfNeeded(container, isVolumeFull(container));
     }
     if (cmdType == Type.CreateContainer
         && result == Result.SUCCESS && dispatcherContext != null) {

@@ -435,6 +451,37 @@ && getMissingContainerSet().contains(containerID)) {
     }
   }

+  /**
+   * If the volume is full, we need to inform SCM about the latest volume usage stats and send the close container
+   * action for this container immediately. {@link HddsDispatcher#sendCloseContainerActionIfNeeded(Container, boolean)}
+   * just adds the action to the heartbeat. Here, we get the latest storage statistics for this node, add them to the
+   * heartbeat, and then send the heartbeat (including container close action) immediately.
+   * @param volume the volume being written to
+   */
+  private void handleFullVolume(HddsVolume volume) throws StorageContainerException {
+    long current = System.currentTimeMillis();
+    long last = fullVolumeLastHeartbeatTriggerMs.get();
Contributor: Consider the case where different volumes get full at different times, for example /data1 gets full at P0 and /data2 gets full at P1.

Contributor (author): Currently we will only send one report. I think this is fine because the report includes info about all the volumes. However, there's a related discussion going on here: #8460 (comment).

Contributor: I don't have a good answer for this after thinking about it for a while. Ideally, if we want to send an immediate heartbeat when one volume is full, we should treat each volume separately and send a heartbeat whenever any of them fills up, but considering the complexity required to achieve that, I doubt whether it's worth doing. Apart from the heartbeat sent here, regular node reports with storage info are sent every 60s. If we only send one report regardless of which volume triggered it, then we probably only need to send the first one and let the regular periodic node reports do the rest.

Contributor (author): Ok, let's stick to the current implementation then. I'll change the interval to the node report interval instead of the heartbeat interval.

Contributor: I think the purpose of sending a full-volume report is to avoid pipeline and container creation. Now that the node report is throttled, close container is implicitly throttled as well. The initial purpose was to close the container immediately, to avoid new block allocation during the heartbeat window (i.e. 30 seconds). This may be similar to sending a DN heartbeat; the only advantage here is that the first failure within 1 minute is handled immediately, while all later failures are throttled. For node reports, there is a new configuration at SCM discovered to avoid new container allocation, cc: @ChenSammi

Contributor (author): As discussed, this is dead code in Ozone and is not used anywhere.
+    boolean isFirstTrigger = last == -1;
+    boolean allowedToTrigger = (current - fullVolumeHeartbeatThrottleIntervalMs) >= last;
+    if (isFirstTrigger || allowedToTrigger) {
+      if (fullVolumeLastHeartbeatTriggerMs.compareAndSet(last, current)) {
+        StorageContainerDatanodeProtocolProtos.NodeReportProto nodeReport;
+        try {
+          nodeReport = context.getParent().getContainer().getNodeReport();
+          context.refreshFullReport(nodeReport);
+          context.getParent().triggerHeartbeat();
+          LOG.info("Triggering heartbeat for full volume {}, with node report: {}.", volume, nodeReport);
+        } catch (IOException e) {
+          String volumePath = volume.getVolumeRootDir();
+          StorageLocationReport volumeReport = volume.getReport();
+          String error = String.format(
+              "Failed to create node report when handling full volume %s. Volume Report: %s", volumePath, volumeReport);
+          throw new StorageContainerException(error, e, Result.IO_EXCEPTION);
+        }
+      }
+    }
+  }
+
   private long getSlowOpThresholdMs(ConfigurationSource config) {
     return config.getTimeDuration(
         HddsConfigKeys.HDDS_DATANODE_SLOW_OP_WARNING_THRESHOLD_KEY,

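The core of handleFullVolume is a lock-free throttle: the AtomicLong holds the time of the last triggered heartbeat, and the compareAndSet ensures that when several write requests hit a full volume concurrently, only one of them refreshes the node report and triggers the heartbeat per throttle window. Below is a self-contained sketch of that pattern, with a hypothetical class name and the Ozone-specific calls replaced by a Runnable; it is an illustration, not code from the patch.

    import java.util.concurrent.atomic.AtomicLong;

    // Hypothetical helper illustrating the throttling pattern used in handleFullVolume.
    public class ThrottledTrigger {
      private final AtomicLong lastTriggerMs = new AtomicLong(-1);
      private final long intervalMs;

      public ThrottledTrigger(long intervalMs) {
        this.intervalMs = intervalMs;
      }

      /** Runs the action at most once per interval; returns true if it actually ran. */
      public boolean tryTrigger(Runnable action) {
        long now = System.currentTimeMillis();
        long last = lastTriggerMs.get();
        boolean isFirstTrigger = last == -1;
        boolean allowedToTrigger = (now - intervalMs) >= last;
        // Only the caller that wins the CAS fires the action; losers (including
        // callers still inside the throttle window) simply skip it.
        if ((isFirstTrigger || allowedToTrigger) && lastTriggerMs.compareAndSet(last, now)) {
          action.run();
          return true;
        }
        return false;
      }

      public static void main(String[] args) {
        ThrottledTrigger trigger = new ThrottledTrigger(30_000L);
        System.out.println(trigger.tryTrigger(() -> System.out.println("heartbeat"))); // true
        System.out.println(trigger.tryTrigger(() -> System.out.println("heartbeat"))); // false, throttled
      }
    }

Note that a caller losing the CAS does not retry, which matches the patch: a concurrent writer has just triggered (or is about to trigger) the heartbeat, so there is nothing left to do.
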
@@ -578,9 +625,9 @@ public void validateContainerCommand(
    * marked unhealthy we send Close ContainerAction to SCM.
    * @param container current state of container
    */
-  private void sendCloseContainerActionIfNeeded(Container container) {
+  private void sendCloseContainerActionIfNeeded(Container container, boolean isVolumeFull) {
     // We have to find a more efficient way to close a container.
-    boolean isSpaceFull = isContainerFull(container) || isVolumeFull(container);
+    boolean isSpaceFull = isContainerFull(container) || isVolumeFull;
     boolean shouldClose = isSpaceFull || isContainerUnhealthy(container);
     if (shouldClose) {
       ContainerData containerData = container.getContainerData();

Review discussion on the heartbeat interval configuration:

Can we call HddsServerUtil#getScmHeartbeatInterval instead? And there is HDDS_NODE_REPORT_INTERVAL for the node report. Shall we use the node report property instead of the heartbeat property?

HDDS_NODE_REPORT_INTERVAL is 1 minute; may that be too long?

1m or 3s doesn't matter, because the first heartbeat is always sent out immediately. The 1m is only used to control the throttling, right?

Yes, it's for throttling.
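For reference, a hedged sketch of the alternative discussed in this thread: deriving the throttle window from the node report interval instead of the heartbeat interval. The key name "hdds.node.report.interval" and the 1-minute default are taken from this discussion; the class and method names below are hypothetical, and getTimeDuration is used with the same signature as in the patch.

    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.hdds.conf.ConfigurationSource;

    // Hypothetical helper; not part of the patch.
    public final class FullVolumeThrottleConfig {
      private FullVolumeThrottleConfig() {
      }

      static long throttleIntervalMs(ConfigurationSource config) {
        // Node report interval ("hdds.node.report.interval") defaults to 1 minute per the discussion above.
        long nodeReportIntervalMs = config.getTimeDuration(
            "hdds.node.report.interval", 60_000L, TimeUnit.MILLISECONDS);
        // Cap the throttle window so an unusually long report interval cannot
        // suppress the immediate full-volume heartbeat for too long.
        return Math.min(nodeReportIntervalMs, 60_000L);
      }
    }

As the thread notes, the first full-volume heartbeat is always sent immediately; this interval only bounds how often subsequent ones can be triggered.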