Skip to content

Commit 305c493

Browse files
committed
Caching avg total bytes and avg free bytes inside ClusterInfo (opensearch-project#14851)
Signed-off-by: RS146BIJAY <[email protected]> (cherry picked from commit 1305002)
1 parent ed24a14 commit 305c493

File tree

3 files changed

+57
-38
lines changed

3 files changed

+57
-38
lines changed

server/src/main/java/org/opensearch/cluster/ClusterInfo.java

+37
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.cluster;
3434

3535
import org.opensearch.Version;
36+
import org.opensearch.cluster.routing.RoutingNode;
3637
import org.opensearch.cluster.routing.ShardRouting;
3738
import org.opensearch.common.annotation.PublicApi;
3839
import org.opensearch.core.common.io.stream.StreamInput;
@@ -69,6 +70,8 @@ public class ClusterInfo implements ToXContentFragment, Writeable {
6970
final Map<ShardRouting, String> routingToDataPath;
7071
final Map<NodeAndPath, ReservedSpace> reservedSpace;
7172
final Map<String, FileCacheStats> nodeFileCacheStats;
73+
private long avgTotalBytes;
74+
private long avgFreeByte;
7275

7376
protected ClusterInfo() {
7477
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
@@ -98,6 +101,7 @@ public ClusterInfo(
98101
this.routingToDataPath = routingToDataPath;
99102
this.reservedSpace = reservedSpace;
100103
this.nodeFileCacheStats = nodeFileCacheStats;
104+
calculateAvgFreeAndTotalBytes(mostAvailableSpaceUsage);
101105
}
102106

103107
public ClusterInfo(StreamInput in) throws IOException {
@@ -122,6 +126,39 @@ public ClusterInfo(StreamInput in) throws IOException {
122126
} else {
123127
this.nodeFileCacheStats = Map.of();
124128
}
129+
130+
calculateAvgFreeAndTotalBytes(mostAvailableSpaceUsage);
131+
}
132+
133+
/**
134+
* Computes and caches the average total bytes and average free bytes
135+
* across all nodes in the disk usage map (both set to zero when the map is null or empty).
136+
* @param usages Map of nodeId to DiskUsage for all known nodes
137+
*/
138+
private void calculateAvgFreeAndTotalBytes(final Map<String, DiskUsage> usages) {
139+
if (usages == null || usages.isEmpty()) {
140+
this.avgTotalBytes = 0;
141+
this.avgFreeByte = 0;
142+
return;
143+
}
144+
145+
long totalBytes = 0;
146+
long freeBytes = 0;
147+
for (DiskUsage du : usages.values()) {
148+
totalBytes += du.getTotalBytes();
149+
freeBytes += du.getFreeBytes();
150+
}
151+
152+
this.avgTotalBytes = totalBytes / usages.size();
153+
this.avgFreeByte = freeBytes / usages.size();
154+
}
155+
156+
public long getAvgFreeByte() {
157+
return avgFreeByte;
158+
}
159+
160+
public long getAvgTotalBytes() {
161+
return avgTotalBytes;
125162
}
126163

127164
@Override

server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

+20-25
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,8 @@ public static long sizeOfRelocatingShards(
140140

141141
// Where reserved space is unavailable (e.g. stats are out-of-sync) compute a conservative estimate for initialising shards
142142
final List<ShardRouting> initializingShards = node.shardsWithState(ShardRoutingState.INITIALIZING);
143-
initializingShards.removeIf(shardRouting -> reservedSpace.containsShardId(shardRouting.shardId()));
144143
for (ShardRouting routing : initializingShards) {
145-
if (routing.relocatingNodeId() == null) {
144+
if (routing.relocatingNodeId() == null || reservedSpace.containsShardId(routing.shardId())) {
146145
// in practice the only initializing-but-not-relocating shards with a nonzero expected shard size will be ones created
147146
// by a resize (shrink/split/clone) operation which we expect to happen using hard links, so they shouldn't be taking
148147
// any additional space and can be ignored here
@@ -230,7 +229,14 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
230229

231230
// subtractLeavingShards is passed as false here, because they still use disk space, and therefore we should be extra careful
232231
// and take the size into account
233-
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, false);
232+
final DiskUsageWithRelocations usage = getDiskUsage(
233+
node,
234+
allocation,
235+
usages,
236+
clusterInfo.getAvgFreeByte(),
237+
clusterInfo.getAvgTotalBytes(),
238+
false
239+
);
234240
// First, check that the node currently over the low watermark
235241
double freeDiskPercentage = usage.getFreeDiskAsPercentage();
236242
// Cache the used disk percentage for displaying disk percentages consistent with documentation
@@ -492,7 +498,14 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
492498

493499
// subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
494500
// since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
495-
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, true);
501+
final DiskUsageWithRelocations usage = getDiskUsage(
502+
node,
503+
allocation,
504+
usages,
505+
clusterInfo.getAvgFreeByte(),
506+
clusterInfo.getAvgTotalBytes(),
507+
true
508+
);
496509
final String dataPath = clusterInfo.getDataPath(shardRouting);
497510
// If this node is already above the high threshold, the shard cannot remain (get it off!)
498511
final double freeDiskPercentage = usage.getFreeDiskAsPercentage();
@@ -581,13 +594,15 @@ private DiskUsageWithRelocations getDiskUsage(
581594
RoutingNode node,
582595
RoutingAllocation allocation,
583596
final Map<String, DiskUsage> usages,
597+
final long avgFreeBytes,
598+
final long avgTotalBytes,
584599
boolean subtractLeavingShards
585600
) {
586601
DiskUsage usage = usages.get(node.nodeId());
587602
if (usage == null) {
588603
// If there is no usage, and we have other nodes in the cluster,
589604
// use the average usage for all nodes as the usage for this node
590-
usage = averageUsage(node, usages);
605+
usage = new DiskUsage(node.nodeId(), node.node().getName(), "_na_", avgTotalBytes, avgFreeBytes);
591606
if (logger.isDebugEnabled()) {
592607
logger.debug(
593608
"unable to determine disk usage for {}, defaulting to average across nodes [{} total] [{} free] [{}% free]",
@@ -619,26 +634,6 @@ private DiskUsageWithRelocations getDiskUsage(
619634
return diskUsageWithRelocations;
620635
}
621636

622-
/**
623-
* Returns a {@link DiskUsage} for the {@link RoutingNode} using the
624-
* average usage of other nodes in the disk usage map.
625-
* @param node Node to return an averaged DiskUsage object for
626-
* @param usages Map of nodeId to DiskUsage for all known nodes
627-
* @return DiskUsage representing given node using the average disk usage
628-
*/
629-
DiskUsage averageUsage(RoutingNode node, final Map<String, DiskUsage> usages) {
630-
if (usages.size() == 0) {
631-
return new DiskUsage(node.nodeId(), node.node().getName(), "_na_", 0, 0);
632-
}
633-
long totalBytes = 0;
634-
long freeBytes = 0;
635-
for (DiskUsage du : usages.values()) {
636-
totalBytes += du.getTotalBytes();
637-
freeBytes += du.getFreeBytes();
638-
}
639-
return new DiskUsage(node.nodeId(), node.node().getName(), "_na_", totalBytes / usages.size(), freeBytes / usages.size());
640-
}
641-
642637
/**
643638
* Given the DiskUsage for a node and the size of the shard, return the
644639
* percentage of free disk if the shard were to be allocated to the node.

server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java

-13
Original file line numberDiff line numberDiff line change
@@ -863,19 +863,6 @@ public void testUnknownDiskUsage() {
863863
assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1));
864864
}
865865

866-
public void testAverageUsage() {
867-
RoutingNode rn = new RoutingNode("node1", newNode("node1"));
868-
DiskThresholdDecider decider = makeDecider(Settings.EMPTY);
869-
870-
final Map<String, DiskUsage> usages = new HashMap<>();
871-
usages.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 50)); // 50% used
872-
usages.put("node3", new DiskUsage("node3", "n3", "/dev/null", 100, 0)); // 100% used
873-
874-
DiskUsage node1Usage = decider.averageUsage(rn, usages);
875-
assertThat(node1Usage.getTotalBytes(), equalTo(100L));
876-
assertThat(node1Usage.getFreeBytes(), equalTo(25L));
877-
}
878-
879866
public void testFreeDiskPercentageAfterShardAssigned() {
880867
DiskThresholdDecider decider = makeDecider(Settings.EMPTY);
881868

0 commit comments

Comments
 (0)