|
11 | 11 | import org.opensearch.action.FailedNodeException; |
12 | 12 | import org.opensearch.action.support.nodes.BaseNodesResponse; |
13 | 13 | import org.opensearch.cluster.ClusterName; |
| 14 | +import org.opensearch.common.unit.TimeValue; |
14 | 15 | import org.opensearch.core.common.io.stream.StreamInput; |
15 | 16 | import org.opensearch.core.common.io.stream.StreamOutput; |
| 17 | +import org.opensearch.core.common.unit.ByteSizeValue; |
16 | 18 | import org.opensearch.core.xcontent.ToXContentObject; |
17 | 19 | import org.opensearch.core.xcontent.XContentBuilder; |
18 | 20 |
|
@@ -72,41 +74,163 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws |
72 | 74 | } |
73 | 75 |
|
74 | 76 | private void aggregateClusterStats(XContentBuilder builder, Params params) throws IOException { |
| 77 | + // Performance aggregates |
75 | 78 | long totalServerRequests = 0; |
76 | 79 | long totalServerRequestsCurrent = 0; |
| 80 | + long totalServerBatches = 0; |
77 | 81 | long totalClientBatches = 0; |
78 | 82 | long totalClientResponses = 0; |
79 | 83 | long totalBytesSent = 0; |
80 | 84 | long totalBytesReceived = 0; |
81 | | - long totalStreamErrors = 0; |
| 85 | + long totalServerRequestTime = 0; |
| 86 | + long totalServerBatchTime = 0; |
| 87 | + long totalClientBatchTime = 0; |
| 88 | + |
| 89 | + // Reliability aggregates |
| 90 | + long totalClientApplicationErrors = 0; |
| 91 | + long totalClientTransportErrors = 0; |
| 92 | + long totalServerApplicationErrors = 0; |
| 93 | + long totalServerTransportErrors = 0; |
| 94 | + long totalClientStreamsCompleted = 0; |
| 95 | + long totalServerStreamsCompleted = 0; |
| 96 | + long totalUptime = 0; |
| 97 | + |
| 98 | + // Resource aggregates |
| 99 | + long totalArrowAllocated = 0; |
| 100 | + long totalArrowPeak = 0; |
| 101 | + long totalDirectMemory = 0; |
| 102 | + int totalClientThreadsActive = 0; |
| 103 | + int totalClientThreadsTotal = 0; |
| 104 | + int totalServerThreadsActive = 0; |
| 105 | + int totalServerThreadsTotal = 0; |
| 106 | + int totalConnections = 0; |
| 107 | + int totalChannels = 0; |
82 | 108 |
|
83 | 109 | for (FlightNodeStats nodeStats : getNodes()) { |
84 | 110 | FlightTransportStats stats = nodeStats.getFlightStats(); |
| 111 | + |
| 112 | + // Performance |
85 | 113 | totalServerRequests += stats.performance.serverRequestsReceived; |
86 | 114 | totalServerRequestsCurrent += stats.performance.serverRequestsCurrent; |
| 115 | + totalServerBatches += stats.performance.serverBatchesSent; |
87 | 116 | totalClientBatches += stats.performance.clientBatchesReceived; |
88 | 117 | totalClientResponses += stats.performance.clientResponsesReceived; |
89 | | - totalBytesSent += stats.performance.bytesSentTotal; |
90 | | - totalBytesReceived += stats.performance.bytesReceivedTotal; |
91 | | - totalStreamErrors += stats.reliability.streamErrorsTotal; |
| 118 | + totalBytesSent += stats.performance.bytesSent; |
| 119 | + totalBytesReceived += stats.performance.bytesReceived; |
| 120 | + totalServerRequestTime += stats.performance.serverRequestTotalMillis; |
| 121 | + totalServerBatchTime += stats.performance.serverBatchTotalMillis; |
| 122 | + totalClientBatchTime += stats.performance.clientBatchTotalMillis; |
| 123 | + |
| 124 | + // Reliability |
| 125 | + totalClientApplicationErrors += stats.reliability.clientApplicationErrors; |
| 126 | + totalClientTransportErrors += stats.reliability.clientTransportErrors; |
| 127 | + totalServerApplicationErrors += stats.reliability.serverApplicationErrors; |
| 128 | + totalServerTransportErrors += stats.reliability.serverTransportErrors; |
| 129 | + totalClientStreamsCompleted += stats.reliability.clientStreamsCompleted; |
| 130 | + totalServerStreamsCompleted += stats.reliability.serverStreamsCompleted; |
| 131 | + totalUptime = Math.max(totalUptime, stats.reliability.uptimeMillis); |
| 132 | + |
| 133 | + // Resources |
| 134 | + totalArrowAllocated += stats.resourceUtilization.arrowAllocatedBytes; |
| 135 | + totalArrowPeak = Math.max(totalArrowPeak, stats.resourceUtilization.arrowPeakBytes); |
| 136 | + totalDirectMemory += stats.resourceUtilization.directMemoryBytes; |
| 137 | + totalClientThreadsActive += stats.resourceUtilization.clientThreadsActive; |
| 138 | + totalClientThreadsTotal += stats.resourceUtilization.clientThreadsTotal; |
| 139 | + totalServerThreadsActive += stats.resourceUtilization.serverThreadsActive; |
| 140 | + totalServerThreadsTotal += stats.resourceUtilization.serverThreadsTotal; |
| 141 | + totalConnections += stats.resourceUtilization.connectionsActive; |
| 142 | + totalChannels += stats.resourceUtilization.channelsActive; |
92 | 143 | } |
93 | 144 |
|
| 145 | + // Performance stats |
94 | 146 | builder.startObject("performance"); |
95 | | - builder.field("total_server_requests", totalServerRequests); |
96 | | - builder.field("total_server_requests_current", totalServerRequestsCurrent); |
97 | | - builder.field("total_client_batches", totalClientBatches); |
98 | | - builder.field("total_client_responses", totalClientResponses); |
99 | | - builder.field("total_bytes_sent", totalBytesSent); |
100 | | - builder.field("total_bytes_received", totalBytesReceived); |
| 147 | + builder.field("server_requests_total", totalServerRequests); |
| 148 | + builder.field("server_requests_current", totalServerRequestsCurrent); |
| 149 | + builder.field("server_batches_sent", totalServerBatches); |
| 150 | + builder.field("client_batches_received", totalClientBatches); |
| 151 | + builder.field("client_responses_received", totalClientResponses); |
| 152 | + builder.field("bytes_sent", totalBytesSent); |
| 153 | + if (params.paramAsBoolean("human", false)) { |
| 154 | + builder.field("bytes_sent_human", new ByteSizeValue(totalBytesSent).toString()); |
| 155 | + } |
| 156 | + builder.field("bytes_received", totalBytesReceived); |
| 157 | + if (params.paramAsBoolean("human", false)) { |
| 158 | + builder.field("bytes_received_human", new ByteSizeValue(totalBytesReceived).toString()); |
| 159 | + } |
| 160 | + if (totalServerRequests > 0) { |
| 161 | + long avgRequestTime = totalServerRequestTime / totalServerRequests; |
| 162 | + builder.field("server_request_avg_millis", avgRequestTime); |
| 163 | + if (params.paramAsBoolean("human", false)) { |
| 164 | + builder.field("server_request_avg_time", TimeValue.timeValueMillis(avgRequestTime).toString()); |
| 165 | + } |
| 166 | + } |
| 167 | + if (totalServerBatches > 0) { |
| 168 | + long avgBatchTime = totalServerBatchTime / totalServerBatches; |
| 169 | + builder.field("server_batch_avg_millis", avgBatchTime); |
| 170 | + if (params.paramAsBoolean("human", false)) { |
| 171 | + builder.field("server_batch_avg_time", TimeValue.timeValueMillis(avgBatchTime).toString()); |
| 172 | + } |
| 173 | + } |
| 174 | + if (totalClientBatches > 0) { |
| 175 | + long avgClientBatchTime = totalClientBatchTime / totalClientBatches; |
| 176 | + builder.field("client_batch_avg_millis", avgClientBatchTime); |
| 177 | + if (params.paramAsBoolean("human", false)) { |
| 178 | + builder.field("client_batch_avg_time", TimeValue.timeValueMillis(avgClientBatchTime).toString()); |
| 179 | + } |
| 180 | + } |
101 | 181 | builder.endObject(); |
102 | 182 |
|
| 183 | + // Reliability stats |
103 | 184 | builder.startObject("reliability"); |
104 | | - builder.field("total_stream_errors", totalStreamErrors); |
105 | | - if (totalServerRequests > 0) { |
106 | | - builder.field("cluster_error_rate_percent", (totalStreamErrors * 100.0) / totalServerRequests); |
| 185 | + builder.field("client_application_errors", totalClientApplicationErrors); |
| 186 | + builder.field("client_transport_errors", totalClientTransportErrors); |
| 187 | + builder.field("server_application_errors", totalServerApplicationErrors); |
| 188 | + builder.field("server_transport_errors", totalServerTransportErrors); |
| 189 | + builder.field("client_streams_completed", totalClientStreamsCompleted); |
| 190 | + builder.field("server_streams_completed", totalServerStreamsCompleted); |
| 191 | + builder.field("cluster_uptime_millis", totalUptime); |
| 192 | + if (params.paramAsBoolean("human", false)) { |
| 193 | + builder.field("cluster_uptime", TimeValue.timeValueMillis(totalUptime).toString()); |
| 194 | + } |
| 195 | + |
| 196 | + long totalErrors = totalClientApplicationErrors + totalClientTransportErrors + totalServerApplicationErrors |
| 197 | + + totalServerTransportErrors; |
| 198 | + long totalStreams = totalClientStreamsCompleted + totalServerStreamsCompleted + totalErrors; |
| 199 | + if (totalStreams > 0) { |
| 200 | + builder.field("cluster_error_rate_percent", (totalErrors * 100.0) / totalStreams); |
| 201 | + builder.field( |
| 202 | + "cluster_success_rate_percent", |
| 203 | + ((totalClientStreamsCompleted + totalServerStreamsCompleted) * 100.0) / totalStreams |
| 204 | + ); |
107 | 205 | } |
108 | 206 | builder.endObject(); |
109 | 207 |
|
110 | | - // Resource utilization stats are per-node only |
| 208 | + // Resource utilization stats |
| 209 | + builder.startObject("resource_utilization"); |
| 210 | + builder.field("arrow_allocated_bytes_total", totalArrowAllocated); |
| 211 | + if (params.paramAsBoolean("human", false)) { |
| 212 | + builder.field("arrow_allocated_total", new ByteSizeValue(totalArrowAllocated).toString()); |
| 213 | + } |
| 214 | + builder.field("arrow_peak_bytes_max", totalArrowPeak); |
| 215 | + if (params.paramAsBoolean("human", false)) { |
| 216 | + builder.field("arrow_peak_max", new ByteSizeValue(totalArrowPeak).toString()); |
| 217 | + } |
| 218 | + builder.field("direct_memory_bytes_total", totalDirectMemory); |
| 219 | + if (params.paramAsBoolean("human", false)) { |
| 220 | + builder.field("direct_memory_total", new ByteSizeValue(totalDirectMemory).toString()); |
| 221 | + } |
| 222 | + builder.field("client_threads_active", totalClientThreadsActive); |
| 223 | + builder.field("client_threads_total", totalClientThreadsTotal); |
| 224 | + builder.field("server_threads_active", totalServerThreadsActive); |
| 225 | + builder.field("server_threads_total", totalServerThreadsTotal); |
| 226 | + builder.field("connections_active", totalConnections); |
| 227 | + builder.field("channels_active", totalChannels); |
| 228 | + if (totalClientThreadsTotal > 0) { |
| 229 | + builder.field("client_thread_utilization_percent", (totalClientThreadsActive * 100.0) / totalClientThreadsTotal); |
| 230 | + } |
| 231 | + if (totalServerThreadsTotal > 0) { |
| 232 | + builder.field("server_thread_utilization_percent", (totalServerThreadsActive * 100.0) / totalServerThreadsTotal); |
| 233 | + } |
| 234 | + builder.endObject(); |
111 | 235 | } |
112 | 236 | } |
0 commit comments