Skip to content

Commit 332b671

Browse files
authored
[core][stats-die/01] kill STATS in rpc component (#57926)
Signed-off-by: Cuong Nguyen <[email protected]>
1 parent c584b1e commit 332b671

File tree

8 files changed

+118
-65
lines changed

8 files changed

+118
-65
lines changed

src/ray/rpc/BUILD.bazel

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ray_cc_library(
1515
"//src/ray/core_worker:__pkg__",
1616
],
1717
deps = [
18+
":metrics",
1819
":rpc_callback_types",
1920
"//src/ray/common:asio",
2021
"//src/ray/common:grpc_util",
@@ -98,6 +99,7 @@ ray_cc_library(
9899
hdrs = ["server_call.h"],
99100
visibility = ["//visibility:private"],
100101
deps = [
102+
":metrics",
101103
":rpc_callback_types",
102104
"//src/ray/common:asio",
103105
"//src/ray/common:grpc_util",
@@ -175,3 +177,11 @@ ray_cc_library(
175177
"//src/ray/common:status",
176178
],
177179
)
180+
181+
ray_cc_library(
182+
name = "metrics",
183+
hdrs = ["metrics.h"],
184+
deps = [
185+
"//src/ray/stats:stats_metric",
186+
],
187+
)

src/ray/rpc/client_call.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
#include "ray/common/grpc_util.h"
3131
#include "ray/common/id.h"
3232
#include "ray/common/status.h"
33+
#include "ray/rpc/metrics.h"
3334
#include "ray/rpc/rpc_callback_types.h"
34-
#include "ray/stats/metric_defs.h"
3535
#include "ray/util/thread_utils.h"
3636

3737
namespace ray {
@@ -104,7 +104,8 @@ class ClientCallImpl : public ClientCall {
104104
status = return_status_;
105105
}
106106
if (record_stats_ && !status.ok()) {
107-
stats::STATS_grpc_client_req_failed.Record(1.0, stats_handle_->event_name);
107+
grpc_client_req_failed_counter_.Record(1.0,
108+
{{"Method", stats_handle_->event_name}});
108109
}
109110
if (callback_ != nullptr) {
110111
// This should be only called once.
@@ -146,6 +147,9 @@ class ClientCallImpl : public ClientCall {
146147
/// the server and/or tweak certain RPC behaviors.
147148
grpc::ClientContext context_;
148149

150+
ray::stats::Count grpc_client_req_failed_counter_{
151+
GetGrpcClientReqFailedCounterMetric()};
152+
149153
friend class ClientCallManager;
150154
};
151155

src/ray/rpc/metrics.h

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "ray/stats/metric.h"
18+
19+
namespace ray {
20+
namespace rpc {
21+
22+
inline ray::stats::Histogram GetGrpcServerReqProcessTimeMsHistogramMetric() {
23+
return ray::stats::Histogram(
24+
/*name=*/"grpc_server_req_process_time_ms",
25+
/*description=*/"Request latency in grpc server",
26+
/*unit=*/"",
27+
/*boundaries=*/{0.1, 1, 10, 100, 1000, 10000},
28+
/*tag_keys=*/{"Method"});
29+
}
30+
31+
inline ray::stats::Count GetGrpcServerReqNewCounterMetric() {
32+
return ray::stats::Count(
33+
/*name=*/"grpc_server_req_new",
34+
/*description=*/"New request number in grpc server",
35+
/*unit=*/"",
36+
/*tag_keys=*/{"Method"});
37+
}
38+
39+
inline ray::stats::Count GetGrpcServerReqHandlingCounterMetric() {
40+
return ray::stats::Count(
41+
/*name=*/"grpc_server_req_handling",
42+
/*description=*/"Request number are handling in grpc server",
43+
/*unit=*/"",
44+
/*tag_keys=*/{"Method"});
45+
}
46+
47+
inline ray::stats::Count GetGrpcServerReqFinishedCounterMetric() {
48+
return ray::stats::Count(
49+
/*name=*/"grpc_server_req_finished",
50+
/*description=*/"Finished request number in grpc server",
51+
/*unit=*/"",
52+
/*tag_keys=*/{"Method"});
53+
}
54+
55+
inline ray::stats::Count GetGrpcServerReqSucceededCounterMetric() {
56+
return ray::stats::Count(
57+
/*name=*/"grpc_server_req_succeeded",
58+
/*description=*/"Succeeded request count in grpc server",
59+
/*unit=*/"",
60+
/*tag_keys=*/{"Method"});
61+
}
62+
63+
inline ray::stats::Count GetGrpcServerReqFailedCounterMetric() {
64+
return ray::stats::Count(
65+
/*name=*/"grpc_server_req_failed",
66+
/*description=*/"Failed request count in grpc server",
67+
/*unit=*/"",
68+
/*tag_keys=*/{"Method"});
69+
}
70+
71+
inline ray::stats::Count GetGrpcClientReqFailedCounterMetric() {
72+
return ray::stats::Count(
73+
/*name=*/"grpc_client_req_failed",
74+
/*description=*/"Number of gRPC client failures (non-OK response statuses).",
75+
/*unit=*/"",
76+
/*tag_keys=*/{"Method"});
77+
}
78+
79+
} // namespace rpc
80+
} // namespace ray

src/ray/rpc/server_call.h

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
#include "ray/common/grpc_util.h"
2828
#include "ray/common/id.h"
2929
#include "ray/common/status.h"
30+
#include "ray/rpc/metrics.h"
3031
#include "ray/rpc/rpc_callback_types.h"
3132
#include "ray/stats/metric.h"
32-
#include "ray/stats/metric_defs.h"
3333

3434
namespace ray {
3535
namespace rpc {
@@ -185,7 +185,7 @@ class ServerCallImpl : public ServerCall {
185185
// TODO(Yi Cheng) call_name_ sometimes get corrunpted due to memory issues.
186186
RAY_CHECK(!call_name_.empty()) << "Call name is empty";
187187
if (record_metrics_) {
188-
ray::stats::STATS_grpc_server_req_new.Record(1.0, call_name_);
188+
grpc_server_req_new_counter_.Record(1.0, {{"Method", call_name_}});
189189
}
190190
}
191191

@@ -218,7 +218,7 @@ class ServerCallImpl : public ServerCall {
218218

219219
start_time_ = absl::GetCurrentTimeNanos();
220220
if (record_metrics_) {
221-
ray::stats::STATS_grpc_server_req_handling.Record(1.0, call_name_);
221+
grpc_server_req_handling_counter_.Record(1.0, {{"Method", call_name_}});
222222
}
223223
if (!io_service_.stopped()) {
224224
io_service_.post([this, auth_success] { HandleRequestImpl(auth_success); },
@@ -279,8 +279,8 @@ class ServerCallImpl : public ServerCall {
279279

280280
void OnReplySent() override {
281281
if (record_metrics_) {
282-
ray::stats::STATS_grpc_server_req_finished.Record(1.0, call_name_);
283-
ray::stats::STATS_grpc_server_req_succeeded.Record(1.0, call_name_);
282+
grpc_server_req_finished_counter_.Record(1.0, {{"Method", call_name_}});
283+
grpc_server_req_succeeded_counter_.Record(1.0, {{"Method", call_name_}});
284284
}
285285
if (send_reply_success_callback_ && !io_service_.stopped()) {
286286
io_service_.post(
@@ -292,8 +292,8 @@ class ServerCallImpl : public ServerCall {
292292

293293
void OnReplyFailed() override {
294294
if (record_metrics_) {
295-
ray::stats::STATS_grpc_server_req_finished.Record(1.0, call_name_);
296-
ray::stats::STATS_grpc_server_req_failed.Record(1.0, call_name_);
295+
grpc_server_req_finished_counter_.Record(1.0, {{"Method", call_name_}});
296+
grpc_server_req_failed_counter_.Record(1.0, {{"Method", call_name_}});
297297
}
298298
if (send_reply_failure_callback_ && !io_service_.stopped()) {
299299
io_service_.post(
@@ -311,8 +311,8 @@ class ServerCallImpl : public ServerCall {
311311
EventTracker::RecordEnd(std::move(stats_handle_));
312312
auto end_time = absl::GetCurrentTimeNanos();
313313
if (record_metrics_) {
314-
ray::stats::STATS_grpc_server_req_process_time_ms.Record(
315-
(end_time - start_time_) / 1000000.0, call_name_);
314+
grpc_server_req_process_time_ms_histogram_.Record(
315+
(end_time - start_time_) / 1000000.0, {{"Method", call_name_}});
316316
}
317317
}
318318

@@ -385,6 +385,18 @@ class ServerCallImpl : public ServerCall {
385385
/// If true, the server call will generate gRPC server metrics.
386386
bool record_metrics_;
387387

388+
ray::stats::Histogram grpc_server_req_process_time_ms_histogram_{
389+
GetGrpcServerReqProcessTimeMsHistogramMetric()};
390+
ray::stats::Count grpc_server_req_new_counter_{GetGrpcServerReqNewCounterMetric()};
391+
ray::stats::Count grpc_server_req_handling_counter_{
392+
GetGrpcServerReqHandlingCounterMetric()};
393+
ray::stats::Count grpc_server_req_finished_counter_{
394+
GetGrpcServerReqFinishedCounterMetric()};
395+
ray::stats::Count grpc_server_req_succeeded_counter_{
396+
GetGrpcServerReqSucceededCounterMetric()};
397+
ray::stats::Count grpc_server_req_failed_counter_{
398+
GetGrpcServerReqFailedCounterMetric()};
399+
388400
template <class T1, class T2, class T3, class T4, AuthType T5>
389401
friend class ServerCallFactoryImpl;
390402
};

src/ray/stats/metric.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,6 @@ void Metric::RecordForCython(double value,
181181
Record(value, std::move(tags_pair_vec));
182182
}
183183

184-
Metric::~Metric() { opencensus::stats::StatsExporter::RemoveView(name_); }
185-
186184
void Gauge::RegisterOpenTelemetryMetric() {
187185
// Register the metric in OpenTelemetry.
188186
OpenTelemetryMetricRecorder::GetInstance().RegisterGaugeMetric(name_, description_);

src/ray/stats/metric.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class Metric : public observability::MetricInterface {
108108
std::string unit,
109109
const std::vector<std::string> &tag_keys = {});
110110

111-
~Metric() override;
111+
~Metric() = default;
112112

113113
Metric &operator()() { return *this; }
114114

src/ray/stats/metric_defs.cc

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -110,46 +110,6 @@ DEFINE_stats(operation_queue_time_ms,
110110
DEFINE_stats(
111111
operation_active_count, "active operation number", ("Name"), (), ray::stats::GAUGE);
112112

113-
/// GRPC server
114-
DEFINE_stats(grpc_server_req_process_time_ms,
115-
"Request latency in grpc server",
116-
("Method"),
117-
({0.1, 1, 10, 100, 1000, 10000}, ),
118-
ray::stats::HISTOGRAM);
119-
DEFINE_stats(grpc_server_req_new,
120-
"New request number in grpc server",
121-
("Method"),
122-
(),
123-
ray::stats::COUNT);
124-
DEFINE_stats(grpc_server_req_handling,
125-
"Request number are handling in grpc server",
126-
("Method"),
127-
(),
128-
ray::stats::COUNT);
129-
DEFINE_stats(grpc_server_req_finished,
130-
"Finished request number in grpc server",
131-
("Method"),
132-
(),
133-
ray::stats::COUNT);
134-
DEFINE_stats(grpc_server_req_succeeded,
135-
"Succeeded request count in grpc server",
136-
("Method"),
137-
(),
138-
ray::stats::COUNT);
139-
DEFINE_stats(grpc_server_req_failed,
140-
"Failed request count in grpc server",
141-
("Method"),
142-
(),
143-
ray::stats::COUNT);
144-
145-
/// Number of failures observed from gRPC client(s).
146-
/// A failure is an RPC whose response status was not `OK`.
147-
DEFINE_stats(grpc_client_req_failed,
148-
"Number of gRPC client failures (non-OK response statuses).",
149-
("Method"),
150-
(),
151-
ray::stats::COUNT);
152-
153113
/// Object Manager.
154114
DEFINE_stats(object_manager_bytes,
155115
"Number of bytes pushed or received by type {PushedFromLocalPlasma, "

src/ray/stats/metric_defs.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,6 @@ DECLARE_stats(operation_run_time_ms);
5151
DECLARE_stats(operation_queue_time_ms);
5252
DECLARE_stats(operation_active_count);
5353

54-
/// GRPC server
55-
DECLARE_stats(grpc_server_req_process_time_ms);
56-
DECLARE_stats(grpc_server_req_new);
57-
DECLARE_stats(grpc_server_req_handling);
58-
DECLARE_stats(grpc_server_req_finished);
59-
DECLARE_stats(grpc_server_req_succeeded);
60-
DECLARE_stats(grpc_server_req_failed);
61-
62-
/// GRPC Client Failures
63-
DECLARE_stats(grpc_client_req_failed);
64-
6554
/// Object Manager.
6655
DECLARE_stats(object_manager_bytes);
6756
DECLARE_stats(object_manager_received_chunks);

0 commit comments

Comments
 (0)