Skip to content

Commit 0459b69

Browse files
committed
[core][stats-die/02] kill STATS in object manager component
Signed-off-by: Cuong Nguyen <[email protected]>
1 parent e51f803 commit 0459b69

25 files changed

+331
-191
lines changed

src/ray/common/metrics.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,29 @@ inline ray::stats::Gauge GetActorByStateGaugeMetric() {
4848
};
4949
}
5050

51+
inline ray::stats::Gauge GetObjectStoreMemoryGaugeMetric() {
52+
return ray::stats::Gauge{
53+
/*name=*/"object_store_memory",
54+
/*description=*/"Object store memory by various sub-kinds on this node",
55+
/*unit=*/"",
56+
/// Location:
57+
/// - MMAP_SHM: currently in shared memory(e.g. /dev/shm).
58+
/// - MMAP_DISK: memory that's fallback allocated on mmapped disk,
59+
/// e.g. /tmp.
60+
/// - WORKER_HEAP: ray objects smaller than ('max_direct_call_object_size',
61+
/// default 100KiB) stored in process memory, i.e. inlined return
62+
/// values, placeholders for objects stored in plasma store.
63+
/// - SPILLED: current number of bytes from objects spilled
64+
/// to external storage. Note this might be smaller than
65+
/// the physical storage incurred on the external storage because
66+
/// Ray might fuse spilled objects into a single file, so a deleted
67+
/// spill object might still exist in the spilled file. Check
68+
/// spilled object fusing for more details.
69+
/// ObjectState:
70+
/// - SEALED: sealed objects bytes (could be MMAP_SHM or MMAP_DISK)
71+
/// - UNSEALED: unsealed objects bytes (could be MMAP_SHM or MMAP_DISK)
72+
/*tag_keys=*/{"Location", "ObjectState"},
73+
};
74+
}
75+
5176
} // namespace ray

src/ray/core_worker/BUILD.bazel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ ray_cc_library(
272272
":reference_counter_interface",
273273
"//src/ray/common:asio",
274274
"//src/ray/common:id",
275+
"//src/ray/common:metrics",
275276
"//src/ray/common:ray_config",
276277
"//src/ray/common:status",
277278
"//src/ray/raylet_ipc_client:raylet_ipc_client_interface",
@@ -423,6 +424,6 @@ ray_cc_library(
423424
name = "metrics",
424425
hdrs = ["metrics.h"],
425426
deps = [
426-
"//src/ray/stats:stats_lib",
427+
"//src/ray/stats:stats_metric",
427428
],
428429
)

src/ray/core_worker/store_provider/memory_store/memory_store.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,8 @@ MemoryStoreStats CoreWorkerMemoryStore::GetMemoryStoreStatisticalData() {
597597

598598
void CoreWorkerMemoryStore::RecordMetrics() {
599599
absl::MutexLock lock(&mu_);
600-
stats::STATS_object_store_memory.Record(num_local_objects_bytes_,
601-
{{stats::LocationKey, "WORKER_HEAP"}});
600+
object_store_memory_gauge_.Record(num_local_objects_bytes_,
601+
{{stats::LocationKey, "WORKER_HEAP"}});
602602
}
603603

604604
} // namespace core

src/ray/core_worker/store_provider/memory_store/memory_store.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "absl/synchronization/mutex.h"
2525
#include "ray/common/asio/asio_util.h"
2626
#include "ray/common/id.h"
27+
#include "ray/common/metrics.h"
2728
#include "ray/common/status.h"
2829
#include "ray/core_worker/context.h"
2930
#include "ray/core_worker/reference_counter_interface.h"
@@ -253,6 +254,8 @@ class CoreWorkerMemoryStore {
253254
std::function<std::shared_ptr<RayObject>(const RayObject &object,
254255
const ObjectID &object_id)>
255256
object_allocator_;
257+
258+
ray::stats::Gauge object_store_memory_gauge_{ray::GetObjectStoreMemoryGaugeMetric()};
256259
};
257260

258261
} // namespace core

src/ray/object_manager/BUILD.bazel

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ ray_cc_library(
66
hdrs = ["object_manager.h"],
77
deps = [
88
":chunk_object_reader",
9+
":metrics",
910
":object_buffer_pool",
1011
":object_directory",
1112
":object_manager_common",
@@ -30,6 +31,7 @@ ray_cc_library(
3031
srcs = ["push_manager.cc"],
3132
hdrs = ["push_manager.h"],
3233
deps = [
34+
":metrics",
3335
"//src/ray/common:id",
3436
"//src/ray/stats:stats_metric",
3537
"@com_google_absl//absl/container:flat_hash_map",
@@ -41,6 +43,7 @@ ray_cc_library(
4143
srcs = ["pull_manager.cc"],
4244
hdrs = ["pull_manager.h"],
4345
deps = [
46+
":metrics",
4447
":object_manager_common",
4548
":ownership_object_directory",
4649
"//src/ray/common:id",
@@ -162,3 +165,11 @@ ray_cc_library(
162165
hdrs = ["object_reader.h"],
163166
deps = ["//src/ray/protobuf:common_cc_proto"],
164167
)
168+
169+
ray_cc_library(
170+
name = "metrics",
171+
hdrs = ["metrics.h"],
172+
deps = [
173+
"//src/ray/stats:stats_metric",
174+
],
175+
)

src/ray/object_manager/metrics.h

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Copyright 2025 The Ray Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include "ray/stats/metric.h"
18+
19+
namespace ray {
20+
21+
inline ray::stats::Histogram GetObjectStoreDistHistogramMetric() {
22+
return ray::stats::Histogram{
23+
/*name=*/"object_store_dist",
24+
/*description=*/"The distribution of object size in bytes",
25+
/*unit=*/"MiB",
26+
/*boundaries=*/{32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
27+
/*tag_keys=*/{"Source"},
28+
};
29+
}
30+
31+
inline ray::stats::Gauge GetObjectStoreAvailableMemoryGaugeMetric() {
32+
return ray::stats::Gauge(
33+
/*name=*/"object_store_available_memory",
34+
/*description=*/"Amount of memory currently available in the object store.",
35+
/*unit=*/"bytes");
36+
}
37+
38+
inline ray::stats::Gauge GetObjectStoreUsedMemoryGaugeMetric() {
39+
return ray::stats::Gauge(
40+
/*name=*/"object_store_used_memory",
41+
/*description=*/"Amount of memory currently occupied in the object store.",
42+
/*unit=*/"bytes");
43+
}
44+
45+
inline ray::stats::Gauge GetObjectStoreFallbackMemoryGaugeMetric() {
46+
return ray::stats::Gauge(
47+
/*name=*/"object_store_fallback_memory",
48+
/*description=*/"Amount of memory in fallback allocations in the filesystem.",
49+
/*unit=*/"bytes");
50+
}
51+
52+
inline ray::stats::Gauge GetObjectStoreLocalObjectsGaugeMetric() {
53+
return ray::stats::Gauge(
54+
/*name=*/"object_store_num_local_objects",
55+
/*description=*/"Number of objects currently in the object store.",
56+
/*unit=*/"objects");
57+
}
58+
59+
inline ray::stats::Gauge GetObjectManagerPullRequestsGaugeMetric() {
60+
return ray::stats::Gauge(
61+
/*name=*/"object_manager_num_pull_requests",
62+
/*description=*/"Number of active pull requests for objects.",
63+
/*unit=*/"requests");
64+
}
65+
66+
inline ray::stats::Gauge GetObjectManagerBytesGaugeMetric() {
67+
return ray::stats::Gauge(
68+
/*name=*/"object_manager_bytes",
69+
/*description=*/
70+
"Number of bytes pushed or received by type {PushedFromLocalPlasma, "
71+
"PushedFromLocalDisk, Received}.",
72+
/*unit=*/"bytes",
73+
/*tag_keys=*/{"Type"});
74+
}
75+
76+
inline ray::stats::Gauge GetObjectManagerReceivedChunksGaugeMetric() {
77+
return ray::stats::Gauge(
78+
/*name=*/"object_manager_received_chunks",
79+
/*description=*/
80+
"Number object chunks received broken per type {Total, FailedTotal, "
81+
"FailedCancelled, FailedPlasmaFull}.",
82+
/*unit=*/"chunks",
83+
/*tag_keys=*/{"Type"});
84+
}
85+
86+
inline ray::stats::Gauge GetPullManagerUsageBytesGaugeMetric() {
87+
return ray::stats::Gauge(
88+
/*name=*/"pull_manager_usage_bytes",
89+
/*description=*/
90+
"The total number of bytes usage broken per type {Available, BeingPulled, Pinned}",
91+
/*unit=*/"bytes",
92+
/*tag_keys=*/{"Type"});
93+
}
94+
95+
inline ray::stats::Gauge GetPullManagerRequestedBundlesGaugeMetric() {
96+
return ray::stats::Gauge(
97+
/*name=*/"pull_manager_requested_bundles",
98+
/*description=*/
99+
"Number of requested bundles broken per type {Get, Wait, TaskArgs}.",
100+
/*unit=*/"bundles",
101+
/*tag_keys=*/{"Type"});
102+
}
103+
104+
inline ray::stats::Gauge GetPullManagerRequestsGaugeMetric() {
105+
return ray::stats::Gauge(
106+
/*name=*/"pull_manager_requests",
107+
/*description=*/"Number of pull requests broken per type {Queued, Active, Pinned}.",
108+
/*unit=*/"requests",
109+
/*tag_keys=*/{"Type"});
110+
}
111+
112+
inline ray::stats::Gauge GetPullManagerActiveBundlesGaugeMetric() {
113+
return ray::stats::Gauge(
114+
/*name=*/"pull_manager_active_bundles",
115+
/*description=*/"Number of active bundle requests",
116+
/*unit=*/"bundles",
117+
/*tag_keys=*/{"Type"});
118+
}
119+
120+
inline ray::stats::Gauge GetPullManagerRetriesTotalGaugeMetric() {
121+
return ray::stats::Gauge(
122+
/*name=*/"pull_manager_retries_total",
123+
/*description=*/"Number of cumulative pull retries.",
124+
/*unit=*/"retries",
125+
/*tag_keys=*/{"Type"});
126+
}
127+
128+
inline ray::stats::Gauge GetPullManagerNumObjectPinsGaugeMetric() {
129+
return ray::stats::Gauge(
130+
/*name=*/"pull_manager_num_object_pins",
131+
/*description=*/
132+
"Number of object pin attempts by the pull manager, can be {Success, Failure}.",
133+
/*unit=*/"pins",
134+
/*tag_keys=*/{"Type"});
135+
}
136+
137+
inline ray::stats::Histogram GetPullManagerObjectRequestTimeMsHistogramMetric() {
138+
return ray::stats::Histogram(
139+
/*name=*/"pull_manager_object_request_time_ms",
140+
/*description=*/
141+
"Time between initial object pull request and local pinning of the object. ",
142+
/*unit=*/"ms",
143+
/*boundaries=*/{1, 10, 100, 1000, 10000},
144+
/*tag_keys=*/{"Type"});
145+
}
146+
147+
inline ray::stats::Gauge GetPushManagerNumPushesRemainingGaugeMetric() {
148+
return ray::stats::Gauge(
149+
/*name=*/"push_manager_num_pushes_remaining",
150+
/*description=*/"Number of pushes not completed.",
151+
/*unit=*/"pushes",
152+
/*tag_keys=*/{"Type"});
153+
}
154+
155+
inline ray::stats::Gauge GetPushManagerChunksGaugeMetric() {
156+
return ray::stats::Gauge(
157+
/*name=*/"push_manager_chunks",
158+
/*description=*/
159+
"Number of object chunks transfer broken per type {InFlight, Remaining}.",
160+
/*unit=*/"chunks",
161+
/*tag_keys=*/{"Type"});
162+
}
163+
164+
} // namespace ray

src/ray/object_manager/object_manager.cc

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include "ray/common/asio/asio_util.h"
2727
#include "ray/object_manager/plasma/store_runner.h"
2828
#include "ray/object_manager/spilled_object_reader.h"
29-
#include "ray/stats/metric_defs.h"
3029
#include "ray/util/exponential_backoff.h"
3130

3231
namespace ray {
@@ -770,32 +769,31 @@ void ObjectManager::RecordMetrics() {
770769
push_manager_->RecordMetrics();
771770
// used_memory_ includes the fallback allocation, so we should add it again here
772771
// to calculate the exact available memory.
773-
ray_metric_object_store_available_memory_.Record(
772+
object_store_available_memory_gauge_.Record(
774773
config_.object_store_memory - used_memory_ +
775774
plasma::plasma_store_runner->GetFallbackAllocated());
776775
// Subtract fallback allocated memory. It is tracked separately by
777776
// `ObjectStoreFallbackMemory`.
778-
ray_metric_object_store_used_memory_.Record(
777+
object_store_used_memory_gauge_.Record(
779778
used_memory_ - plasma::plasma_store_runner->GetFallbackAllocated());
780-
ray_metric_object_store_fallback_memory_.Record(
779+
object_store_fallback_memory_gauge_.Record(
781780
plasma::plasma_store_runner->GetFallbackAllocated());
782-
ray_metric_object_store_local_objects_.Record(local_objects_.size());
783-
ray_metric_object_manager_pull_requests_.Record(pull_manager_->NumObjectPullRequests());
784-
785-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_pushed_from_plasma_,
786-
"PushedFromLocalPlasma");
787-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_pushed_from_disk_,
788-
"PushedFromLocalDisk");
789-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_received_total_, "Received");
790-
791-
ray::stats::STATS_object_manager_received_chunks.Record(num_chunks_received_total_,
792-
"Total");
793-
ray::stats::STATS_object_manager_received_chunks.Record(
794-
num_chunks_received_total_failed_, "FailedTotal");
795-
ray::stats::STATS_object_manager_received_chunks.Record(num_chunks_received_cancelled_,
796-
"FailedCancelled");
797-
ray::stats::STATS_object_manager_received_chunks.Record(
798-
num_chunks_received_failed_due_to_plasma_, "FailedPlasmaFull");
781+
object_store_local_objects_gauge_.Record(local_objects_.size());
782+
object_manager_pull_requests_gauge_.Record(pull_manager_->NumObjectPullRequests());
783+
784+
object_manager_bytes_gauge_.Record(num_bytes_pushed_from_plasma_,
785+
{{"Type", "PushedFromLocalPlasma"}});
786+
object_manager_bytes_gauge_.Record(num_bytes_pushed_from_disk_,
787+
{{"Type", "PushedFromLocalDisk"}});
788+
object_manager_bytes_gauge_.Record(num_bytes_received_total_, {{"Type", "Received"}});
789+
object_manager_received_chunks_gauge_.Record(num_chunks_received_total_,
790+
{{"Type", "Total"}});
791+
object_manager_received_chunks_gauge_.Record(num_chunks_received_total_failed_,
792+
{{"Type", "FailedTotal"}});
793+
object_manager_received_chunks_gauge_.Record(num_chunks_received_cancelled_,
794+
{{"Type", "FailedCancelled"}});
795+
object_manager_received_chunks_gauge_.Record(num_chunks_received_failed_due_to_plasma_,
796+
{{"Type", "FailedPlasmaFull"}});
799797
}
800798

801799
void ObjectManager::FillObjectStoreStats(rpc::GetNodeStatsReply *reply) const {

src/ray/object_manager/object_manager.h

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -521,31 +521,19 @@ class ObjectManager : public ObjectManagerInterface,
521521
/// plasma.
522522
size_t num_chunks_received_failed_due_to_plasma_ = 0;
523523

524-
/// Metrics
525-
ray::stats::Gauge ray_metric_object_store_available_memory_{
526-
/*name=*/"object_store_available_memory",
527-
/*description=*/"Amount of memory currently available in the object store.",
528-
/*unit=*/"bytes"};
529-
530-
ray::stats::Gauge ray_metric_object_store_used_memory_{
531-
/*name=*/"object_store_used_memory",
532-
/*description=*/"Amount of memory currently occupied in the object store.",
533-
/*unit=*/"bytes"};
534-
535-
ray::stats::Gauge ray_metric_object_store_fallback_memory_{
536-
/*name=*/"object_store_fallback_memory",
537-
/*description=*/"Amount of memory in fallback allocations in the filesystem.",
538-
/*unit=*/"bytes"};
539-
540-
ray::stats::Gauge ray_metric_object_store_local_objects_{
541-
/*name=*/"object_store_num_local_objects",
542-
/*description=*/"Number of objects currently in the object store.",
543-
/*unit=*/"objects"};
544-
545-
ray::stats::Gauge ray_metric_object_manager_pull_requests_{
546-
/*name=*/"object_manager_num_pull_requests",
547-
/*description=*/"Number of active pull requests for objects.",
548-
/*unit=*/"requests"};
524+
ray::stats::Gauge object_store_available_memory_gauge_{
525+
GetObjectStoreAvailableMemoryGaugeMetric()};
526+
ray::stats::Gauge object_store_used_memory_gauge_{
527+
ray::GetObjectStoreUsedMemoryGaugeMetric()};
528+
ray::stats::Gauge object_store_fallback_memory_gauge_{
529+
ray::GetObjectStoreFallbackMemoryGaugeMetric()};
530+
ray::stats::Gauge object_store_local_objects_gauge_{
531+
ray::GetObjectStoreLocalObjectsGaugeMetric()};
532+
ray::stats::Gauge object_manager_pull_requests_gauge_{
533+
ray::GetObjectManagerPullRequestsGaugeMetric()};
534+
ray::stats::Gauge object_manager_bytes_gauge_{ray::GetObjectManagerBytesGaugeMetric()};
535+
ray::stats::Gauge object_manager_received_chunks_gauge_{
536+
ray::GetObjectManagerReceivedChunksGaugeMetric()};
549537
};
550538

551539
} // namespace ray

src/ray/object_manager/ownership_object_directory.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include <unordered_set>
2020
#include <utility>
2121

22-
#include "ray/stats/metric_defs.h"
23-
2422
namespace ray {
2523

2624
OwnershipBasedObjectDirectory::OwnershipBasedObjectDirectory(

src/ray/object_manager/plasma/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ ray_cc_library(
181181
hdrs = ["stats_collector.h"],
182182
deps = [
183183
":object_manager_plasma_common",
184+
"//src/ray/common:metrics",
185+
"//src/ray/object_manager:metrics",
184186
"//src/ray/stats:stats_metric",
185187
"//src/ray/util:counter_map",
186188
],

0 commit comments

Comments
 (0)