Skip to content

Commit f3e327b

Browse files
committed
[core][stats-die/02] kill STATS in object manager component
Signed-off-by: Cuong Nguyen <[email protected]>
1 parent e51f803 commit f3e327b

22 files changed

+162
-190
lines changed

src/ray/common/metrics.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,29 @@ inline ray::stats::Gauge GetActorByStateGaugeMetric() {
4848
};
4949
}
5050

51+
inline ray::stats::Gauge GetObjectStoreMemoryGaugeMetric() {
52+
return ray::stats::Gauge{
53+
/*name=*/"object_store_memory",
54+
/*description=*/"Object store memory by various sub-kinds on this node",
55+
/*unit=*/"",
56+
/// Location:
57+
/// - MMAP_SHM: currently in shared memory(e.g. /dev/shm).
58+
/// - MMAP_DISK: memory that's fallback allocated on mmapped disk,
59+
/// e.g. /tmp.
60+
/// - WORKER_HEAP: ray objects smaller than ('max_direct_call_object_size',
61+
/// default 100KiB) stored in process memory, i.e. inlined return
62+
/// values, placeholders for objects stored in plasma store.
63+
/// - SPILLED: current number of bytes from objects spilled
64+
/// to external storage. Note this might be smaller than
65+
/// the physical storage incurred on the external storage because
66+
/// Ray might fuse spilled objects into a single file, so a deleted
67+
/// spill object might still exist in the spilled file. Check
68+
/// spilled object fusing for more details.
69+
/// ObjectState:
70+
/// - SEALED: sealed objects bytes (could be MMAP_SHM or MMAP_DISK)
71+
/// - UNSEALED: unsealed objects bytes (could be MMAP_SHM or MMAP_DISK)
72+
/*tag_keys=*/{"Location", "ObjectState"},
73+
};
74+
}
75+
5176
} // namespace ray

src/ray/core_worker/BUILD.bazel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ ray_cc_library(
272272
":reference_counter_interface",
273273
"//src/ray/common:asio",
274274
"//src/ray/common:id",
275+
"//src/ray/common:metrics",
275276
"//src/ray/common:ray_config",
276277
"//src/ray/common:status",
277278
"//src/ray/raylet_ipc_client:raylet_ipc_client_interface",
@@ -423,6 +424,6 @@ ray_cc_library(
423424
name = "metrics",
424425
hdrs = ["metrics.h"],
425426
deps = [
426-
"//src/ray/stats:stats_lib",
427+
"//src/ray/stats:stats_metric",
427428
],
428429
)

src/ray/core_worker/store_provider/memory_store/memory_store.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,8 @@ MemoryStoreStats CoreWorkerMemoryStore::GetMemoryStoreStatisticalData() {
597597

598598
void CoreWorkerMemoryStore::RecordMetrics() {
599599
absl::MutexLock lock(&mu_);
600-
stats::STATS_object_store_memory.Record(num_local_objects_bytes_,
601-
{{stats::LocationKey, "WORKER_HEAP"}});
600+
object_store_memory_gauge_.Record(num_local_objects_bytes_,
601+
{{stats::LocationKey, "WORKER_HEAP"}});
602602
}
603603

604604
} // namespace core

src/ray/core_worker/store_provider/memory_store/memory_store.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "absl/synchronization/mutex.h"
2525
#include "ray/common/asio/asio_util.h"
2626
#include "ray/common/id.h"
27+
#include "ray/common/metrics.h"
2728
#include "ray/common/status.h"
2829
#include "ray/core_worker/context.h"
2930
#include "ray/core_worker/reference_counter_interface.h"
@@ -253,6 +254,8 @@ class CoreWorkerMemoryStore {
253254
std::function<std::shared_ptr<RayObject>(const RayObject &object,
254255
const ObjectID &object_id)>
255256
object_allocator_;
257+
258+
ray::stats::Gauge object_store_memory_gauge_{ray::GetObjectStoreMemoryGaugeMetric()};
256259
};
257260

258261
} // namespace core

src/ray/object_manager/BUILD.bazel

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ ray_cc_library(
66
hdrs = ["object_manager.h"],
77
deps = [
88
":chunk_object_reader",
9+
":metrics",
910
":object_buffer_pool",
1011
":object_directory",
1112
":object_manager_common",
@@ -30,6 +31,7 @@ ray_cc_library(
3031
srcs = ["push_manager.cc"],
3132
hdrs = ["push_manager.h"],
3233
deps = [
34+
":metrics",
3335
"//src/ray/common:id",
3436
"//src/ray/stats:stats_metric",
3537
"@com_google_absl//absl/container:flat_hash_map",
@@ -41,6 +43,7 @@ ray_cc_library(
4143
srcs = ["pull_manager.cc"],
4244
hdrs = ["pull_manager.h"],
4345
deps = [
46+
":metrics",
4447
":object_manager_common",
4548
":ownership_object_directory",
4649
"//src/ray/common:id",
@@ -162,3 +165,11 @@ ray_cc_library(
162165
hdrs = ["object_reader.h"],
163166
deps = ["//src/ray/protobuf:common_cc_proto"],
164167
)
168+
169+
ray_cc_library(
170+
name = "metrics",
171+
hdrs = ["metrics.h"],
172+
deps = [
173+
"//src/ray/stats:stats_metric",
174+
],
175+
)

src/ray/object_manager/object_manager.cc

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include "ray/common/asio/asio_util.h"
2727
#include "ray/object_manager/plasma/store_runner.h"
2828
#include "ray/object_manager/spilled_object_reader.h"
29-
#include "ray/stats/metric_defs.h"
3029
#include "ray/util/exponential_backoff.h"
3130

3231
namespace ray {
@@ -770,32 +769,31 @@ void ObjectManager::RecordMetrics() {
770769
push_manager_->RecordMetrics();
771770
// used_memory_ includes the fallback allocation, so we should add it again here
772771
// to calculate the exact available memory.
773-
ray_metric_object_store_available_memory_.Record(
772+
object_store_available_memory_gauge_.Record(
774773
config_.object_store_memory - used_memory_ +
775774
plasma::plasma_store_runner->GetFallbackAllocated());
776775
// Subtract fallback allocated memory. It is tracked separately by
777776
// `ObjectStoreFallbackMemory`.
778-
ray_metric_object_store_used_memory_.Record(
777+
object_store_used_memory_gauge_.Record(
779778
used_memory_ - plasma::plasma_store_runner->GetFallbackAllocated());
780-
ray_metric_object_store_fallback_memory_.Record(
779+
object_store_fallback_memory_gauge_.Record(
781780
plasma::plasma_store_runner->GetFallbackAllocated());
782-
ray_metric_object_store_local_objects_.Record(local_objects_.size());
783-
ray_metric_object_manager_pull_requests_.Record(pull_manager_->NumObjectPullRequests());
784-
785-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_pushed_from_plasma_,
786-
"PushedFromLocalPlasma");
787-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_pushed_from_disk_,
788-
"PushedFromLocalDisk");
789-
ray::stats::STATS_object_manager_bytes.Record(num_bytes_received_total_, "Received");
790-
791-
ray::stats::STATS_object_manager_received_chunks.Record(num_chunks_received_total_,
792-
"Total");
793-
ray::stats::STATS_object_manager_received_chunks.Record(
794-
num_chunks_received_total_failed_, "FailedTotal");
795-
ray::stats::STATS_object_manager_received_chunks.Record(num_chunks_received_cancelled_,
796-
"FailedCancelled");
797-
ray::stats::STATS_object_manager_received_chunks.Record(
798-
num_chunks_received_failed_due_to_plasma_, "FailedPlasmaFull");
781+
object_store_local_objects_gauge_.Record(local_objects_.size());
782+
object_manager_pull_requests_gauge_.Record(pull_manager_->NumObjectPullRequests());
783+
784+
object_manager_bytes_gauge_.Record(num_bytes_pushed_from_plasma_,
785+
{{"Type", "PushedFromLocalPlasma"}});
786+
object_manager_bytes_gauge_.Record(num_bytes_pushed_from_disk_,
787+
{{"Type", "PushedFromLocalDisk"}});
788+
object_manager_bytes_gauge_.Record(num_bytes_received_total_, {{"Type", "Received"}});
789+
object_manager_received_chunks_gauge_.Record(num_chunks_received_total_,
790+
{{"Type", "Total"}});
791+
object_manager_received_chunks_gauge_.Record(num_chunks_received_total_failed_,
792+
{{"Type", "FailedTotal"}});
793+
object_manager_received_chunks_gauge_.Record(num_chunks_received_cancelled_,
794+
{{"Type", "FailedCancelled"}});
795+
object_manager_received_chunks_gauge_.Record(num_chunks_received_failed_due_to_plasma_,
796+
{{"Type", "FailedPlasmaFull"}});
799797
}
800798

801799
void ObjectManager::FillObjectStoreStats(rpc::GetNodeStatsReply *reply) const {

src/ray/object_manager/object_manager.h

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -521,31 +521,19 @@ class ObjectManager : public ObjectManagerInterface,
521521
/// plasma.
522522
size_t num_chunks_received_failed_due_to_plasma_ = 0;
523523

524-
/// Metrics
525-
ray::stats::Gauge ray_metric_object_store_available_memory_{
526-
/*name=*/"object_store_available_memory",
527-
/*description=*/"Amount of memory currently available in the object store.",
528-
/*unit=*/"bytes"};
529-
530-
ray::stats::Gauge ray_metric_object_store_used_memory_{
531-
/*name=*/"object_store_used_memory",
532-
/*description=*/"Amount of memory currently occupied in the object store.",
533-
/*unit=*/"bytes"};
534-
535-
ray::stats::Gauge ray_metric_object_store_fallback_memory_{
536-
/*name=*/"object_store_fallback_memory",
537-
/*description=*/"Amount of memory in fallback allocations in the filesystem.",
538-
/*unit=*/"bytes"};
539-
540-
ray::stats::Gauge ray_metric_object_store_local_objects_{
541-
/*name=*/"object_store_num_local_objects",
542-
/*description=*/"Number of objects currently in the object store.",
543-
/*unit=*/"objects"};
544-
545-
ray::stats::Gauge ray_metric_object_manager_pull_requests_{
546-
/*name=*/"object_manager_num_pull_requests",
547-
/*description=*/"Number of active pull requests for objects.",
548-
/*unit=*/"requests"};
524+
ray::stats::Gauge object_store_available_memory_gauge_{
525+
GetObjectStoreAvailableMemoryGaugeMetric()};
526+
ray::stats::Gauge object_store_used_memory_gauge_{
527+
ray::GetObjectStoreUsedMemoryGaugeMetric()};
528+
ray::stats::Gauge object_store_fallback_memory_gauge_{
529+
ray::GetObjectStoreFallbackMemoryGaugeMetric()};
530+
ray::stats::Gauge object_store_local_objects_gauge_{
531+
ray::GetObjectStoreLocalObjectsGaugeMetric()};
532+
ray::stats::Gauge object_manager_pull_requests_gauge_{
533+
ray::GetObjectManagerPullRequestsGaugeMetric()};
534+
ray::stats::Gauge object_manager_bytes_gauge_{ray::GetObjectManagerBytesGaugeMetric()};
535+
ray::stats::Gauge object_manager_received_chunks_gauge_{
536+
ray::GetObjectManagerReceivedChunksGaugeMetric()};
549537
};
550538

551539
} // namespace ray

src/ray/object_manager/ownership_object_directory.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include <unordered_set>
2020
#include <utility>
2121

22-
#include "ray/stats/metric_defs.h"
23-
2422
namespace ray {
2523

2624
OwnershipBasedObjectDirectory::OwnershipBasedObjectDirectory(

src/ray/object_manager/plasma/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ ray_cc_library(
181181
hdrs = ["stats_collector.h"],
182182
deps = [
183183
":object_manager_plasma_common",
184+
"//src/ray/common:metrics",
185+
"//src/ray/object_manager:metrics",
184186
"//src/ray/stats:stats_metric",
185187
"//src/ray/util:counter_map",
186188
],

src/ray/object_manager/plasma/stats_collector.cc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,23 @@ void ObjectStatsCollector::OnObjectCreated(const LocalObject &obj) {
3939
if (kSource == plasma::flatbuf::ObjectSource::CreatedByWorker) {
4040
num_objects_created_by_worker_++;
4141
num_bytes_created_by_worker_ += kObjectSize;
42-
ray::stats::STATS_object_store_dist.Record(
43-
kObjectSize, {{ray::stats::SourceKey, "CreatedByWorker"}});
42+
object_store_dist_histogram_.Record(kObjectSize,
43+
{{ray::stats::SourceKey, "CreatedByWorker"}});
4444
} else if (kSource == plasma::flatbuf::ObjectSource::RestoredFromStorage) {
4545
num_objects_restored_++;
4646
num_bytes_restored_ += kObjectSize;
47-
ray::stats::STATS_object_store_dist.Record(
48-
kObjectSize, {{ray::stats::SourceKey, "RestoredFromStorage"}});
47+
object_store_dist_histogram_.Record(kObjectSize,
48+
{{ray::stats::SourceKey, "RestoredFromStorage"}});
4949
} else if (kSource == plasma::flatbuf::ObjectSource::ReceivedFromRemoteRaylet) {
5050
num_objects_received_++;
5151
num_bytes_received_ += kObjectSize;
52-
ray::stats::STATS_object_store_dist.Record(
52+
object_store_dist_histogram_.Record(
5353
kObjectSize, {{ray::stats::SourceKey, "ReceivedFromRemoteRaylet"}});
5454
} else if (kSource == plasma::flatbuf::ObjectSource::ErrorStoredByRaylet) {
5555
num_objects_errored_++;
5656
num_bytes_errored_ += kObjectSize;
57-
ray::stats::STATS_object_store_dist.Record(
58-
kObjectSize, {{ray::stats::SourceKey, "ErrorStoredByRaylet"}});
57+
object_store_dist_histogram_.Record(kObjectSize,
58+
{{ray::stats::SourceKey, "ErrorStoredByRaylet"}});
5959
}
6060

6161
RAY_CHECK(!obj.Sealed());
@@ -206,25 +206,25 @@ void ObjectStatsCollector::RecordMetrics() const {
206206
static std::string kObjectLocMmapDisk = "MMAP_DISK";
207207

208208
// Shared memory sealed
209-
ray::stats::STATS_object_store_memory.Record(
209+
object_store_memory_gauge_.Record(
210210
bytes_by_loc_seal_.Get({/* fallback_allocated */ false, /* sealed */ true}),
211211
{{ray::stats::LocationKey, kObjectLocMmapShm},
212212
{ray::stats::ObjectStateKey, kObjectSealed}});
213213

214214
// Shared memory unsealed
215-
ray::stats::STATS_object_store_memory.Record(
215+
object_store_memory_gauge_.Record(
216216
bytes_by_loc_seal_.Get({/* fallback_allocated */ false, /* sealed */ false}),
217217
{{ray::stats::LocationKey, kObjectLocMmapShm},
218218
{ray::stats::ObjectStateKey, kObjectUnsealed}});
219219

220220
// Fallback memory sealed
221-
ray::stats::STATS_object_store_memory.Record(
221+
object_store_memory_gauge_.Record(
222222
bytes_by_loc_seal_.Get({/* fallback_allocated */ true, /* sealed */ true}),
223223
{{ray::stats::LocationKey, kObjectLocMmapDisk},
224224
{ray::stats::ObjectStateKey, kObjectSealed}});
225225

226226
// Fallback memory unsealed
227-
ray::stats::STATS_object_store_memory.Record(
227+
object_store_memory_gauge_.Record(
228228
bytes_by_loc_seal_.Get({/* fallback_allocated */ true, /* sealed */ false}),
229229
{{ray::stats::LocationKey, kObjectLocMmapDisk},
230230
{ray::stats::ObjectStateKey, kObjectUnsealed}});

0 commit comments

Comments
 (0)