Skip to content

Commit b2c67f5

Browse files
committed
[core][stats-die/03bis] improve scheduler_placement_time_s metric
Signed-off-by: Cuong Nguyen <[email protected]>
1 parent b950cff commit b2c67f5

25 files changed

+59
-68
lines changed

python/ray/tests/test_scheduling_2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -787,17 +787,17 @@ def ready(self):
787787
placement_metric_condition = get_metric_check_condition(
788788
[
789789
MetricSamplePattern(
790-
name="ray_scheduler_placement_time_s_bucket",
790+
name="ray_scheduler_placement_time_ms_bucket",
791791
value=1.0,
792792
partial_label_match={"WorkloadType": "Actor"},
793793
),
794794
MetricSamplePattern(
795-
name="ray_scheduler_placement_time_s_bucket",
795+
name="ray_scheduler_placement_time_ms_bucket",
796796
value=1.0,
797797
partial_label_match={"WorkloadType": "Task"},
798798
),
799799
MetricSamplePattern(
800-
name="ray_scheduler_placement_time_s_bucket",
800+
name="ray_scheduler_placement_time_ms_bucket",
801801
value=1.0,
802802
partial_label_match={"WorkloadType": "PlacementGroup"},
803803
),

src/ray/common/metrics.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,14 @@ inline ray::stats::Gauge GetObjectStoreMemoryGaugeMetric() {
7373
};
7474
}
7575

76-
inline ray::stats::Histogram GetSchedulerPlacementTimeSHistogramMetric() {
76+
inline ray::stats::Histogram GetSchedulerPlacementTimeMsHistogramMetric() {
7777
return ray::stats::Histogram{
78-
/*name=*/"scheduler_placement_time_s",
78+
/*name=*/"scheduler_placement_time_ms",
7979
/*description=*/
80-
"The time it takes for a worklod (task, actor, placement group) to "
80+
"The time it takes for a workload (task, actor, placement group) to "
8181
"be placed. This is the time from when the tasks dependencies are "
8282
"resolved to when it actually reserves resources on a node to run.",
83-
/*unit=*/"s",
83+
/*unit=*/"ms",
8484
/*boundaries=*/{0.1, 1, 10, 100, 1000, 10000},
8585
/*tag_keys=*/{"WorkloadType"},
8686
};

src/ray/common/task/task_spec.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -602,15 +602,15 @@ bool TaskSpecification::IsRetriable() const {
602602
}
603603

604604
void TaskSpecification::EmitTaskMetrics(
605-
ray::observability::MetricInterface &scheduler_placement_time_s_histogram) const {
606-
double duration_s = (GetMessage().lease_grant_timestamp_ms() -
607-
GetMessage().dependency_resolution_timestamp_ms()) /
608-
1000;
605+
ray::observability::MetricInterface &scheduler_placement_time_ms_histogram) const {
606+
double duration_ms = GetMessage().lease_grant_timestamp_ms() -
607+
GetMessage().dependency_resolution_timestamp_ms();
609608

610609
if (IsActorCreationTask()) {
611-
scheduler_placement_time_s_histogram.Record(duration_s, {{"WorkloadType", "Actor"}});
610+
scheduler_placement_time_ms_histogram.Record(duration_ms,
611+
{{"WorkloadType", "Actor"}});
612612
} else {
613-
scheduler_placement_time_s_histogram.Record(duration_s, {{"WorkloadType", "Task"}});
613+
scheduler_placement_time_ms_histogram.Record(duration_ms, {{"WorkloadType", "Task"}});
614614
}
615615
}
616616

src/ray/common/task/task_spec.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ class TaskSpecification : public MessageWrapper<rpc::TaskSpec> {
359359
bool IsRetriable() const;
360360

361361
void EmitTaskMetrics(
362-
ray::observability::MetricInterface &scheduler_placement_time_s_histogram) const;
362+
ray::observability::MetricInterface &scheduler_placement_time_ms_histogram) const;
363363

364364
/// \return true if task events from this task should be reported.
365365
bool EnableTaskEvents() const;

src/ray/core_worker/core_worker_process.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ std::shared_ptr<CoreWorker> CoreWorkerProcessImpl::CreateCoreWorker(
554554
return rpc::TensorTransport::OBJECT_STORE;
555555
},
556556
boost::asio::steady_timer(io_service_),
557-
*scheduler_placement_time_s_histogram_);
557+
*scheduler_placement_time_ms_histogram_);
558558

559559
auto report_locality_data_callback = [this](
560560
const ObjectID &object_id,
@@ -798,8 +798,8 @@ CoreWorkerProcessImpl::CoreWorkerProcessImpl(const CoreWorkerOptions &options)
798798
new ray::stats::Gauge(GetActorByStateGaugeMetric()));
799799
total_lineage_bytes_gauge_ = std::unique_ptr<ray::stats::Gauge>(
800800
new ray::stats::Gauge(GetTotalLineageBytesGaugeMetric()));
801-
scheduler_placement_time_s_histogram_ = std::unique_ptr<ray::stats::Histogram>(
802-
new ray::stats::Histogram(GetSchedulerPlacementTimeSHistogramMetric()));
801+
scheduler_placement_time_ms_histogram_ = std::unique_ptr<ray::stats::Histogram>(
802+
new ray::stats::Histogram(GetSchedulerPlacementTimeMsHistogramMetric()));
803803

804804
// Initialize event framework before starting up worker.
805805
if (RayConfig::instance().event_log_reporter_enabled() && !options_.log_dir.empty()) {

src/ray/core_worker/core_worker_process.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ class CoreWorkerProcessImpl {
188188
std::unique_ptr<ray::stats::Gauge> task_by_state_gauge_;
189189
std::unique_ptr<ray::stats::Gauge> actor_by_state_gauge_;
190190
std::unique_ptr<ray::stats::Gauge> total_lineage_bytes_gauge_;
191-
std::unique_ptr<ray::stats::Histogram> scheduler_placement_time_s_histogram_;
191+
std::unique_ptr<ray::stats::Histogram> scheduler_placement_time_ms_histogram_;
192192
};
193193
} // namespace core
194194
} // namespace ray

src/ray/core_worker/task_submission/normal_task_submitter.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ void NormalTaskSubmitter::OnWorkerIdle(
177177
scheduling_key_entry.num_busy_workers++;
178178

179179
task_spec.GetMutableMessage().set_lease_grant_timestamp_ms(current_sys_time_ms());
180-
task_spec.EmitTaskMetrics(scheduler_placement_time_s_histogram_);
180+
task_spec.EmitTaskMetrics(scheduler_placement_time_ms_histogram_);
181181

182182
executing_tasks_.emplace(task_spec.TaskId(), addr);
183183
PushNormalTask(

src/ray/core_worker/task_submission/normal_task_submitter.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class NormalTaskSubmitter {
9696
std::shared_ptr<LeaseRequestRateLimiter> lease_request_rate_limiter,
9797
const TensorTransportGetter &tensor_transport_getter,
9898
boost::asio::steady_timer cancel_timer,
99-
ray::observability::MetricInterface &scheduler_placement_time_s_histogram)
99+
ray::observability::MetricInterface &scheduler_placement_time_ms_histogram)
100100
: rpc_address_(std::move(rpc_address)),
101101
local_raylet_client_(std::move(local_raylet_client)),
102102
raylet_client_pool_(std::move(raylet_client_pool)),
@@ -111,7 +111,7 @@ class NormalTaskSubmitter {
111111
job_id_(job_id),
112112
lease_request_rate_limiter_(std::move(lease_request_rate_limiter)),
113113
cancel_retry_timer_(std::move(cancel_timer)),
114-
scheduler_placement_time_s_histogram_(scheduler_placement_time_s_histogram) {}
114+
scheduler_placement_time_ms_histogram_(scheduler_placement_time_ms_histogram) {}
115115

116116
/// Schedule a task for direct submission to a worker.
117117
void SubmitTask(TaskSpecification task_spec);
@@ -365,7 +365,7 @@ class NormalTaskSubmitter {
365365
// Retries cancelation requests if they were not successful.
366366
boost::asio::steady_timer cancel_retry_timer_ ABSL_GUARDED_BY(mu_);
367367

368-
ray::observability::MetricInterface &scheduler_placement_time_s_histogram_;
368+
ray::observability::MetricInterface &scheduler_placement_time_ms_histogram_;
369369
};
370370

371371
} // namespace core

src/ray/core_worker/task_submission/tests/normal_task_submitter_test.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ class NormalTaskSubmitterTest : public testing::Test {
496496
rate_limiter,
497497
[](const ObjectID &object_id) { return rpc::TensorTransport::OBJECT_STORE; },
498498
boost::asio::steady_timer(io_context),
499-
fake_scheduler_placement_time_s_histogram_);
499+
fake_scheduler_placement_time_ms_histogram_);
500500
}
501501

502502
NodeID local_node_id;
@@ -513,7 +513,7 @@ class NormalTaskSubmitterTest : public testing::Test {
513513
std::unique_ptr<MockLeasePolicy> lease_policy;
514514
MockLeasePolicy *lease_policy_ptr = nullptr;
515515
instrumented_io_context io_context;
516-
ray::observability::FakeHistogram fake_scheduler_placement_time_s_histogram_;
516+
ray::observability::FakeHistogram fake_scheduler_placement_time_ms_histogram_;
517517
};
518518

519519
TEST_F(NormalTaskSubmitterTest, TestLocalityAwareSubmitOneTask) {
@@ -1433,7 +1433,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
14331433
const TaskSpecification &same2,
14341434
const TaskSpecification &different) {
14351435
rpc::Address address;
1436-
ray::observability::FakeHistogram fake_scheduler_placement_time_s_histogram_;
1436+
ray::observability::FakeHistogram fake_scheduler_placement_time_ms_histogram_;
14371437
auto local_node_id = NodeID::FromRandom();
14381438
auto raylet_client = std::make_shared<MockRayletClient>();
14391439
auto raylet_client_pool = std::make_shared<rpc::RayletClientPool>(
@@ -1462,7 +1462,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
14621462
std::make_shared<StaticLeaseRequestRateLimiter>(1),
14631463
[](const ObjectID &object_id) { return rpc::TensorTransport::OBJECT_STORE; },
14641464
boost::asio::steady_timer(io_context),
1465-
fake_scheduler_placement_time_s_histogram_);
1465+
fake_scheduler_placement_time_ms_histogram_);
14661466

14671467
submitter.SubmitTask(same1);
14681468
submitter.SubmitTask(same2);

src/ray/core_worker/tests/core_worker_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ class CoreWorkerTest : public ::testing::Test {
224224
lease_request_rate_limiter,
225225
[](const ObjectID &object_id) { return rpc::TensorTransport::OBJECT_STORE; },
226226
boost::asio::steady_timer(io_service_),
227-
fake_scheduler_placement_time_s_histogram_);
227+
fake_scheduler_placement_time_ms_histogram_);
228228

229229
auto actor_task_submitter = std::make_unique<ActorTaskSubmitter>(
230230
*core_worker_client_pool,
@@ -300,7 +300,7 @@ class CoreWorkerTest : public ::testing::Test {
300300
ray::observability::FakeGauge fake_task_by_state_gauge_;
301301
ray::observability::FakeGauge fake_actor_by_state_gauge_;
302302
ray::observability::FakeGauge fake_total_lineage_bytes_gauge_;
303-
ray::observability::FakeHistogram fake_scheduler_placement_time_s_histogram_;
303+
ray::observability::FakeHistogram fake_scheduler_placement_time_ms_histogram_;
304304
std::unique_ptr<FakePeriodicalRunner> fake_periodical_runner_;
305305

306306
// Controllable time for testing publisher timeouts

0 commit comments

Comments
 (0)