From 72dcc868e2a81c2214f49534f51a3771ac186da3 Mon Sep 17 00:00:00 2001 From: "jianjian.xie" Date: Fri, 12 Dec 2025 20:11:53 +0000 Subject: [PATCH 1/2] Create a runtime metric for worker uptime to be used for restart alerts Summary: Metrics change only. Added a runtime metric for worker uptime called presto_cpp_worker_runtime_uptime_secs. The metric is present in the Java worker but is missing in the C++ worker, which causes restart alerts to be missing for C++ workers. Test Plan: Deployed to staging cluster and checked the metric status. {F1247554645} Impact: none, since this runs in a background thread. Reviewers: #ldap_velox-core, jay.narale Reviewed By: #ldap_velox-core, jay.narale JIRA Issues: PRESTO-9381 Differential Revision: https://code.uberinternal.com/D20790631 --- .../presto_cpp/main/PrestoServer.cpp | 10 ++++++++++ .../presto_cpp/main/common/Counters.cpp | 2 ++ .../presto_cpp/main/common/Counters.h | 3 +++ 3 files changed, 15 insertions(+) diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 91e70c4f2c0bd..9c06fa12d4bdd 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -1194,6 +1194,16 @@ void PrestoServer::addServerPeriodicTasks() { 1'000'000, // 1 second "populate_mem_cpu_info"); + periodicTaskManager_->addTask( + [start = start_]() { + const auto seconds = std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::steady_clock::now() - start) + .count(); + RECORD_METRIC_VALUE(kCounterWorkerRuntimeUptimeSecs, seconds); + }, + 2'000'000, // 2 seconds + "worker_runtime_uptime_secs"); + const auto timeslice = SystemConfig::instance()->taskRunTimeSliceMicros(); if (timeslice > 0) { periodicTaskManager_->addTask( diff --git a/presto-native-execution/presto_cpp/main/common/Counters.cpp b/presto-native-execution/presto_cpp/main/common/Counters.cpp index da56b7c9df91c..2704568c28c19 100644 --- 
a/presto-native-execution/presto_cpp/main/common/Counters.cpp +++ b/presto-native-execution/presto_cpp/main/common/Counters.cpp @@ -112,6 +112,8 @@ void registerPrestoMetrics() { DEFINE_METRIC( kCounterPartitionedOutputBufferGetDataLatencyMs, facebook::velox::StatType::AVG); + DEFINE_METRIC( + kCounterWorkerRuntimeUptimeSecs, facebook::velox::StatType::AVG); DEFINE_METRIC(kCounterOsUserCpuTimeMicros, facebook::velox::StatType::AVG); DEFINE_METRIC(kCounterOsSystemCpuTimeMicros, facebook::velox::StatType::AVG); DEFINE_METRIC(kCounterOsNumSoftPageFaults, facebook::velox::StatType::AVG); diff --git a/presto-native-execution/presto_cpp/main/common/Counters.h b/presto-native-execution/presto_cpp/main/common/Counters.h index 52ecc1356e06b..e91ca51fa97c6 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.h +++ b/presto-native-execution/presto_cpp/main/common/Counters.h @@ -157,6 +157,9 @@ constexpr std::string_view kCounterTotalPartitionedOutputBuffer{ /// OutputBufferManager. constexpr std::string_view kCounterPartitionedOutputBufferGetDataLatencyMs{ "presto_cpp.partitioned_output_buffer_get_data_latency_ms"}; +/// Worker runtime uptime in seconds the worker process started. 
+constexpr std::string_view kCounterWorkerRuntimeUptimeSecs{ + "presto_cpp.worker_runtime_uptime_secs"}; /// ================== OS Counters ================= From f542457e1ab6e9ff5300b3522fba759be208d777 Mon Sep 17 00:00:00 2001 From: "jianjian.xie" Date: Fri, 16 Jan 2026 21:37:41 -0800 Subject: [PATCH 2/2] test --- presto-native-execution/presto_cpp/main/common/Counters.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presto-native-execution/presto_cpp/main/common/Counters.h b/presto-native-execution/presto_cpp/main/common/Counters.h index e91ca51fa97c6..c6379c575f011 100644 --- a/presto-native-execution/presto_cpp/main/common/Counters.h +++ b/presto-native-execution/presto_cpp/main/common/Counters.h @@ -157,7 +157,7 @@ constexpr std::string_view kCounterTotalPartitionedOutputBuffer{ /// OutputBufferManager. constexpr std::string_view kCounterPartitionedOutputBufferGetDataLatencyMs{ "presto_cpp.partitioned_output_buffer_get_data_latency_ms"}; -/// Worker runtime uptime in seconds the worker process started. +/// Worker runtime uptime in seconds after the worker process started. constexpr std::string_view kCounterWorkerRuntimeUptimeSecs{ "presto_cpp.worker_runtime_uptime_secs"};