From d1b4b05cca0805f7b3f1d92e20a8ae9bab4ffb47 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Tue, 28 Oct 2025 17:29:56 +0800 Subject: [PATCH 01/14] add secondary storage usage monitor --- mooncake-store/include/master_config.h | 16 ++++++++ .../include/master_metric_manager.h | 7 ++++ mooncake-store/include/master_service.h | 2 + mooncake-store/include/types.h | 1 + mooncake-store/src/master.cpp | 12 ++++++ mooncake-store/src/master_metric_manager.cpp | 37 ++++++++++++++++++- mooncake-store/src/master_service.cpp | 9 +++++ mooncake-store/src/rpc_service.cpp | 2 + 8 files changed, 85 insertions(+), 1 deletion(-) diff --git a/mooncake-store/include/master_config.h b/mooncake-store/include/master_config.h index 153492ee6..25cd5c27f 100644 --- a/mooncake-store/include/master_config.h +++ b/mooncake-store/include/master_config.h @@ -30,6 +30,7 @@ struct MasterConfig { std::string cluster_id; std::string root_fs_dir; + uint64_t global_file_segment_size; std::string memory_allocator; // HTTP metadata server configuration @@ -63,6 +64,7 @@ class MasterServiceSupervisorConfig { std::string local_hostname = "0.0.0.0:50051"; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; + uint64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; MasterServiceSupervisorConfig() = default; @@ -91,6 +93,7 @@ class MasterServiceSupervisorConfig { local_hostname = rpc_address + ":" + std::to_string(rpc_port); cluster_id = config.cluster_id; root_fs_dir = config.root_fs_dir; + global_file_segment_size = config.global_file_segment_size; // Convert string memory_allocator to BufferAllocatorType enum if (config.memory_allocator == "cachelib") { @@ -161,6 +164,7 @@ class WrappedMasterServiceConfig { bool enable_ha = false; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; + uint64_t global_file_segment_size = 
DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; WrappedMasterServiceConfig() = default; @@ -184,6 +188,7 @@ class WrappedMasterServiceConfig { enable_ha = config.enable_ha; cluster_id = config.cluster_id; root_fs_dir = config.root_fs_dir; + global_file_segment_size = config.global_file_segment_size; // Convert string memory_allocator to BufferAllocatorType enum if (config.memory_allocator == "cachelib") { @@ -214,6 +219,7 @@ class WrappedMasterServiceConfig { true; // This is used in HA mode, so enable_ha should be true cluster_id = config.cluster_id; root_fs_dir = config.root_fs_dir; + global_file_segment_size = config.global_file_segment_size; memory_allocator = config.memory_allocator; } }; @@ -236,6 +242,7 @@ class MasterServiceConfigBuilder { bool enable_ha_ = false; std::string cluster_id_ = DEFAULT_CLUSTER_ID; std::string root_fs_dir_ = DEFAULT_ROOT_FS_DIR; + uint64_t global_file_segment_size_ = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator_ = BufferAllocatorType::OFFSET; public: @@ -293,6 +300,12 @@ class MasterServiceConfigBuilder { return *this; } + MasterServiceConfigBuilder& set_global_file_segment_size( + uint64_t segment_size) { + global_file_segment_size_ = segment_size; + return *this; + } + MasterServiceConfigBuilder& set_memory_allocator( BufferAllocatorType allocator) { memory_allocator_ = allocator; @@ -316,6 +329,7 @@ class MasterServiceConfig { bool enable_ha = false; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; + uint64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; MasterServiceConfig() = default; @@ -333,6 +347,7 @@ class MasterServiceConfig { enable_ha = config.enable_ha; cluster_id = config.cluster_id; root_fs_dir = config.root_fs_dir; + global_file_segment_size = config.global_file_segment_size; memory_allocator = 
config.memory_allocator; } @@ -353,6 +368,7 @@ inline MasterServiceConfig MasterServiceConfigBuilder::build() const { config.enable_ha = enable_ha_; config.cluster_id = cluster_id_; config.root_fs_dir = root_fs_dir_; + config.global_file_segment_size = global_file_segment_size_; config.memory_allocator = memory_allocator_; return config; } diff --git a/mooncake-store/include/master_metric_manager.h b/mooncake-store/include/master_metric_manager.h index aa7bf1412..1ea3b6d46 100644 --- a/mooncake-store/include/master_metric_manager.h +++ b/mooncake-store/include/master_metric_manager.h @@ -28,6 +28,13 @@ class MasterMetricManager { int64_t get_total_capacity(); double get_global_used_ratio(void); + // File Storage Metrics + void inc_allocated_file_size(int64_t val = 1); + void dec_allocated_file_size(int64_t val = 1); + int64_t get_allocated_file_size(); + int64_t get_total_file_capacity(); + double get_global_file_used_ratio(void); + // Key/Value Metrics void inc_key_count(int64_t val = 1); void dec_key_count(int64_t val = 1); diff --git a/mooncake-store/include/master_service.h b/mooncake-store/include/master_service.h index 5413eaeee..08f701cd6 100644 --- a/mooncake-store/include/master_service.h +++ b/mooncake-store/include/master_service.h @@ -471,6 +471,8 @@ class MasterService { const std::string cluster_id_; // root filesystem directory for persistent storage const std::string root_fs_dir_; + // global 3fs/nfs segment size + uint64_t global_file_segment_size_; bool use_disk_replica_{false}; diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index 2364f0745..028b5a4ec 100644 --- a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -33,6 +33,7 @@ static constexpr int64_t ETCD_MASTER_VIEW_LEASE_TTL = 5; // in seconds static constexpr int64_t DEFAULT_CLIENT_LIVE_TTL_SEC = 10; // in seconds static const std::string DEFAULT_CLUSTER_ID = "mooncake_cluster"; static const std::string DEFAULT_ROOT_FS_DIR = ""; +static const 
uint64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = 536870912000; // 500 GiB static const std::string PUT_NO_SPACE_HELPER_STR = // A helpful string " due to insufficient space. Consider lowering " "eviction_high_watermark_ratio or mounting more segments."; diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index b5f68b2fa..d4a11f00c 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -70,6 +70,8 @@ DEFINE_int64(client_ttl, mooncake::DEFAULT_CLIENT_LIVE_TTL_SEC, DEFINE_string(root_fs_dir, mooncake::DEFAULT_ROOT_FS_DIR, "Root directory for storage backend, used in HA mode"); +DEFINE_uint64(global_file_segment_size, mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, + "Size of global NFS/3FS segment in bytes"); DEFINE_string(cluster_id, mooncake::DEFAULT_CLUSTER_ID, "Cluster ID for the master service, used for kvcache persistence " "in HA mode"); @@ -129,6 +131,9 @@ void InitMasterConf(const mooncake::DefaultConfig& default_config, FLAGS_cluster_id); default_config.GetString("root_fs_dir", &master_config.root_fs_dir, FLAGS_root_fs_dir); + default_config.GetUInt64("global_file_segment_size", + &master_config.global_file_segment_size, + FLAGS_global_file_segment_size); default_config.GetString("memory_allocator", &master_config.memory_allocator, FLAGS_memory_allocator); @@ -269,6 +274,11 @@ void LoadConfigFromCmdline(mooncake::MasterConfig& master_config, !conf_set) { master_config.root_fs_dir = FLAGS_root_fs_dir; } + if ((google::GetCommandLineFlagInfo("global_file_segment_size", &info) && + !info.is_default) || + !conf_set) { + master_config.global_file_segment_size = FLAGS_global_file_segment_size; + } if ((google::GetCommandLineFlagInfo("memory_allocator", &info) && !info.is_default) || !conf_set) { @@ -385,6 +395,8 @@ int main(int argc, char* argv[]) { << ", rpc protocol=" << protocol << ", cluster_id=" << master_config.cluster_id << ", root_fs_dir=" << master_config.root_fs_dir + << ", global_file_segment_size=" + << 
master_config.global_file_segment_size << ", memory_allocator=" << master_config.memory_allocator << ", enable_http_metadata_server=" << master_config.enable_http_metadata_server diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index 60e94ad89..b3268bb76 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -22,6 +22,10 @@ MasterMetricManager::MasterMetricManager() "Total bytes currently allocated across all segments"), total_capacity_("master_total_capacity_bytes", "Total capacity across all mounted segments"), + total_file_capacity_("master_total_file_capacity_bytes", + "Total capacity for file storage in 3fs/nfs"), + allocated_file_size_("master_allocated_file_size_bytes", + "Total bytes currently allocated for file storage in 3fs/nfs"), key_count_("master_key_count", "Total number of keys managed by the master"), soft_pin_key_count_( @@ -219,6 +223,31 @@ double MasterMetricManager::get_global_used_ratio(void) { return allocated / capacity; } +// File Storage Metrics +void MasterMetricManager::inc_allocated_file_size(int64_t val) { + allocated_file_size_.inc(val); +} +void MasterMetricManager::dec_allocated_file_size(int64_t val) { + allocated_file_size_.dec(val); +} + +int64_t MasterMetricManager::get_allocated_file_size() { + return allocated_file_size_.value(); +} + +int64_t MasterMetricManager::get_total_file_capacity() { + return total_file_capacity_.value(); +} + +double MasterMetricManager::get_global_file_used_ratio(void) { + double allocated = allocated_file_size_.value(); + double capacity = total_file_capacity_.value(); + if (capacity == 0) { + return 0.0; + } + return allocated / capacity; +} + // Key/Value Metrics void MasterMetricManager::inc_key_count(int64_t val) { key_count_.inc(val); } void MasterMetricManager::dec_key_count(int64_t val) { key_count_.dec(val); } @@ -654,6 +683,8 @@ std::string 
MasterMetricManager::serialize_metrics() { // Serialize Gauges serialize_metric(allocated_size_); serialize_metric(total_capacity_); + serialize_metric(allocated_file_size_); + serialize_metric(total_file_capacity_); serialize_metric(key_count_); serialize_metric(soft_pin_key_count_); if (enable_ha_) { @@ -721,6 +752,8 @@ std::string MasterMetricManager::get_summary_string() { // --- Get current values --- int64_t allocated = allocated_size_.value(); int64_t capacity = total_capacity_.value(); + int64_t file_allocated = allocated_file_size_.value(); + int64_t file_capacity = total_file_capacity_.value(); int64_t keys = key_count_.value(); int64_t soft_pin_keys = soft_pin_key_count_.value(); int64_t active_clients = active_clients_.value(); @@ -791,8 +824,10 @@ std::string MasterMetricManager::get_summary_string() { int64_t ping_fails = ping_failures_.value(); // --- Format the summary string --- - ss << "Storage: " << byte_size_to_string(allocated) << " / " + ss << "Mem Storage: " << byte_size_to_string(allocated) << " / " << byte_size_to_string(capacity); + ss << "SSD Storage: " << byte_size_to_string(file_allocated) << " / " + << byte_size_to_string(file_capacity); if (capacity > 0) { ss << " (" << std::fixed << std::setprecision(1) << ((double)allocated / (double)capacity * 100.0) << "%)"; diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 71dbe94c0..c8b07e417 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -24,6 +24,7 @@ MasterService::MasterService(const MasterServiceConfig& config) enable_ha_(config.enable_ha), cluster_id_(config.cluster_id), root_fs_dir_(config.root_fs_dir), + global_file_segment_size_(config.global_file_segment_size), segment_manager_(config.memory_allocator), memory_allocator_type_(config.memory_allocator), allocation_strategy_(std::make_shared()) { @@ -407,6 +408,7 @@ auto MasterService::PutStart(const std::string& key, std::string file_path = 
ResolvePath(key); replicas.emplace_back(file_path, total_length, ReplicaStatus::PROCESSING); + MasterMetricManager::instance().inc_allocated_file_size(total_length); } std::vector replica_list; @@ -460,6 +462,13 @@ auto MasterService::PutRevoke(const std::string& key, ReplicaType replica_type) << ", error=invalid_replica_status"; return tl::make_unexpected(ErrorCode::INVALID_WRITE); } + // When disk replica is enabled, update allocated_file_size + if (use_disk_replica_ && replica_type == ReplicaType::DISK) { + for (const auto& replica : metadata.replicas) { + auto disk_descriptor = replica.get_descriptor().get_disk_descriptor(); + MasterMetricManager::instance().dec_allocated_file_size(disk_descriptor.object_size); + } + } metadata.EraseReplica(replica_type); if (metadata.IsValid() == false) { accessor.Erase(); diff --git a/mooncake-store/src/rpc_service.cpp b/mooncake-store/src/rpc_service.cpp index 057e5908b..dbe071ac2 100644 --- a/mooncake-store/src/rpc_service.cpp +++ b/mooncake-store/src/rpc_service.cpp @@ -569,6 +569,7 @@ tl::expected WrappedMasterService::GetFsdir() { return result; } + tl::expected WrappedMasterService::Ping( const UUID& client_id) { ScopedVLogTimer timer(1, "Ping"); @@ -627,6 +628,7 @@ void RegisterRpcService( &wrapped_master_service); server.register_handler<&mooncake::WrappedMasterService::GetFsdir>( &wrapped_master_service); + // TODO 注册刷新3fs使用情况的rpc server.register_handler<&mooncake::WrappedMasterService::BatchExistKey>( &wrapped_master_service); server.register_handler<&mooncake::WrappedMasterService::ServiceReady>( From 6f3476ed0486979b8ad748fc33882d9e99381eb9 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Tue, 28 Oct 2025 17:45:32 +0800 Subject: [PATCH 02/14] remove useless code --- mooncake-store/src/rpc_service.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/mooncake-store/src/rpc_service.cpp b/mooncake-store/src/rpc_service.cpp index dbe071ac2..057e5908b 100644 --- a/mooncake-store/src/rpc_service.cpp +++ 
b/mooncake-store/src/rpc_service.cpp @@ -569,7 +569,6 @@ tl::expected WrappedMasterService::GetFsdir() { return result; } - tl::expected WrappedMasterService::Ping( const UUID& client_id) { ScopedVLogTimer timer(1, "Ping"); @@ -628,7 +627,6 @@ void RegisterRpcService( &wrapped_master_service); server.register_handler<&mooncake::WrappedMasterService::GetFsdir>( &wrapped_master_service); - // TODO 注册刷新3fs使用情况的rpc server.register_handler<&mooncake::WrappedMasterService::BatchExistKey>( &wrapped_master_service); server.register_handler<&mooncake::WrappedMasterService::ServiceReady>( From 1c1732d06ddec14714b747b4621987792c820399 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Wed, 29 Oct 2025 16:12:08 +0800 Subject: [PATCH 03/14] code format & rename memory related metrics --- .../include/master_metric_manager.h | 10 ++- mooncake-store/include/types.h | 3 +- mooncake-store/src/master.cpp | 5 +- mooncake-store/src/master_metric_manager.cpp | 62 ++++++++++--------- mooncake-store/src/master_service.cpp | 8 ++- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/mooncake-store/include/master_metric_manager.h b/mooncake-store/include/master_metric_manager.h index 1ea3b6d46..653bc35ed 100644 --- a/mooncake-store/include/master_metric_manager.h +++ b/mooncake-store/include/master_metric_manager.h @@ -182,9 +182,13 @@ class MasterMetricManager { // --- Metric Members --- - // Storage Metrics - ylt::metric::gauge_t allocated_size_; // Use update for gauge - ylt::metric::gauge_t total_capacity_; // Use update for gauge + // Memory Storage Metrics + ylt::metric::gauge_t mem_allocated_size_; // Use update for gauge + ylt::metric::gauge_t mem_total_capacity_; // Use update for gauge + + // File Storage Metrics + ylt::metric::gauge_t file_allocated_size_; + ylt::metric::gauge_t file_total_capacity_; // Key/Value Metrics ylt::metric::gauge_t key_count_; diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index 028b5a4ec..c1068e9b8 100644 --- 
a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -33,7 +33,8 @@ static constexpr int64_t ETCD_MASTER_VIEW_LEASE_TTL = 5; // in seconds static constexpr int64_t DEFAULT_CLIENT_LIVE_TTL_SEC = 10; // in seconds static const std::string DEFAULT_CLUSTER_ID = "mooncake_cluster"; static const std::string DEFAULT_ROOT_FS_DIR = ""; -static const uint64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = 536870912000; // 500 GiB +static const uint64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = + 536870912000; // 500 GiB static const std::string PUT_NO_SPACE_HELPER_STR = // A helpful string " due to insufficient space. Consider lowering " "eviction_high_watermark_ratio or mounting more segments."; diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index d4a11f00c..8f37dc5de 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -70,8 +70,9 @@ DEFINE_int64(client_ttl, mooncake::DEFAULT_CLIENT_LIVE_TTL_SEC, DEFINE_string(root_fs_dir, mooncake::DEFAULT_ROOT_FS_DIR, "Root directory for storage backend, used in HA mode"); -DEFINE_uint64(global_file_segment_size, mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, - "Size of global NFS/3FS segment in bytes"); +DEFINE_uint64(global_file_segment_size, + mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, + "Size of global NFS/3FS segment in bytes"); DEFINE_string(cluster_id, mooncake::DEFAULT_CLUSTER_ID, "Cluster ID for the master service, used for kvcache persistence " "in HA mode"); diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index b3268bb76..016453e13 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -18,14 +18,16 @@ MasterMetricManager& MasterMetricManager::instance() { // --- Constructor --- MasterMetricManager::MasterMetricManager() // Initialize Gauges - : allocated_size_("master_allocated_bytes", - "Total bytes currently allocated across all segments"), - 
total_capacity_("master_total_capacity_bytes", - "Total capacity across all mounted segments"), - total_file_capacity_("master_total_file_capacity_bytes", + : mem_allocated_size_( + "master_allocated_bytes", + "Total bytes currently allocated across all segments"), + mem_total_capacity_("master_total_capacity_bytes", + "Total capacity across all mounted segments"), + file_total_capacity_("master_total_file_capacity_bytes", "Total capacity for file storage in 3fs/nfs"), - allocated_file_size_("master_allocated_file_size_bytes", - "Total bytes currently allocated for file storage in 3fs/nfs"), + file_allocated_size_( + "master_allocated_file_size_bytes", + "Total bytes currently allocated for file storage in 3fs/nfs"), key_count_("master_key_count", "Total number of keys managed by the master"), soft_pin_key_count_( @@ -191,32 +193,32 @@ MasterMetricManager::MasterMetricManager() // --- Metric Interface Methods --- -// Storage Metrics +// Memory Storage Metrics void MasterMetricManager::inc_allocated_size(int64_t val) { - allocated_size_.inc(val); + mem_allocated_size_.inc(val); } void MasterMetricManager::dec_allocated_size(int64_t val) { - allocated_size_.dec(val); + mem_allocated_size_.dec(val); } void MasterMetricManager::inc_total_capacity(int64_t val) { - total_capacity_.inc(val); + mem_total_capacity_.inc(val); } void MasterMetricManager::dec_total_capacity(int64_t val) { - total_capacity_.dec(val); + mem_total_capacity_.dec(val); } int64_t MasterMetricManager::get_allocated_size() { - return allocated_size_.value(); + return mem_allocated_size_.value(); } int64_t MasterMetricManager::get_total_capacity() { - return total_capacity_.value(); + return mem_total_capacity_.value(); } double MasterMetricManager::get_global_used_ratio(void) { - double allocated = allocated_size_.value(); - double capacity = total_capacity_.value(); + double allocated = mem_allocated_size_.value(); + double capacity = mem_total_capacity_.value(); if (capacity == 0) { return 0.0; } @@ 
-225,23 +227,23 @@ double MasterMetricManager::get_global_used_ratio(void) { // File Storage Metrics void MasterMetricManager::inc_allocated_file_size(int64_t val) { - allocated_file_size_.inc(val); + file_allocated_size_.inc(val); } void MasterMetricManager::dec_allocated_file_size(int64_t val) { - allocated_file_size_.dec(val); + file_allocated_size_.dec(val); } int64_t MasterMetricManager::get_allocated_file_size() { - return allocated_file_size_.value(); + return file_allocated_size_.value(); } int64_t MasterMetricManager::get_total_file_capacity() { - return total_file_capacity_.value(); + return file_total_capacity_.value(); } double MasterMetricManager::get_global_file_used_ratio(void) { - double allocated = allocated_file_size_.value(); - double capacity = total_file_capacity_.value(); + double allocated = file_allocated_size_.value(); + double capacity = file_total_capacity_.value(); if (capacity == 0) { return 0.0; } @@ -681,10 +683,10 @@ std::string MasterMetricManager::serialize_metrics() { }; // Serialize Gauges - serialize_metric(allocated_size_); - serialize_metric(total_capacity_); - serialize_metric(allocated_file_size_); - serialize_metric(total_file_capacity_); + serialize_metric(mem_allocated_size_); + serialize_metric(mem_total_capacity_); + serialize_metric(file_allocated_size_); + serialize_metric(file_total_capacity_); serialize_metric(key_count_); serialize_metric(soft_pin_key_count_); if (enable_ha_) { @@ -750,10 +752,10 @@ std::string MasterMetricManager::get_summary_string() { std::stringstream ss; // --- Get current values --- - int64_t allocated = allocated_size_.value(); - int64_t capacity = total_capacity_.value(); - int64_t file_allocated = allocated_file_size_.value(); - int64_t file_capacity = total_file_capacity_.value(); + int64_t allocated = mem_allocated_size_.value(); + int64_t capacity = mem_total_capacity_.value(); + int64_t file_allocated = file_allocated_size_.value(); + int64_t file_capacity = 
file_total_capacity_.value(); int64_t keys = key_count_.value(); int64_t soft_pin_keys = soft_pin_key_count_.value(); int64_t active_clients = active_clients_.value(); diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index c8b07e417..57c51dbf7 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -462,11 +462,13 @@ auto MasterService::PutRevoke(const std::string& key, ReplicaType replica_type) << ", error=invalid_replica_status"; return tl::make_unexpected(ErrorCode::INVALID_WRITE); } - // When disk replica is enabled, update allocated_file_size + // When disk replica is enabled, update allocated_file_size if (use_disk_replica_ && replica_type == ReplicaType::DISK) { for (const auto& replica : metadata.replicas) { - auto disk_descriptor = replica.get_descriptor().get_disk_descriptor(); - MasterMetricManager::instance().dec_allocated_file_size(disk_descriptor.object_size); + auto disk_descriptor = + replica.get_descriptor().get_disk_descriptor(); + MasterMetricManager::instance().dec_allocated_file_size( + disk_descriptor.object_size); } } metadata.EraseReplica(replica_type); From 72c4b9be084a7c83f37d700c8af9add1136142d1 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Wed, 29 Oct 2025 21:14:58 +0800 Subject: [PATCH 04/14] fix some bug --- mooncake-store/include/types.h | 4 +++- mooncake-store/include/utils.h | 6 ++++-- mooncake-store/src/master_metric_manager.cpp | 20 ++++++++++---------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index c1068e9b8..dbd9bb2e6 100644 --- a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,9 @@ static constexpr int64_t ETCD_MASTER_VIEW_LEASE_TTL = 5; // in seconds static constexpr int64_t DEFAULT_CLIENT_LIVE_TTL_SEC = 10; // in seconds static const std::string 
DEFAULT_CLUSTER_ID = "mooncake_cluster"; static const std::string DEFAULT_ROOT_FS_DIR = ""; +// default do not limit DFS usage static const uint64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = - 536870912000; // 500 GiB + std::numeric_limits::max(); static const std::string PUT_NO_SPACE_HELPER_STR = // A helpful string " due to insufficient space. Consider lowering " "eviction_high_watermark_ratio or mounting more segments."; diff --git a/mooncake-store/include/utils.h b/mooncake-store/include/utils.h index 356ff4f70..aaa323c58 100644 --- a/mooncake-store/include/utils.h +++ b/mooncake-store/include/utils.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "types.h" @@ -111,8 +112,9 @@ void free_memory(const std::string& protocol, void* ptr); std::ostringstream oss; oss << std::fixed << std::setprecision(2); - - if (bytes >= static_cast(TB)) { + if (bytes == std::numeric_limits::max()) { + oss << "infinite"; + } else if (bytes >= static_cast(TB)) { oss << bytes / TB << " TB"; } else if (bytes >= static_cast(GB)) { oss << bytes / GB << " GB"; diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index 016453e13..8021b6c84 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -20,9 +20,9 @@ MasterMetricManager::MasterMetricManager() // Initialize Gauges : mem_allocated_size_( "master_allocated_bytes", - "Total bytes currently allocated across all segments"), + "Total memory bytes currently allocated across all segments"), mem_total_capacity_("master_total_capacity_bytes", - "Total capacity across all mounted segments"), + "Total memory capacity across all mounted segments"), file_total_capacity_("master_total_file_capacity_bytes", "Total capacity for file storage in 3fs/nfs"), file_allocated_size_( @@ -752,8 +752,8 @@ std::string MasterMetricManager::get_summary_string() { std::stringstream ss; // --- Get current values --- - int64_t allocated = 
mem_allocated_size_.value(); - int64_t capacity = mem_total_capacity_.value(); + int64_t mem_allocated = mem_allocated_size_.value(); + int64_t mem_capacity = mem_total_capacity_.value(); int64_t file_allocated = file_allocated_size_.value(); int64_t file_capacity = file_total_capacity_.value(); int64_t keys = key_count_.value(); @@ -826,14 +826,14 @@ std::string MasterMetricManager::get_summary_string() { int64_t ping_fails = ping_failures_.value(); // --- Format the summary string --- - ss << "Mem Storage: " << byte_size_to_string(allocated) << " / " - << byte_size_to_string(capacity); - ss << "SSD Storage: " << byte_size_to_string(file_allocated) << " / " - << byte_size_to_string(file_capacity); - if (capacity > 0) { + ss << "Mem Storage: " << byte_size_to_string(mem_allocated) << " / " + << byte_size_to_string(mem_capacity); + if (mem_capacity > 0) { ss << " (" << std::fixed << std::setprecision(1) - << ((double)allocated / (double)capacity * 100.0) << "%)"; + << ((double)mem_allocated / (double)mem_capacity * 100.0) << "%)"; } + ss << "SSD Storage: " << byte_size_to_string(file_allocated) << " / " + << byte_size_to_string(file_capacity); ss << " | Keys: " << keys << " (soft-pinned: " << soft_pin_keys << ")"; if (enable_ha_) { ss << " | Clients: " << active_clients; From 9a3378dc5eee23f4fa767da7a30c1e435cc22ba7 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Thu, 30 Oct 2025 10:48:03 +0800 Subject: [PATCH 05/14] use int64_t define file capacity & add init total_file_capacity --- mooncake-store/include/master_config.h | 8 ++++---- mooncake-store/include/master_metric_manager.h | 2 ++ mooncake-store/include/types.h | 7 ++++--- mooncake-store/src/master.cpp | 2 +- mooncake-store/src/master_metric_manager.cpp | 7 +++++++ mooncake-store/src/master_service.cpp | 2 ++ mooncake-store/tests/master_metrics_test.cpp | 7 ++++++- 7 files changed, 26 insertions(+), 9 deletions(-) diff --git a/mooncake-store/include/master_config.h b/mooncake-store/include/master_config.h index 
25cd5c27f..80ad9a506 100644 --- a/mooncake-store/include/master_config.h +++ b/mooncake-store/include/master_config.h @@ -64,7 +64,7 @@ class MasterServiceSupervisorConfig { std::string local_hostname = "0.0.0.0:50051"; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; - uint64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; + int64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; MasterServiceSupervisorConfig() = default; @@ -164,7 +164,7 @@ class WrappedMasterServiceConfig { bool enable_ha = false; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; - uint64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; + int64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; WrappedMasterServiceConfig() = default; @@ -242,7 +242,7 @@ class MasterServiceConfigBuilder { bool enable_ha_ = false; std::string cluster_id_ = DEFAULT_CLUSTER_ID; std::string root_fs_dir_ = DEFAULT_ROOT_FS_DIR; - uint64_t global_file_segment_size_ = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; + int64_t global_file_segment_size_ = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator_ = BufferAllocatorType::OFFSET; public: @@ -329,7 +329,7 @@ class MasterServiceConfig { bool enable_ha = false; std::string cluster_id = DEFAULT_CLUSTER_ID; std::string root_fs_dir = DEFAULT_ROOT_FS_DIR; - uint64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; + int64_t global_file_segment_size = DEFAULT_GLOBAL_FILE_SEGMENT_SIZE; BufferAllocatorType memory_allocator = BufferAllocatorType::OFFSET; MasterServiceConfig() = default; diff --git a/mooncake-store/include/master_metric_manager.h b/mooncake-store/include/master_metric_manager.h index 653bc35ed..204253351 100644 --- a/mooncake-store/include/master_metric_manager.h +++ 
b/mooncake-store/include/master_metric_manager.h @@ -31,6 +31,8 @@ class MasterMetricManager { // File Storage Metrics void inc_allocated_file_size(int64_t val = 1); void dec_allocated_file_size(int64_t val = 1); + void inc_total_file_capacity(int64_t val = 1); + void dec_total_file_capacity(int64_t val = 1); int64_t get_allocated_file_size(); int64_t get_total_file_capacity(); double get_global_file_used_ratio(void); diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index dbd9bb2e6..5f9f540ca 100644 --- a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -34,9 +34,10 @@ static constexpr int64_t ETCD_MASTER_VIEW_LEASE_TTL = 5; // in seconds static constexpr int64_t DEFAULT_CLIENT_LIVE_TTL_SEC = 10; // in seconds static const std::string DEFAULT_CLUSTER_ID = "mooncake_cluster"; static const std::string DEFAULT_ROOT_FS_DIR = ""; -// default do not limit DFS usage -static const uint64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = - std::numeric_limits::max(); +// default do not limit DFS usage, and use +// int64_t to make it compaitable to file metrics monitor +static const int64_t DEFAULT_GLOBAL_FILE_SEGMENT_SIZE = + std::numeric_limits::max(); static const std::string PUT_NO_SPACE_HELPER_STR = // A helpful string " due to insufficient space. 
Consider lowering " "eviction_high_watermark_ratio or mounting more segments."; diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index 8f37dc5de..91c553be3 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -70,7 +70,7 @@ DEFINE_int64(client_ttl, mooncake::DEFAULT_CLIENT_LIVE_TTL_SEC, DEFINE_string(root_fs_dir, mooncake::DEFAULT_ROOT_FS_DIR, "Root directory for storage backend, used in HA mode"); -DEFINE_uint64(global_file_segment_size, +DEFINE_int64(global_file_segment_size, mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, "Size of global NFS/3FS segment in bytes"); DEFINE_string(cluster_id, mooncake::DEFAULT_CLUSTER_ID, diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index 8021b6c84..33e2f73e3 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -233,6 +233,13 @@ void MasterMetricManager::dec_allocated_file_size(int64_t val) { file_allocated_size_.dec(val); } +void MasterMetricManager::inc_total_file_capacity(int64_t val) { + file_total_capacity_.inc(val); +} +void MasterMetricManager::dec_total_file_capacity(int64_t val) { + file_total_capacity_.dec(val); +} + int64_t MasterMetricManager::get_allocated_file_size() { return file_allocated_size_.value(); } diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 57c51dbf7..497fd961a 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -53,6 +53,8 @@ MasterService::MasterService(const MasterServiceConfig& config) if (!root_fs_dir_.empty()) { use_disk_replica_ = true; + MasterMetricManager::instance().inc_total_file_capacity( + global_file_segment_size_); } } diff --git a/mooncake-store/tests/master_metrics_test.cpp b/mooncake-store/tests/master_metrics_test.cpp index 4614e04bb..e27455e3a 100644 --- a/mooncake-store/tests/master_metrics_test.cpp +++ 
b/mooncake-store/tests/master_metrics_test.cpp @@ -25,11 +25,16 @@ class MasterMetricsTest : public ::testing::Test { TEST_F(MasterMetricsTest, InitialStatusTest) { auto& metrics = MasterMetricManager::instance(); - // Storage Metrics + // Mem Storage Metrics ASSERT_EQ(metrics.get_allocated_size(), 0); ASSERT_EQ(metrics.get_total_capacity(), 0); ASSERT_DOUBLE_EQ(metrics.get_global_used_ratio(), 0.0); + // File Storage Metrics + ASSERT_EQ(metrics.get_allocated_file_size(), 0); + ASSERT_EQ(metrics.get_total_file_capacity(), 0); + ASSERT_DOUBLE_EQ(metrics.get_global_file_used_ratio(), 0.0); + // Key/Value Metrics ASSERT_EQ(metrics.get_key_count(), 0); From 72eacd1c9f3a3f2725a0f6dc299a11625d80decf Mon Sep 17 00:00:00 2001 From: yejj710 Date: Thu, 30 Oct 2025 11:24:10 +0800 Subject: [PATCH 06/14] use int64_t to define file_capacity --- mooncake-store/include/master_config.h | 4 ++-- mooncake-store/include/master_service.h | 2 +- mooncake-store/include/utils.h | 2 +- mooncake-store/src/master.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mooncake-store/include/master_config.h b/mooncake-store/include/master_config.h index 80ad9a506..c3fdbf3bc 100644 --- a/mooncake-store/include/master_config.h +++ b/mooncake-store/include/master_config.h @@ -30,7 +30,7 @@ struct MasterConfig { std::string cluster_id; std::string root_fs_dir; - uint64_t global_file_segment_size; + int64_t global_file_segment_size; std::string memory_allocator; // HTTP metadata server configuration @@ -301,7 +301,7 @@ class MasterServiceConfigBuilder { } MasterServiceConfigBuilder& set_global_file_segment_size( - uint64_t segment_size) { + int64_t segment_size) { global_file_segment_size_ = segment_size; return *this; } diff --git a/mooncake-store/include/master_service.h b/mooncake-store/include/master_service.h index 08f701cd6..d066a8e16 100644 --- a/mooncake-store/include/master_service.h +++ b/mooncake-store/include/master_service.h @@ -472,7 +472,7 @@ class MasterService { 
// root filesystem directory for persistent storage const std::string root_fs_dir_; // global 3fs/nfs segment size - uint64_t global_file_segment_size_; + int64_t global_file_segment_size_; bool use_disk_replica_{false}; diff --git a/mooncake-store/include/utils.h b/mooncake-store/include/utils.h index aaa323c58..a2bed5986 100644 --- a/mooncake-store/include/utils.h +++ b/mooncake-store/include/utils.h @@ -112,7 +112,7 @@ void free_memory(const std::string& protocol, void* ptr); std::ostringstream oss; oss << std::fixed << std::setprecision(2); - if (bytes == std::numeric_limits::max()) { + if (static_cast(bytes) == std::numeric_limits::max()) { oss << "infinite"; } else if (bytes >= static_cast(TB)) { oss << bytes / TB << " TB"; diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index 91c553be3..f741f8b2e 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -132,7 +132,7 @@ void InitMasterConf(const mooncake::DefaultConfig& default_config, FLAGS_cluster_id); default_config.GetString("root_fs_dir", &master_config.root_fs_dir, FLAGS_root_fs_dir); - default_config.GetUInt64("global_file_segment_size", + default_config.GetInt64("global_file_segment_size", &master_config.global_file_segment_size, FLAGS_global_file_segment_size); default_config.GetString("memory_allocator", From b7494f388f0f135fcde287092bf8263ff8e6369f Mon Sep 17 00:00:00 2001 From: yejj710 Date: Thu, 30 Oct 2025 11:35:17 +0800 Subject: [PATCH 07/14] rename memory usage related functions --- .../include/master_metric_manager.h | 16 +++++----- mooncake-store/src/allocator.cpp | 10 +++---- mooncake-store/src/master_metric_manager.cpp | 14 ++++----- mooncake-store/src/master_service.cpp | 2 +- mooncake-store/src/segment.cpp | 4 +-- mooncake-store/tests/master_metrics_test.cpp | 30 +++++++++---------- 6 files changed, 38 insertions(+), 38 deletions(-) diff --git a/mooncake-store/include/master_metric_manager.h 
b/mooncake-store/include/master_metric_manager.h index 204253351..e11ceaa33 100644 --- a/mooncake-store/include/master_metric_manager.h +++ b/mooncake-store/include/master_metric_manager.h @@ -19,14 +19,14 @@ class MasterMetricManager { MasterMetricManager(MasterMetricManager&&) = delete; MasterMetricManager& operator=(MasterMetricManager&&) = delete; - // Storage Metrics - void inc_allocated_size(int64_t val = 1); - void dec_allocated_size(int64_t val = 1); - void inc_total_capacity(int64_t val = 1); - void dec_total_capacity(int64_t val = 1); - int64_t get_allocated_size(); - int64_t get_total_capacity(); - double get_global_used_ratio(void); + // Memory Storage Metrics + void inc_allocated_mem_size(int64_t val = 1); + void dec_allocated_mem_size(int64_t val = 1); + void inc_total_mem_capacity(int64_t val = 1); + void dec_total_mem_capacity(int64_t val = 1); + int64_t get_allocated_mem_size(); + int64_t get_total_mem_capacity(); + double get_global_mem_used_ratio(void); // File Storage Metrics void inc_allocated_file_size(int64_t val = 1); diff --git a/mooncake-store/src/allocator.cpp b/mooncake-store/src/allocator.cpp index 5df83a44a..18ac7e00a 100644 --- a/mooncake-store/src/allocator.cpp +++ b/mooncake-store/src/allocator.cpp @@ -23,7 +23,7 @@ AllocatedBuffer::~AllocatedBuffer() { alloc->deallocate(this); VLOG(1) << "buf_handle_deallocated size=" << size_; } else { - MasterMetricManager::instance().dec_allocated_size(size_); + MasterMetricManager::instance().dec_allocated_mem_size(size_); VLOG(1) << "allocator=expired_or_null in buf_handle_destructor"; } } @@ -117,7 +117,7 @@ std::unique_ptr CachelibBufferAllocator::allocate( VLOG(1) << "allocation_succeeded size=" << size << " segment=" << segment_name_ << " address=" << buffer; cur_size_.fetch_add(size); - MasterMetricManager::instance().inc_allocated_size(size); + MasterMetricManager::instance().inc_allocated_mem_size(size); return std::make_unique(shared_from_this(), buffer, size); } @@ -128,7 +128,7 @@ 
void CachelibBufferAllocator::deallocate(AllocatedBuffer* handle) { size_t freed_size = handle->size_; // Store size before handle might become invalid cur_size_.fetch_sub(freed_size); - MasterMetricManager::instance().dec_allocated_size(freed_size); + MasterMetricManager::instance().dec_allocated_mem_size(freed_size); VLOG(1) << "deallocation_succeeded address=" << handle->buffer_ptr_ << " size=" << freed_size << " segment=" << segment_name_; } catch (const std::exception& e) { @@ -217,7 +217,7 @@ std::unique_ptr OffsetBufferAllocator::allocate(size_t size) { } cur_size_.fetch_add(size); - MasterMetricManager::instance().inc_allocated_size(size); + MasterMetricManager::instance().inc_allocated_mem_size(size); return allocated_buffer; } @@ -228,7 +228,7 @@ void OffsetBufferAllocator::deallocate(AllocatedBuffer* handle) { size_t freed_size = handle->size(); handle->offset_handle_.reset(); cur_size_.fetch_sub(freed_size); - MasterMetricManager::instance().dec_allocated_size(freed_size); + MasterMetricManager::instance().dec_allocated_mem_size(freed_size); VLOG(1) << "deallocation_succeeded address=" << handle->data() << " size=" << freed_size << " segment=" << segment_name_; } catch (const std::exception& e) { diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index 33e2f73e3..c4c2cb373 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -194,29 +194,29 @@ MasterMetricManager::MasterMetricManager() // --- Metric Interface Methods --- // Memory Storage Metrics -void MasterMetricManager::inc_allocated_size(int64_t val) { +void MasterMetricManager::inc_allocated_mem_size(int64_t val) { mem_allocated_size_.inc(val); } -void MasterMetricManager::dec_allocated_size(int64_t val) { +void MasterMetricManager::dec_allocated_mem_size(int64_t val) { mem_allocated_size_.dec(val); } -void MasterMetricManager::inc_total_capacity(int64_t val) { +void 
MasterMetricManager::inc_total_mem_capacity(int64_t val) { mem_total_capacity_.inc(val); } -void MasterMetricManager::dec_total_capacity(int64_t val) { +void MasterMetricManager::dec_total_mem_capacity(int64_t val) { mem_total_capacity_.dec(val); } -int64_t MasterMetricManager::get_allocated_size() { +int64_t MasterMetricManager::get_allocated_mem_size() { return mem_allocated_size_.value(); } -int64_t MasterMetricManager::get_total_capacity() { +int64_t MasterMetricManager::get_total_mem_capacity() { return mem_total_capacity_.value(); } -double MasterMetricManager::get_global_used_ratio(void) { +double MasterMetricManager::get_global_mem_used_ratio(void) { double allocated = mem_allocated_size_.value(); double capacity = mem_total_capacity_.value(); if (capacity == 0) { diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 497fd961a..3176df234 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -670,7 +670,7 @@ void MasterService::EvictionThreadFunc() { while (eviction_running_) { double used_ratio = - MasterMetricManager::instance().get_global_used_ratio(); + MasterMetricManager::instance().get_global_mem_used_ratio(); if (used_ratio > eviction_high_watermark_ratio_ || (need_eviction_ && eviction_ratio_ > 0.0)) { double evict_ratio_target = std::max( diff --git a/mooncake-store/src/segment.cpp b/mooncake-store/src/segment.cpp index 1d6b81b06..fb8ad3e4b 100644 --- a/mooncake-store/src/segment.cpp +++ b/mooncake-store/src/segment.cpp @@ -81,7 +81,7 @@ ErrorCode ScopedSegmentAccess::MountSegment(const Segment& segment, segment_manager_->mounted_segments_[segment.id] = { segment, SegmentStatus::OK, std::move(allocator)}; - MasterMetricManager::instance().inc_total_capacity(size); + MasterMetricManager::instance().inc_total_mem_capacity(size); return ErrorCode::OK; } @@ -203,7 +203,7 @@ ErrorCode ScopedSegmentAccess::CommitUnmountSegment( 
segment_manager_->mounted_segments_.erase(segment_id); // Decrease the total capacity - MasterMetricManager::instance().dec_total_capacity(metrics_dec_capacity); + MasterMetricManager::instance().dec_total_mem_capacity(metrics_dec_capacity); return ErrorCode::OK; } diff --git a/mooncake-store/tests/master_metrics_test.cpp b/mooncake-store/tests/master_metrics_test.cpp index e27455e3a..dda05c90d 100644 --- a/mooncake-store/tests/master_metrics_test.cpp +++ b/mooncake-store/tests/master_metrics_test.cpp @@ -26,9 +26,9 @@ TEST_F(MasterMetricsTest, InitialStatusTest) { auto& metrics = MasterMetricManager::instance(); // Mem Storage Metrics - ASSERT_EQ(metrics.get_allocated_size(), 0); - ASSERT_EQ(metrics.get_total_capacity(), 0); - ASSERT_DOUBLE_EQ(metrics.get_global_used_ratio(), 0.0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); + ASSERT_EQ(metrics.get_total_mem_capacity(), 0); + ASSERT_DOUBLE_EQ(metrics.get_global_mem_used_ratio(), 0.0); // File Storage Metrics ASSERT_EQ(metrics.get_allocated_file_size(), 0); @@ -121,9 +121,9 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { // Test MountSegment request auto mount_result = service_.MountSegment(segment, client_id); ASSERT_TRUE(mount_result.has_value()); - ASSERT_EQ(metrics.get_allocated_size(), 0); - ASSERT_EQ(metrics.get_total_capacity(), kSegmentSize); - ASSERT_DOUBLE_EQ(metrics.get_global_used_ratio(), 0.0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); + ASSERT_EQ(metrics.get_total_mem_capacity(), kSegmentSize); + ASSERT_DOUBLE_EQ(metrics.get_global_mem_used_ratio(), 0.0); ASSERT_EQ(metrics.get_mount_segment_requests(), 1); ASSERT_EQ(metrics.get_mount_segment_failures(), 0); @@ -131,13 +131,13 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { auto put_start_result1 = service_.PutStart(key, slice_lengths, config); ASSERT_TRUE(put_start_result1.has_value()); ASSERT_EQ(metrics.get_key_count(), 1); - ASSERT_EQ(metrics.get_allocated_size(), value_length); + ASSERT_EQ(metrics.get_allocated_mem_size(), 
value_length); ASSERT_EQ(metrics.get_put_start_requests(), 1); ASSERT_EQ(metrics.get_put_start_failures(), 0); auto put_revoke_result = service_.PutRevoke(key, ReplicaType::MEMORY); ASSERT_TRUE(put_revoke_result.has_value()); ASSERT_EQ(metrics.get_key_count(), 0); - ASSERT_EQ(metrics.get_allocated_size(), 0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); ASSERT_EQ(metrics.get_put_revoke_requests(), 1); ASSERT_EQ(metrics.get_put_revoke_failures(), 0); @@ -145,13 +145,13 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { auto put_start_result2 = service_.PutStart(key, slice_lengths, config); ASSERT_TRUE(put_start_result2.has_value()); ASSERT_EQ(metrics.get_key_count(), 1); - ASSERT_EQ(metrics.get_allocated_size(), value_length); + ASSERT_EQ(metrics.get_allocated_mem_size(), value_length); ASSERT_EQ(metrics.get_put_start_requests(), 2); ASSERT_EQ(metrics.get_put_start_failures(), 0); auto put_end_result = service_.PutEnd(key, ReplicaType::MEMORY); ASSERT_TRUE(put_end_result.has_value()); ASSERT_EQ(metrics.get_key_count(), 1); - ASSERT_EQ(metrics.get_allocated_size(), value_length); + ASSERT_EQ(metrics.get_allocated_mem_size(), value_length); ASSERT_EQ(metrics.get_put_end_requests(), 1); ASSERT_EQ(metrics.get_put_end_failures(), 0); @@ -175,7 +175,7 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { ASSERT_EQ(metrics.get_remove_requests(), 1); ASSERT_EQ(metrics.get_remove_failures(), 0); ASSERT_EQ(metrics.get_key_count(), 0); - ASSERT_EQ(metrics.get_allocated_size(), 0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); // Test RemoveAll request auto put_start_result3 = service_.PutStart(key, slice_lengths, config); @@ -187,7 +187,7 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { ASSERT_EQ(metrics.get_remove_all_requests(), 1); ASSERT_EQ(metrics.get_remove_all_failures(), 0); ASSERT_EQ(metrics.get_key_count(), 0); - ASSERT_EQ(metrics.get_allocated_size(), 0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); // Test UnmountSegment request auto put_start_result4 = 
service_.PutStart(key, slice_lengths, config); @@ -199,9 +199,9 @@ TEST_F(MasterMetricsTest, BasicRequestTest) { ASSERT_EQ(metrics.get_unmount_segment_requests(), 1); ASSERT_EQ(metrics.get_unmount_segment_failures(), 0); ASSERT_EQ(metrics.get_key_count(), 0); - ASSERT_EQ(metrics.get_allocated_size(), 0); - ASSERT_EQ(metrics.get_total_capacity(), 0); - ASSERT_DOUBLE_EQ(metrics.get_global_used_ratio(), 0.0); + ASSERT_EQ(metrics.get_allocated_mem_size(), 0); + ASSERT_EQ(metrics.get_total_mem_capacity(), 0); + ASSERT_DOUBLE_EQ(metrics.get_global_mem_used_ratio(), 0.0); } TEST_F(MasterMetricsTest, BatchRequestTest) { From 6b4ca2d5a3de2872bd4979a4aae03ec42108a063 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Thu, 30 Oct 2025 16:52:09 +0800 Subject: [PATCH 08/14] fix 2 bugs --- mooncake-store/src/master_metric_manager.cpp | 2 +- mooncake-store/src/master_service.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mooncake-store/src/master_metric_manager.cpp b/mooncake-store/src/master_metric_manager.cpp index c4c2cb373..330f1d841 100644 --- a/mooncake-store/src/master_metric_manager.cpp +++ b/mooncake-store/src/master_metric_manager.cpp @@ -839,7 +839,7 @@ std::string MasterMetricManager::get_summary_string() { ss << " (" << std::fixed << std::setprecision(1) << ((double)mem_allocated / (double)mem_capacity * 100.0) << "%)"; } - ss << "SSD Storage: " << byte_size_to_string(file_allocated) << " / " + ss << " | SSD Storage: " << byte_size_to_string(file_allocated) << " / " << byte_size_to_string(file_capacity); ss << " | Keys: " << keys << " (soft-pinned: " << soft_pin_keys << ")"; if (enable_ha_) { diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index 3176df234..d3260d579 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -467,6 +467,9 @@ auto MasterService::PutRevoke(const std::string& key, ReplicaType replica_type) // When disk replica is enabled, update 
allocated_file_size if (use_disk_replica_ && replica_type == ReplicaType::DISK) { for (const auto& replica : metadata.replicas) { + if (replica.is_memory_replica()) { + continue; + } auto disk_descriptor = replica.get_descriptor().get_disk_descriptor(); MasterMetricManager::instance().dec_allocated_file_size( From 0a06fdd453870d3523432bdfb619e2cc9f38a1d1 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Thu, 30 Oct 2025 17:13:41 +0800 Subject: [PATCH 09/14] add docs --- doc/en/mooncake-store.md | 6 ++++++ doc/zh/mooncake-store.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/doc/en/mooncake-store.md b/doc/en/mooncake-store.md index 0a191be8b..df2252eb6 100644 --- a/doc/en/mooncake-store.md +++ b/doc/en/mooncake-store.md @@ -525,6 +525,12 @@ When the user specifies `--root_fs_dir=/path/to/dir` when starting the master, a ​Note​​: When enabling this feature, the user must ensure that the DFS-mounted directory (`root_fs_dir=/path/to/dir`) is valid and consistent across all client hosts. If some clients have invalid or incorrect mount paths, it may cause abnormal behavior in Mooncake Store. +#### Persistent Storage Space Configuration​ +Mooncake provides configurable DFS available space. Users can specify `--global_file_segment_size=1048576` when starting the master, indicating a maximum usable space of 1MB on DFS. +The current default setting is the maximum value of int64 (as we generally do not restrict DFS storage usage), which is displayed as `infinite` in `mooncake_maseter`'s console logs. + +Note The DFS cache space configuration must be used together with the `--root_fs_dir` parameter. Otherwise, you will observe that the `SSD Storage` usage consistently shows: `0 B / 0 B`. + #### Data Access Mechanism The persistence feature also follows Mooncake Store's design principle of separating control flow from data flow. 
The read/write operations of kvcache objects are completed on the client side, while the query and management functions of kvcache objects are handled on the master side. In the file system, the key -> kvcache object index information is maintained by a fixed indexing mechanism, with each file corresponding to one kvcache object (the filename serves as the associated key name). diff --git a/doc/zh/mooncake-store.md b/doc/zh/mooncake-store.md index cd84687bf..09e6fa3b3 100644 --- a/doc/zh/mooncake-store.md +++ b/doc/zh/mooncake-store.md @@ -529,6 +529,12 @@ struct ReplicateConfig { 注意在开启该功能时,用户需要保证各client所在主机的DFS挂载目录都是有效且相同的(`root_fs_dir=/path/to/dir`),如果存在部分client挂载目录无效或错误,会导致mooncake store运行出现一些异常情况。 +#### 持久化存储空间配置 +mooncake提供了DFS可用空间的配置,用户可以在启动master时指定`--global_file_segment_size=107374182400`(单位为字节),表示DFS上最大可用空间为100GB。 +当前默认设置为int64的最大值(因为我们一般不限制DFS的使用空间大小),在`mooncake_maseter`的打屏日志中使用`infinite`表示最大值。 + +**注意** DFS缓存空间配置必须结合`--root_fs_dir`参数一起使用,否则你会发现`SSD Storage`使用率一致是: `0 B / 0 B` + #### 数据访问机制 持久化功能同样遵循了mooncake store中控制流和数据流分离的设计。kvcache object的读\写操作在client端完成,kvcache object的查询和管理功能在master端完成。在文件系统中key -> kvcache object的索引信息是由固定的索引机制维护,每个文件对应一个kvcache object(文件名即为对应的key名称)。 From 800ac44f1436ff1e6d9988f66110cac5f983ec99 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Fri, 31 Oct 2025 08:49:21 +0800 Subject: [PATCH 10/14] fix code format --- mooncake-store/src/master.cpp | 8 ++++---- mooncake-store/src/segment.cpp | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mooncake-store/src/master.cpp b/mooncake-store/src/master.cpp index f741f8b2e..b96b52be2 100644 --- a/mooncake-store/src/master.cpp +++ b/mooncake-store/src/master.cpp @@ -71,8 +71,8 @@ DEFINE_int64(client_ttl, mooncake::DEFAULT_CLIENT_LIVE_TTL_SEC, DEFINE_string(root_fs_dir, mooncake::DEFAULT_ROOT_FS_DIR, "Root directory for storage backend, used in HA mode"); DEFINE_int64(global_file_segment_size, - mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, - "Size of global NFS/3FS segment in bytes"); + 
mooncake::DEFAULT_GLOBAL_FILE_SEGMENT_SIZE, + "Size of global NFS/3FS segment in bytes"); DEFINE_string(cluster_id, mooncake::DEFAULT_CLUSTER_ID, "Cluster ID for the master service, used for kvcache persistence " "in HA mode"); @@ -133,8 +133,8 @@ void InitMasterConf(const mooncake::DefaultConfig& default_config, default_config.GetString("root_fs_dir", &master_config.root_fs_dir, FLAGS_root_fs_dir); default_config.GetInt64("global_file_segment_size", - &master_config.global_file_segment_size, - FLAGS_global_file_segment_size); + &master_config.global_file_segment_size, + FLAGS_global_file_segment_size); default_config.GetString("memory_allocator", &master_config.memory_allocator, FLAGS_memory_allocator); diff --git a/mooncake-store/src/segment.cpp b/mooncake-store/src/segment.cpp index fb8ad3e4b..8d57f79ef 100644 --- a/mooncake-store/src/segment.cpp +++ b/mooncake-store/src/segment.cpp @@ -203,7 +203,8 @@ ErrorCode ScopedSegmentAccess::CommitUnmountSegment( segment_manager_->mounted_segments_.erase(segment_id); // Decrease the total capacity - MasterMetricManager::instance().dec_total_mem_capacity(metrics_dec_capacity); + MasterMetricManager::instance().dec_total_mem_capacity( + metrics_dec_capacity); return ErrorCode::OK; } From f44554835929a927ad514d732170bae7cf31c7d8 Mon Sep 17 00:00:00 2001 From: yejj710 Date: Fri, 31 Oct 2025 17:39:04 +0800 Subject: [PATCH 11/14] add more docs --- doc/en/mooncake-store.md | 3 ++- doc/zh/mooncake-store.md | 1 + docs/source/deployment/mooncake-store-deployment-guide.md | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/en/mooncake-store.md b/doc/en/mooncake-store.md index df2252eb6..30a2a903e 100644 --- a/doc/en/mooncake-store.md +++ b/doc/en/mooncake-store.md @@ -529,7 +529,8 @@ When the user specifies `--root_fs_dir=/path/to/dir` when starting the master, a Mooncake provides configurable DFS available space. 
Users can specify `--global_file_segment_size=1048576` when starting the master, indicating a maximum usable space of 1MB on DFS. The current default setting is the maximum value of int64 (as we generally do not restrict DFS storage usage), which is displayed as `infinite` in `mooncake_maseter`'s console logs. -Note The DFS cache space configuration must be used together with the `--root_fs_dir` parameter. Otherwise, you will observe that the `SSD Storage` usage consistently shows: `0 B / 0 B`. +**Notice** The DFS cache space configuration must be used together with the `--root_fs_dir` parameter. Otherwise, you will observe that the `SSD Storage` usage consistently shows: `0 B / 0 B` +**Notice** The capability for file eviction on DFS has not been provided yet #### Data Access Mechanism diff --git a/doc/zh/mooncake-store.md b/doc/zh/mooncake-store.md index 09e6fa3b3..1568eb60c 100644 --- a/doc/zh/mooncake-store.md +++ b/doc/zh/mooncake-store.md @@ -534,6 +534,7 @@ mooncake提供了DFS可用空间的配置,用户可以在启动master时指定 当前默认设置为int64的最大值(因为我们一般不限制DFS的使用空间大小),在`mooncake_maseter`的打屏日志中使用`infinite`表示最大值。 **注意** DFS缓存空间配置必须结合`--root_fs_dir`参数一起使用,否则你会发现`SSD Storage`使用率一致是: `0 B / 0 B` +**注意** 当前还没有提供DFS上文件驱逐的能力 #### 数据访问机制 持久化功能同样遵循了mooncake store中控制流和数据流分离的设计。kvcache object的读\写操作在client端完成,kvcache object的查询和管理功能在master端完成。在文件系统中key -> kvcache object的索引信息是由固定的索引机制维护,每个文件对应一个kvcache object(文件名即为对应的key名称)。 diff --git a/docs/source/deployment/mooncake-store-deployment-guide.md b/docs/source/deployment/mooncake-store-deployment-guide.md index 1b7b7a08b..f42a4ca96 100644 --- a/docs/source/deployment/mooncake-store-deployment-guide.md +++ b/docs/source/deployment/mooncake-store-deployment-guide.md @@ -33,6 +33,10 @@ This page summarizes useful flags, environment variables, and HTTP endpoints to - `--client_ttl` (int64, default `10` s): Client alive TTL after last ping (HA mode). - `--cluster_id` (str, default `mooncake_cluster`): Cluster ID for persistence in HA mode. 
+- DFS Storage (optional) + - `--root_fs_dir` (str, default empty): DFS mount directory for storage backend, used in Multi-layer Storage Support. + - `--global_file_segment_size` (int64, default `int64_max`): Maximum available space for DFS segments, in bytes. + Example (enable embedded HTTP metadata and metrics): ```bash From b870c38fed7e8630305a6ae4d839abafb139e33c Mon Sep 17 00:00:00 2001 From: yejj710 Date: Fri, 31 Oct 2025 18:19:16 +0800 Subject: [PATCH 12/14] use RAII way to update allocated_file_size --- mooncake-store/include/replica.h | 11 +++++++++++ mooncake-store/src/master_service.cpp | 14 +------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/mooncake-store/include/replica.h b/mooncake-store/include/replica.h index 763f49646..6ee03bddb 100644 --- a/mooncake-store/include/replica.h +++ b/mooncake-store/include/replica.h @@ -11,6 +11,8 @@ #include "types.h" #include "allocator.h" +#include "master_metric_manager.h" + namespace mooncake { @@ -92,6 +94,15 @@ struct MemoryReplicaData { struct DiskReplicaData { std::string file_path; uint64_t object_size = 0; + // Automatic update allocated_file_size via RAII + DiskReplicaData(std::string file_path, uint64_t object_size) + : file_path(std::move(file_path)), object_size(object_size) { + MasterMetricManager::instance().inc_allocated_file_size(object_size); + } + + ~DiskReplicaData() { + MasterMetricManager::instance().dec_allocated_file_size(object_size); + } }; struct MemoryDescriptor { diff --git a/mooncake-store/src/master_service.cpp b/mooncake-store/src/master_service.cpp index d3260d579..3346ff7a9 100644 --- a/mooncake-store/src/master_service.cpp +++ b/mooncake-store/src/master_service.cpp @@ -410,7 +410,6 @@ auto MasterService::PutStart(const std::string& key, std::string file_path = ResolvePath(key); replicas.emplace_back(file_path, total_length, ReplicaStatus::PROCESSING); - MasterMetricManager::instance().inc_allocated_file_size(total_length); } std::vector replica_list; @@ -464,18 
+463,7 @@ auto MasterService::PutRevoke(const std::string& key, ReplicaType replica_type) << ", error=invalid_replica_status"; return tl::make_unexpected(ErrorCode::INVALID_WRITE); } - // When disk replica is enabled, update allocated_file_size - if (use_disk_replica_ && replica_type == ReplicaType::DISK) { - for (const auto& replica : metadata.replicas) { - if (replica.is_memory_replica()) { - continue; - } - auto disk_descriptor = - replica.get_descriptor().get_disk_descriptor(); - MasterMetricManager::instance().dec_allocated_file_size( - disk_descriptor.object_size); - } - } + metadata.EraseReplica(replica_type); if (metadata.IsValid() == false) { accessor.Erase(); From 48c484da8b641f0b29f8b26d9ea1edce7e4196ea Mon Sep 17 00:00:00 2001 From: yejj710 Date: Sun, 2 Nov 2025 16:08:21 +0800 Subject: [PATCH 13/14] fix ci --- mooncake-store/include/replica.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mooncake-store/include/replica.h b/mooncake-store/include/replica.h index 6ee03bddb..6409f9bf6 100644 --- a/mooncake-store/include/replica.h +++ b/mooncake-store/include/replica.h @@ -13,7 +13,6 @@ #include "allocator.h" #include "master_metric_manager.h" - namespace mooncake { /** @@ -97,11 +96,11 @@ struct DiskReplicaData { // Automatic update allocated_file_size via RAII DiskReplicaData(std::string file_path, uint64_t object_size) : file_path(std::move(file_path)), object_size(object_size) { - MasterMetricManager::instance().inc_allocated_file_size(object_size); + MasterMetricManager::instance().inc_allocated_file_size(object_size); } ~DiskReplicaData() { - MasterMetricManager::instance().dec_allocated_file_size(object_size); + MasterMetricManager::instance().dec_allocated_file_size(object_size); } }; From 25a33d846ddf0c4742436968c26e9a7cceb6818b Mon Sep 17 00:00:00 2001 From: yejj710 Date: Sun, 2 Nov 2025 23:30:58 +0800 Subject: [PATCH 14/14] fix CI compile problem --- mooncake-store/include/master_service.h | 13 +++++++++++++ 
mooncake-store/include/replica.h | 9 --------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/mooncake-store/include/master_service.h b/mooncake-store/include/master_service.h index d066a8e16..28c318bbe 100644 --- a/mooncake-store/include/master_service.h +++ b/mooncake-store/include/master_service.h @@ -245,6 +245,8 @@ class MasterService { if (soft_pin_timeout) { MasterMetricManager::instance().dec_soft_pin_key_count(1); } + MasterMetricManager::instance().dec_allocated_file_size( + disk_replica_size); } ObjectMetadata() = delete; @@ -261,6 +263,16 @@ class MasterService { MasterMetricManager::instance().inc_soft_pin_key_count(1); } MasterMetricManager::instance().observe_value_size(value_length); + // Automatic update allocated_file_size via RAII + for (const auto& replica : replicas) { + if (replica.is_disk_replica()) { + disk_replica_size += replica.get_descriptor() + .get_disk_descriptor() + .object_size; + } + } + MasterMetricManager::instance().inc_allocated_file_size( + disk_replica_size); } ObjectMetadata(const ObjectMetadata&) = delete; @@ -275,6 +287,7 @@ class MasterService { std::chrono::steady_clock::time_point lease_timeout; // hard lease std::optional soft_pin_timeout; // optional soft pin, only set for vip objects + uint64_t disk_replica_size = 0; // Check if there are some replicas with a different status than the // given value. 
If there are, return the status of the first replica diff --git a/mooncake-store/include/replica.h b/mooncake-store/include/replica.h index 6409f9bf6..c37e6149a 100644 --- a/mooncake-store/include/replica.h +++ b/mooncake-store/include/replica.h @@ -93,15 +93,6 @@ struct MemoryReplicaData { struct DiskReplicaData { std::string file_path; uint64_t object_size = 0; - // Automatic update allocated_file_size via RAII - DiskReplicaData(std::string file_path, uint64_t object_size) - : file_path(std::move(file_path)), object_size(object_size) { - MasterMetricManager::instance().inc_allocated_file_size(object_size); - } - - ~DiskReplicaData() { - MasterMetricManager::instance().dec_allocated_file_size(object_size); - } }; struct MemoryDescriptor {