-
Notifications
You must be signed in to change notification settings - Fork 7k
[core] (cgroups 8/n) Wiring CgroupManager into the raylet. #56297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
6fc9652
70ed06d
fd39d7e
4e8e20c
9304014
20bf0dd
699f3ba
58e0c1a
e2e957d
9e7a2ef
f86a010
29a7ae4
a843dd4
43a180e
a3164a4
9342ed5
5b89821
4af9d3f
74612b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| // Copyright 2025 The Ray Authors. | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
| #include <memory> | ||
| #include <string> | ||
| #include <utility> | ||
|
|
||
| #include "ray/common/cgroup2/cgroup_driver_interface.h" | ||
| #include "ray/common/cgroup2/cgroup_manager.h" | ||
| #include "ray/common/status_or.h" | ||
|
|
||
| namespace ray { | ||
|
|
||
| CgroupManager::CgroupManager(std::string base_cgroup_path, | ||
| const std::string &node_id, | ||
| std::unique_ptr<CgroupDriverInterface> cgroup_driver) {} | ||
|
|
||
| CgroupManager::~CgroupManager() {} | ||
|
|
||
| StatusOr<std::unique_ptr<CgroupManager>> CgroupManager::Create( | ||
| std::string base_cgroup_path, | ||
| const std::string &node_id, | ||
| const int64_t system_reserved_cpu_weight, | ||
| const int64_t system_reserved_memory_bytes, | ||
| std::unique_ptr<CgroupDriverInterface> cgroup_driver) { | ||
| return std::unique_ptr<CgroupManager>( | ||
| new CgroupManager(base_cgroup_path, node_id, std::move(cgroup_driver))); | ||
| } | ||
| } // namespace ray |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,12 +24,14 @@ | |
| #include "gflags/gflags.h" | ||
| #include "nlohmann/json.hpp" | ||
| #include "ray/common/asio/instrumented_io_context.h" | ||
| #include "ray/common/cgroup/cgroup_manager.h" | ||
| #include "ray/common/cgroup2/cgroup_manager.h" | ||
| #include "ray/common/cgroup2/sysfs_cgroup_driver.h" | ||
| #include "ray/common/constants.h" | ||
| #include "ray/common/id.h" | ||
| #include "ray/common/lease/lease.h" | ||
| #include "ray/common/ray_config.h" | ||
| #include "ray/common/status.h" | ||
| #include "ray/common/status_or.h" | ||
| #include "ray/gcs/gcs_client/gcs_client.h" | ||
| #include "ray/object_manager/ownership_object_directory.h" | ||
| #include "ray/raylet/local_object_manager.h" | ||
|
|
@@ -96,12 +98,6 @@ DEFINE_int64(object_store_memory, -1, "The initial memory of the object store.") | |
| DEFINE_string(node_name, "", "The user-provided identifier or name for this node."); | ||
| DEFINE_string(session_name, "", "The current Ray session name."); | ||
| DEFINE_string(cluster_id, "", "ID of the cluster, separate from observability."); | ||
| // TODO(hjiang): At the moment only enablement flag is added, I will add other flags for | ||
| // CPU and memory resource reservation in the followup PR. | ||
| DEFINE_bool(enable_resource_isolation, | ||
| false, | ||
| "Enable resource isolation through cgroupv2 by reserving resources for ray " | ||
| "system processes."); | ||
|
|
||
| #ifdef __linux__ | ||
| DEFINE_string(plasma_directory, | ||
|
|
@@ -117,6 +113,30 @@ DEFINE_bool(huge_pages, false, "Enable huge pages."); | |
| DEFINE_string(labels, | ||
| "", | ||
| "Define the key-value format of node labels, which is a serialized JSON."); | ||
| DEFINE_bool( | ||
| enable_resource_isolation, | ||
| false, | ||
| "Enables resource isolation through cgroupv2. The raylet will create and " | ||
| "manage a cgroup hierarchy that separates system processes and worker processes " | ||
| "into separate cgroups."); | ||
| DEFINE_string( | ||
| cgroup_path, | ||
| "", | ||
| "Path of the cgroup that the raylet will take ownership of to create its cgorup " | ||
| "hierarchy. The raylet process must have read, write, and execute permission for " | ||
| "this path. If enable_resource_isolation is true, then this cannot be empty."); | ||
| DEFINE_int64( | ||
| system_reserved_cpu_weight, | ||
| -1, | ||
| "The amount of cores reserved for ray system processes. It will be applied " | ||
| "as a cpu.weight constraint to the system cgroup. 10000 - " | ||
| "system_reserved_cpu_weight will be applied as a constraint to the " | ||
| "application cgroup. If enable resource isolation is true, then this cannot be -1."); | ||
| DEFINE_int64(system_reserved_memory_bytes, | ||
| -1, | ||
| "The amount of memory in bytes reserved for ray system processes. It will " | ||
| "be applied as a memory.min constraint to the sytem cgroup. If enable " | ||
| "resource isolation is true, then this cannot be -1"); | ||
|
|
||
| absl::flat_hash_map<std::string, std::string> parse_node_labels( | ||
| const std::string &labels_json_str) { | ||
|
|
@@ -224,18 +244,50 @@ int main(int argc, char *argv[]) { | |
| const std::string session_name = FLAGS_session_name; | ||
| const bool is_head_node = FLAGS_head; | ||
| const std::string labels_json_str = FLAGS_labels; | ||
| const bool enable_resource_isolation = FLAGS_enable_resource_isolation; | ||
| const std::string cgroup_path = FLAGS_cgroup_path; | ||
| const int64_t system_reserved_cpu_weight = FLAGS_system_reserved_cpu_weight; | ||
| const int64_t system_reserved_memory_bytes = FLAGS_system_reserved_memory_bytes; | ||
|
|
||
| RAY_CHECK_NE(FLAGS_cluster_id, "") << "Expected cluster ID."; | ||
| ray::ClusterID cluster_id = ray::ClusterID::FromHex(FLAGS_cluster_id); | ||
| RAY_LOG(INFO) << "Setting cluster ID to: " << cluster_id; | ||
| gflags::ShutDownCommandLineFlags(); | ||
|
|
||
| // Get cgroup setup instance and perform necessary resource setup. | ||
| ray::GetCgroupSetup(FLAGS_enable_resource_isolation); | ||
| // TODO(#54703): Link OSS documentation once it's available in the error messages. | ||
| if (enable_resource_isolation) { | ||
| RAY_CHECK(!cgroup_path.empty()) | ||
| << "Failed to start up raylet. If enable_resource_isolation is set to true, " | ||
| "cgroup_path cannot be empty."; | ||
| RAY_CHECK_NE(system_reserved_cpu_weight, -1) | ||
| << "Failed to start up raylet. If enable_resource_isolation is set to true, " | ||
| "system_reserved_cpu_weight must be set to a value between [1,10000]"; | ||
| RAY_CHECK_NE(system_reserved_memory_bytes, -1) | ||
| << "Failed to start up raylet. If enable_resource_isolation is set to true, " | ||
| "system_reserved_memory_byres must be set to a value > 0"; | ||
|
|
||
| #ifndef __linux__ | ||
| RAY_LOG(WARNING) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to do a system check to see if cgroups are enabled on the host?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This warning is for if the user is trying to enable resource isolation on a non-linux system. So Cgroups will not be enabled. The line above this will create the appropriate target of CgroupManager based on the platform. If it's linux, it will fail if cgroups are not enabled on the host. RAY_CHECK(cgroup_manager.ok())
<< "Failed to start raylet. Could not create CgroupManager because of "
<< cgroup_manager.ToString();
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I misspoke. The following is implemented in 9/n:
In this PR, we still do a system check if cgroups are enabled on the host in CgroupManager::Create. |
||
| << "Resource isolation with cgroups is only supported in linux. Please set " | ||
| "enable_resource_isolation to false. This is likely a misconfiguration."; | ||
| #endif | ||
|
Comment on lines
+283
to
+287
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought it would be useful to add a warning when resource isolation is enabled on non-linux platforms. It's likely a misconfiguration. |
||
| } | ||
|
|
||
| std::unique_ptr<ray::SysFsCgroupDriver> cgroup_driver; | ||
| ray::StatusOr<std::unique_ptr<ray::CgroupManager>> cgroup_manager = | ||
| ray::CgroupManager::Create(std::move(cgroup_path), | ||
| node_id, | ||
| system_reserved_cpu_weight, | ||
| system_reserved_memory_bytes, | ||
| std::move(cgroup_driver)); | ||
|
|
||
| // TODO(#54703) - Link to OSS documentation once available. | ||
| RAY_CHECK(cgroup_manager.ok()) | ||
| << "Failed to start raylet. Could not create CgroupManager because of " | ||
| << cgroup_manager.ToString(); | ||
|
|
||
| // Configuration for the node manager. | ||
| ray::raylet::NodeManagerConfig node_manager_config; | ||
| node_manager_config.enable_resource_isolation = FLAGS_enable_resource_isolation; | ||
|
|
||
| absl::flat_hash_map<std::string, double> static_resource_conf; | ||
|
|
||
|
|
@@ -542,8 +594,7 @@ int main(int argc, char *argv[]) { | |
| /*starting_worker_timeout_callback=*/ | ||
| [&] { cluster_lease_manager->ScheduleAndGrantLeases(); }, | ||
| node_manager_config.ray_debugger_external, | ||
| /*get_time=*/[]() { return absl::Now(); }, | ||
| node_manager_config.enable_resource_isolation); | ||
| /*get_time=*/[]() { return absl::Now(); }); | ||
|
|
||
| client_call_manager = std::make_unique<ray::rpc::ClientCallManager>( | ||
| main_service, /*record_stats=*/true); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking out loud, do we need this boolean flag? Could we infer it to be true if cgroup_path and other configs are present?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair point. I see the argument for both. More flags means more configuration possibilities. In this case, I picked an
enable_flag because the config is complex and it simplifies invariant checking and understanding the user's intent.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. I'm not particularly opinionated one way or the other, mostly I guess probing to see if there was a reason to this rhyme and it sounds like you had one in mind. So we can keep this for now on the notion that it'll make it easier to extend later should that come along.