diff --git a/src/ray/common/cgroup2/BUILD.bazel b/src/ray/common/cgroup2/BUILD.bazel
index 85cd2a9dc059..a3f39c7040ad 100644
--- a/src/ray/common/cgroup2/BUILD.bazel
+++ b/src/ray/common/cgroup2/BUILD.bazel
@@ -1,13 +1,40 @@
 load("//bazel:ray.bzl", "ray_cc_library")
 
+config_setting(
+    name = "is_linux",
+    constraint_values = ["@platforms//os:linux"],
+)
+
+# Public targets: visible to every package (//visibility:public).
+ray_cc_library(
+    name = "cgroup_manager",
+    srcs = select({
+        ":is_linux": ["cgroup_manager.cc"],
+        "//conditions:default": ["noop_cgroup_manager.cc"],
+    }),
+    hdrs = ["cgroup_manager.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":cgroup_driver_interface",
+        ":cgroup_manager_interface",
+        "//src/ray/common:status",
+        "//src/ray/common:status_or",
+    ] + select({
+        ":is_linux": [
+            ":scoped_cgroup_operation",
+            "//src/ray/util:logging",
+            "@com_google_absl//absl/strings",
+        ],
+        "//conditions:default": [],
+    }),
+)
+
 ray_cc_library(
     name = "cgroup_driver_interface",
     hdrs = [
         "cgroup_driver_interface.h",
     ],
-    target_compatible_with = [
-        "@platforms//os:linux",
-    ],
+    visibility = ["//visibility:public"],
     deps = [
         "//src/ray/common:status",
         "//src/ray/common:status_or",
@@ -19,9 +46,7 @@ ray_cc_library(
     hdrs = [
         "cgroup_manager_interface.h",
     ],
-    target_compatible_with = [
-        "@platforms//os:linux",
-    ],
+    visibility = ["//visibility:public"],
     deps = [
         "//src/ray/common:status",
         "//src/ray/common:status_or",
@@ -29,18 +54,17 @@ ray_cc_library(
 )
 
 ray_cc_library(
-    name = "cgroup_manager",
-    srcs = ["cgroup_manager.cc"],
+    name = "sysfs_cgroup_driver",
+    srcs = ["sysfs_cgroup_driver.cc"],
     hdrs = [
-        "cgroup_manager.h",
-        "scoped_cgroup_operation.h",
+        "sysfs_cgroup_driver.h",
     ],
     target_compatible_with = [
         "@platforms//os:linux",
     ],
+    visibility = ["//visibility:public"],
     deps = [
         ":cgroup_driver_interface",
-        ":cgroup_manager_interface",
         "//src/ray/common:status",
         "//src/ray/common:status_or",
         "//src/ray/util:logging",
@@ -48,22 +72,16 @@ ray_cc_library(
     ],
 )
 
+# Private targets: visible only to this package and its subpackages.
 ray_cc_library(
-    name = "sysfs_cgroup_driver",
-    srcs = ["sysfs_cgroup_driver.cc"],
+    name = "scoped_cgroup_operation",
     hdrs = [
-        "sysfs_cgroup_driver.h",
+        "scoped_cgroup_operation.h",
     ],
     target_compatible_with = [
         "@platforms//os:linux",
     ],
-    deps = [
-        ":cgroup_driver_interface",
-        "//src/ray/common:status",
-        "//src/ray/common:status_or",
-        "//src/ray/util:logging",
-        "@com_google_absl//absl/strings",
-    ],
+    visibility = [":__subpackages__"],
 )
 
 ray_cc_library(
@@ -74,6 +92,7 @@ ray_cc_library(
     target_compatible_with = [
         "@platforms//os:linux",
     ],
+    visibility = [":__subpackages__"],
     deps = [
         ":cgroup_driver_interface",
         "//src/ray/common:status",
@@ -87,6 +106,7 @@ ray_cc_library(
     target_compatible_with = [
         "@platforms//os:linux",
     ],
+    visibility = [":__subpackages__"],
     deps = [
         "//src/ray/common:id",
         "//src/ray/common:status",
diff --git a/src/ray/common/cgroup2/noop_cgroup_manager.cc b/src/ray/common/cgroup2/noop_cgroup_manager.cc
new file mode 100644
index 000000000000..1accae4827df
--- /dev/null
+++ b/src/ray/common/cgroup2/noop_cgroup_manager.cc
@@ -0,0 +1,39 @@
+// Copyright 2025 The Ray Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "ray/common/cgroup2/cgroup_driver_interface.h"
+#include "ray/common/cgroup2/cgroup_manager.h"
+#include "ray/common/status_or.h"
+
+namespace ray {
+// No-op CgroupManager compiled on non-Linux platforms; arguments are accepted but unused.
+CgroupManager::CgroupManager(std::string base_cgroup_path,
+                             const std::string &node_id,
+                             std::unique_ptr<CgroupDriverInterface> cgroup_driver) {}
+
+CgroupManager::~CgroupManager() {}
+
+StatusOr<std::unique_ptr<CgroupManager>> CgroupManager::Create(
+    std::string base_cgroup_path,
+    const std::string &node_id,
+    const int64_t system_reserved_cpu_weight,
+    const int64_t system_reserved_memory_bytes,
+    std::unique_ptr<CgroupDriverInterface> cgroup_driver) {
+  return std::unique_ptr<CgroupManager>(
+      new CgroupManager(std::move(base_cgroup_path), node_id, std::move(cgroup_driver)));
+}
+}  // namespace ray
diff --git a/src/ray/raylet/BUILD.bazel b/src/ray/raylet/BUILD.bazel
index de5d74cc85c6..2a0adb8f766b 100644
--- a/src/ray/raylet/BUILD.bazel
+++ b/src/ray/raylet/BUILD.bazel
@@ -282,6 +282,8 @@ ray_cc_binary(
         "//src/ray/common:lease",
         "//src/ray/common:ray_config",
         "//src/ray/common:status",
+        "//src/ray/common/cgroup2:cgroup_manager",
+        "//src/ray/common/cgroup2:sysfs_cgroup_driver",
         "//src/ray/core_worker:metrics",
         "//src/ray/gcs/gcs_client:gcs_client_lib",
         "//src/ray/object_manager:ownership_object_directory",
diff --git a/src/ray/raylet/main.cc b/src/ray/raylet/main.cc
index d5267ad73ead..941be0057bfc 100644
--- a/src/ray/raylet/main.cc
+++ b/src/ray/raylet/main.cc
@@ -24,11 +24,14 @@
 #include "gflags/gflags.h"
 #include "nlohmann/json.hpp"
 #include "ray/common/asio/instrumented_io_context.h"
+#include "ray/common/cgroup2/cgroup_manager.h"
+#include "ray/common/cgroup2/sysfs_cgroup_driver.h"
 #include "ray/common/constants.h"
 #include "ray/common/id.h"
 #include "ray/common/lease/lease.h"
 #include "ray/common/ray_config.h"
 #include "ray/common/status.h"
+#include "ray/common/status_or.h"
 #include "ray/core_worker/metrics.h"
 #include "ray/gcs/gcs_client/gcs_client.h"
 #include "ray/object_manager/ownership_object_directory.h"
@@ -111,6 +114,30 @@ DEFINE_bool(huge_pages, false, "Enable huge pages.");
 DEFINE_string(labels,
               "",
               "Define the key-value format of node labels, which is a serialized JSON.");
+DEFINE_bool(
+    enable_resource_isolation,
+    false,
+    "Enables resource isolation through cgroupv2. The raylet will create and "
+    "manage a cgroup hierarchy that separates system processes and worker processes "
+    "into separate cgroups.");
+DEFINE_string(
+    cgroup_path,
+    "",
+    "Path of the cgroup that the raylet will take ownership of to create its cgroup "
+    "hierarchy. The raylet process must have read, write, and execute permission for "
+    "this path. If enable_resource_isolation is true, then this cannot be empty.");
+DEFINE_int64(
+    system_reserved_cpu_weight,
+    -1,
+    "The amount of cores reserved for ray system processes. It will be applied "
+    "as a cpu.weight constraint to the system cgroup. 10000 - "
+    "system_reserved_cpu_weight will be applied as a constraint to the "
+    "application cgroup. If enable resource isolation is true, then this cannot be -1.");
+DEFINE_int64(system_reserved_memory_bytes,
+             -1,
+             "The amount of memory in bytes reserved for ray system processes. It will "
+             "be applied as a memory.min constraint to the system cgroup. If enable "
+             "resource isolation is true, then this cannot be -1");
 
 absl::flat_hash_map<std::string, std::string> parse_node_labels(
     const std::string &labels_json_str) {
@@ -218,12 +245,57 @@ int main(int argc, char *argv[]) {
   const std::string session_name = FLAGS_session_name;
   const bool is_head_node = FLAGS_head;
   const std::string labels_json_str = FLAGS_labels;
+  const bool enable_resource_isolation = FLAGS_enable_resource_isolation;
+  const std::string cgroup_path = FLAGS_cgroup_path;
+  const int64_t system_reserved_cpu_weight = FLAGS_system_reserved_cpu_weight;
+  const int64_t system_reserved_memory_bytes = FLAGS_system_reserved_memory_bytes;
 
   RAY_CHECK_NE(FLAGS_cluster_id, "") << "Expected cluster ID.";
   ray::ClusterID cluster_id = ray::ClusterID::FromHex(FLAGS_cluster_id);
   RAY_LOG(INFO) << "Setting cluster ID to: " << cluster_id;
   gflags::ShutDownCommandLineFlags();
 
+  // Owned at function scope so the manager outlives the block that creates it; a
+  // block-local manager would be destroyed as soon as that block exits.
+  std::unique_ptr<ray::CgroupManager> cgroup_manager;
+  // TODO(#54703): Link OSS documentation once it's available in the error messages.
+  if (enable_resource_isolation) {
+    RAY_CHECK(!cgroup_path.empty())
+        << "Failed to start up raylet. If enable_resource_isolation is set to true, "
+           "cgroup_path cannot be empty.";
+    RAY_CHECK_NE(system_reserved_cpu_weight, -1)
+        << "Failed to start up raylet. If enable_resource_isolation is set to true, "
+           "system_reserved_cpu_weight must be set to a value between [1,10000]";
+    RAY_CHECK_NE(system_reserved_memory_bytes, -1)
+        << "Failed to start up raylet. If enable_resource_isolation is set to true, "
+           "system_reserved_memory_bytes must be set to a value > 0";
+
+    // Hand CgroupManager a real driver; a default-constructed unique_ptr is nullptr.
+    // NOTE(review): assumes SysFsCgroupDriver is default-constructible -- confirm.
+    std::unique_ptr<ray::SysFsCgroupDriver> cgroup_driver =
+        std::make_unique<ray::SysFsCgroupDriver>();
+    // cgroup_path is const, so it is passed by copy (std::move would copy anyway).
+    ray::StatusOr<std::unique_ptr<ray::CgroupManager>> cgroup_manager_or =
+        ray::CgroupManager::Create(cgroup_path,
+                                   node_id,
+                                   system_reserved_cpu_weight,
+                                   system_reserved_memory_bytes,
+                                   std::move(cgroup_driver));
+
+    // TODO(#54703) - Link to OSS documentation once available.
+    RAY_CHECK(cgroup_manager_or.ok())
+        << "Failed to start raylet. Could not create CgroupManager because of "
+        << cgroup_manager_or.ToString();
+    // NOTE(review): assumes ray::StatusOr exposes value() -- confirm.
+    cgroup_manager = std::move(cgroup_manager_or.value());
+
+#ifndef __linux__
+    RAY_LOG(WARNING)
+        << "Resource isolation with cgroups is only supported in linux. Please set "
+           "enable_resource_isolation to false. This is likely a misconfiguration.";
+#endif
+  }
+
   // Configuration for the node manager.
   ray::raylet::NodeManagerConfig node_manager_config;