Skip to content

Commit 0c87ca9

Browse files
authored
Revert "[core] (cgroups 8/n) Wiring CgroupManager into the raylet. (#56297)"
This reverts commit 161dd95.
1 parent 161dd95 commit 0c87ca9

File tree

4 files changed

+21
-145
lines changed

4 files changed

+21
-145
lines changed

src/ray/common/cgroup2/BUILD.bazel

Lines changed: 21 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,13 @@
11
load("//bazel:ray.bzl", "ray_cc_library")
22

3-
config_setting(
4-
name = "is_linux",
5-
constraint_values = ["@platforms//os:linux"],
6-
)
7-
8-
# Public targets.
9-
ray_cc_library(
10-
name = "cgroup_manager",
11-
srcs = select({
12-
":is_linux": ["cgroup_manager.cc"],
13-
"//conditions:default": ["noop_cgroup_manager.cc"],
14-
}),
15-
hdrs = ["cgroup_manager.h"],
16-
visibility = ["//visibility:public"],
17-
deps = [
18-
":cgroup_driver_interface",
19-
":cgroup_manager_interface",
20-
"//src/ray/common:status",
21-
"//src/ray/common:status_or",
22-
] + select({
23-
":is_linux": [
24-
":scoped_cgroup_operation",
25-
"//src/ray/util:logging",
26-
"@com_google_absl//absl/strings",
27-
],
28-
"//conditions:default": [],
29-
}),
30-
)
31-
323
ray_cc_library(
334
name = "cgroup_driver_interface",
345
hdrs = [
356
"cgroup_driver_interface.h",
367
],
37-
visibility = ["//visibility:public"],
8+
target_compatible_with = [
9+
"@platforms//os:linux",
10+
],
3811
deps = [
3912
"//src/ray/common:status",
4013
"//src/ray/common:status_or",
@@ -46,42 +19,51 @@ ray_cc_library(
4619
hdrs = [
4720
"cgroup_manager_interface.h",
4821
],
49-
visibility = ["//visibility:public"],
22+
target_compatible_with = [
23+
"@platforms//os:linux",
24+
],
5025
deps = [
5126
"//src/ray/common:status",
5227
"//src/ray/common:status_or",
5328
],
5429
)
5530

5631
ray_cc_library(
57-
name = "sysfs_cgroup_driver",
58-
srcs = ["sysfs_cgroup_driver.cc"],
32+
name = "cgroup_manager",
33+
srcs = ["cgroup_manager.cc"],
5934
hdrs = [
60-
"sysfs_cgroup_driver.h",
35+
"cgroup_manager.h",
36+
"scoped_cgroup_operation.h",
6137
],
6238
target_compatible_with = [
6339
"@platforms//os:linux",
6440
],
65-
visibility = ["//visibility:public"],
6641
deps = [
6742
":cgroup_driver_interface",
43+
":cgroup_manager_interface",
6844
"//src/ray/common:status",
6945
"//src/ray/common:status_or",
7046
"//src/ray/util:logging",
7147
"@com_google_absl//absl/strings",
7248
],
7349
)
7450

75-
# Private Targets.
7651
ray_cc_library(
77-
name = "scoped_cgroup_operation",
52+
name = "sysfs_cgroup_driver",
53+
srcs = ["sysfs_cgroup_driver.cc"],
7854
hdrs = [
79-
"scoped_cgroup_operation.h",
55+
"sysfs_cgroup_driver.h",
8056
],
8157
target_compatible_with = [
8258
"@platforms//os:linux",
8359
],
84-
visibility = [":__subpackages__"],
60+
deps = [
61+
":cgroup_driver_interface",
62+
"//src/ray/common:status",
63+
"//src/ray/common:status_or",
64+
"//src/ray/util:logging",
65+
"@com_google_absl//absl/strings",
66+
],
8567
)
8668

8769
ray_cc_library(
@@ -92,7 +74,6 @@ ray_cc_library(
9274
target_compatible_with = [
9375
"@platforms//os:linux",
9476
],
95-
visibility = [":__subpackages__"],
9677
deps = [
9778
":cgroup_driver_interface",
9879
"//src/ray/common:status",
@@ -106,7 +87,6 @@ ray_cc_library(
10687
target_compatible_with = [
10788
"@platforms//os:linux",
10889
],
109-
visibility = [":__subpackages__"],
11090
deps = [
11191
"//src/ray/common:id",
11292
"//src/ray/common:status",

src/ray/common/cgroup2/noop_cgroup_manager.cc

Lines changed: 0 additions & 39 deletions
This file was deleted.

src/ray/raylet/BUILD.bazel

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,8 +283,6 @@ ray_cc_binary(
283283
"//src/ray/common:lease",
284284
"//src/ray/common:ray_config",
285285
"//src/ray/common:status",
286-
"//src/ray/common/cgroup2:cgroup_manager",
287-
"//src/ray/common/cgroup2:sysfs_cgroup_driver",
288286
"//src/ray/core_worker:metrics",
289287
"//src/ray/gcs/gcs_client:gcs_client_lib",
290288
"//src/ray/object_manager:ownership_object_directory",

src/ray/raylet/main.cc

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,11 @@
2424
#include "gflags/gflags.h"
2525
#include "nlohmann/json.hpp"
2626
#include "ray/common/asio/instrumented_io_context.h"
27-
#include "ray/common/cgroup2/cgroup_manager.h"
28-
#include "ray/common/cgroup2/sysfs_cgroup_driver.h"
2927
#include "ray/common/constants.h"
3028
#include "ray/common/id.h"
3129
#include "ray/common/lease/lease.h"
3230
#include "ray/common/ray_config.h"
3331
#include "ray/common/status.h"
34-
#include "ray/common/status_or.h"
3532
#include "ray/core_worker/metrics.h"
3633
#include "ray/gcs/gcs_client/gcs_client.h"
3734
#include "ray/object_manager/ownership_object_directory.h"
@@ -114,30 +111,6 @@ DEFINE_bool(huge_pages, false, "Enable huge pages.");
114111
DEFINE_string(labels,
115112
"",
116113
"Define the key-value format of node labels, which is a serialized JSON.");
117-
DEFINE_bool(
118-
enable_resource_isolation,
119-
false,
120-
"Enables resource isolation through cgroupv2. The raylet will create and "
121-
"manage a cgroup hierarchy that separates system processes and worker processes "
122-
"into separate cgroups.");
123-
DEFINE_string(
124-
cgroup_path,
125-
"",
126-
"Path of the cgroup that the raylet will take ownership of to create its cgorup "
127-
"hierarchy. The raylet process must have read, write, and execute permission for "
128-
"this path. If enable_resource_isolation is true, then this cannot be empty.");
129-
DEFINE_int64(
130-
system_reserved_cpu_weight,
131-
-1,
132-
"The amount of cores reserved for ray system processes. It will be applied "
133-
"as a cpu.weight constraint to the system cgroup. 10000 - "
134-
"system_reserved_cpu_weight will be applied as a constraint to the "
135-
"application cgroup. If enable resource isolation is true, then this cannot be -1.");
136-
DEFINE_int64(system_reserved_memory_bytes,
137-
-1,
138-
"The amount of memory in bytes reserved for ray system processes. It will "
139-
"be applied as a memory.min constraint to the sytem cgroup. If enable "
140-
"resource isolation is true, then this cannot be -1");
141114

142115
absl::flat_hash_map<std::string, std::string> parse_node_labels(
143116
const std::string &labels_json_str) {
@@ -245,48 +218,12 @@ int main(int argc, char *argv[]) {
245218
const std::string session_name = FLAGS_session_name;
246219
const bool is_head_node = FLAGS_head;
247220
const std::string labels_json_str = FLAGS_labels;
248-
const bool enable_resource_isolation = FLAGS_enable_resource_isolation;
249-
const std::string cgroup_path = FLAGS_cgroup_path;
250-
const int64_t system_reserved_cpu_weight = FLAGS_system_reserved_cpu_weight;
251-
const int64_t system_reserved_memory_bytes = FLAGS_system_reserved_memory_bytes;
252221

253222
RAY_CHECK_NE(FLAGS_cluster_id, "") << "Expected cluster ID.";
254223
ray::ClusterID cluster_id = ray::ClusterID::FromHex(FLAGS_cluster_id);
255224
RAY_LOG(INFO) << "Setting cluster ID to: " << cluster_id;
256225
gflags::ShutDownCommandLineFlags();
257226

258-
// TODO(#54703): Link OSS documentation once it's available in the error messages.
259-
if (enable_resource_isolation) {
260-
RAY_CHECK(!cgroup_path.empty())
261-
<< "Failed to start up raylet. If enable_resource_isolation is set to true, "
262-
"cgroup_path cannot be empty.";
263-
RAY_CHECK_NE(system_reserved_cpu_weight, -1)
264-
<< "Failed to start up raylet. If enable_resource_isolation is set to true, "
265-
"system_reserved_cpu_weight must be set to a value between [1,10000]";
266-
RAY_CHECK_NE(system_reserved_memory_bytes, -1)
267-
<< "Failed to start up raylet. If enable_resource_isolation is set to true, "
268-
"system_reserved_memory_byres must be set to a value > 0";
269-
270-
std::unique_ptr<ray::SysFsCgroupDriver> cgroup_driver;
271-
ray::StatusOr<std::unique_ptr<ray::CgroupManager>> cgroup_manager =
272-
ray::CgroupManager::Create(std::move(cgroup_path),
273-
node_id,
274-
system_reserved_cpu_weight,
275-
system_reserved_memory_bytes,
276-
std::move(cgroup_driver));
277-
278-
// TODO(#54703) - Link to OSS documentation once available.
279-
RAY_CHECK(cgroup_manager.ok())
280-
<< "Failed to start raylet. Could not create CgroupManager because of "
281-
<< cgroup_manager.ToString();
282-
283-
#ifndef __linux__
284-
RAY_LOG(WARNING)
285-
<< "Resource isolation with cgroups is only supported in linux. Please set "
286-
"enable_resource_isolation to false. This is likely a misconfiguration.";
287-
#endif
288-
}
289-
290227
// Configuration for the node manager.
291228
ray::raylet::NodeManagerConfig node_manager_config;
292229

0 commit comments

Comments
 (0)