@@ -224,14 +224,23 @@ def cleanup_test_suite():
224224 ) as base_subtree_control_file :
225225 base_subtree_control_file .write ("-cpu -memory" )
226226 base_subtree_control_file .flush ()
227- # 2) Move processes back into the leaf cgroup.
227+ # 2) Move processes back into the root cgroup.
228228 with open (_ROOT_CGROUP / "cgroup.procs" , "w" ) as root_procs_file , open (
229229 _LEAF_GROUP / "cgroup.procs" , "r"
230230 ) as leaf_procs_file :
231231 leaf_cgroup_lines = leaf_procs_file .readlines ()
232232 for line in leaf_cgroup_lines :
233233 root_procs_file .write (line .strip ())
234234 root_procs_file .flush ()
235+ # 3) Move every process listed in _TEST_CGROUP's cgroup.procs back into the _ROOT_CGROUP.
236+ with open (_ROOT_CGROUP / "cgroup.procs" , "w" ) as root_procs_file , open (
237+ _TEST_CGROUP / "cgroup.procs" , "r"
238+ ) as test_procs_file :
239+ test_cgroup_lines = test_procs_file .readlines ()
240+ for line in test_cgroup_lines :
241+ root_procs_file .write (line .strip ())
242+ root_procs_file .flush ()
243+
235244 # 4) Delete the cgroups.
236245 os .rmdir (_LEAF_GROUP )
237246 os .rmdir (_TEST_CGROUP )
@@ -431,9 +440,6 @@ def test_ray_cli_start_resource_isolation_creates_cgroup_hierarchy_and_cleans_up
431440 assert result .exit_code == 0
432441 resource_isolation_config .add_object_store_memory (object_store_memory )
433442 assert_cgroup_hierarchy_exists_for_node (node_id , resource_isolation_config )
434- assert_system_processes_are_in_system_cgroup (
435- node_id , resource_isolation_config , len (_EXPECTED_SYSTEM_PROCESSES_RAY_START )
436- )
437443
438444 @ray .remote (num_cpus = 1 )
439445 class Actor :
@@ -447,12 +453,17 @@ def get_pid(self):
447453 for _ in range (num_cpus ):
448454 actor_refs .append (Actor .remote ())
449455 worker_pids = set ()
456+ worker_pids .add (str (os .getpid ()))
450457 for actor in actor_refs :
451458 worker_pids .add (str (ray .get (actor .get_pid .remote ())))
459+ assert_system_processes_are_in_system_cgroup (
460+ node_id , resource_isolation_config , len (_EXPECTED_SYSTEM_PROCESSES_RAY_START )
461+ )
452462 assert_worker_processes_are_in_workers_cgroup (
453463 node_id , resource_isolation_config , worker_pids
454464 )
455465 runner .invoke (scripts .stop )
466+
456467 assert_cgroup_hierarchy_cleaned_up_for_node (node_id , resource_isolation_config )
457468
458469
@@ -492,9 +503,6 @@ def test_ray_init_resource_isolation_creates_cgroup_hierarchy_and_cleans_up(
492503 object_store_memory = object_store_memory ,
493504 )
494505 assert_cgroup_hierarchy_exists_for_node (node_id , resource_isolation_config )
495- assert_system_processes_are_in_system_cgroup (
496- node_id , resource_isolation_config , len (_EXPECTED_SYSTEM_PROCESSES_RAY_INIT )
497- )
498506
499507 @ray .remote (num_cpus = 1 )
500508 class Actor :
@@ -508,8 +516,12 @@ def get_pid(self):
508516 for _ in range (num_cpus ):
509517 actor_refs .append (Actor .remote ())
510518 worker_pids = set ()
519+ worker_pids .add (str (os .getpid ()))
511520 for actor in actor_refs :
512521 worker_pids .add (str (ray .get (actor .get_pid .remote ())))
522+ assert_system_processes_are_in_system_cgroup (
523+ node_id , resource_isolation_config , len (_EXPECTED_SYSTEM_PROCESSES_RAY_INIT )
524+ )
513525 assert_worker_processes_are_in_workers_cgroup (
514526 node_id , resource_isolation_config , worker_pids
515527 )
0 commit comments