diff --git a/python/ray/dashboard/client/src/components/ActorTable.component.test.tsx b/python/ray/dashboard/client/src/components/ActorTable.component.test.tsx index f7487a4edd8b..cebe9825ed7a 100644 --- a/python/ray/dashboard/client/src/components/ActorTable.component.test.tsx +++ b/python/ray/dashboard/client/src/components/ActorTable.component.test.tsx @@ -53,6 +53,9 @@ const MOCK_ACTORS: { [actorId: string]: ActorDetail } = { }, pid: 25321, }, + labelSelector: { + "test-label-key": "test-label-value", + }, }, ACTOR_2: { actorId: "ACTOR_2", @@ -101,6 +104,7 @@ const MOCK_ACTORS: { [actorId: string]: ActorDetail } = { }, pid: 25322, }, + labelSelector: {}, }, }; @@ -184,6 +188,9 @@ describe("ActorTable", () => { expect(within(actor1Row).getByText("ACTOR_1")).toBeInTheDocument(); expect(within(actor2Row).getByText("ACTOR_2")).toBeInTheDocument(); + expect( + screen.queryByText('{ "test-label-key": "test-label-value" }'), + ).toBeInTheDocument(); expect(actor2Row.compareDocumentPosition(actor1Row)).toBe( Node.DOCUMENT_POSITION_FOLLOWING, diff --git a/python/ray/dashboard/client/src/components/ActorTable.tsx b/python/ray/dashboard/client/src/components/ActorTable.tsx index 38453e7af722..21c429a4b4d1 100644 --- a/python/ray/dashboard/client/src/components/ActorTable.tsx +++ b/python/ray/dashboard/client/src/components/ActorTable.tsx @@ -20,6 +20,7 @@ import Pagination from "@mui/material/Pagination"; import _ from "lodash"; import React, { useMemo, useState } from "react"; import { Link as RouterLink } from "react-router-dom"; +import { CodeDialogButtonWithPreview } from "../common/CodeDialogButton"; import { DurationText, getDurationVal } from "../common/DurationText"; import { ActorLink, generateNodeLink } from "../common/links"; import { @@ -319,6 +320,10 @@ const ActorTable = ({ ), }, + { + label: "Label selector", + helpInfo: The label selector of the actor., + }, { label: "Exit detail", helpInfo: ( @@ -550,6 +555,7 @@ const ActorTable = ({ gpus, processStats, 
mem, + labelSelector, }) => ( - ( -
- {key}: {val} -
- ), - )} - arrow - > - `${key}: ${val}`) - .join(", ")} - wordBreak="break-all" + {Object.entries(requiredResources || {}).length > 0 ? ( + -
+ ) : ( + "{}" + )} +
+ + {Object.entries(labelSelector || {}).length > 0 ? ( + + ) : ( + "{}" + )} diff --git a/python/ray/dashboard/client/src/components/TaskTable.tsx b/python/ray/dashboard/client/src/components/TaskTable.tsx index c9308611ed4c..0e3bbc3d1e19 100644 --- a/python/ray/dashboard/client/src/components/TaskTable.tsx +++ b/python/ray/dashboard/client/src/components/TaskTable.tsx @@ -94,6 +94,7 @@ const TaskTable = ({ { label: "Type" }, { label: "Placement group ID" }, { label: "Required resources" }, + { label: "Label selector" }, ]; return ( @@ -229,6 +230,7 @@ const TaskTable = ({ start_time_ms, end_time_ms, worker_id, + label_selector, } = task; return ( @@ -312,6 +314,16 @@ const TaskTable = ({ "{}" )} + + {Object.entries(label_selector || {}).length > 0 ? ( + + ) : ( + "{}" + )} + ); })} diff --git a/python/ray/dashboard/client/src/pages/actor/hook/mockedUseActorList.ts b/python/ray/dashboard/client/src/pages/actor/hook/mockedUseActorList.ts index ec496818f88e..d4bb6061bd7d 100644 --- a/python/ray/dashboard/client/src/pages/actor/hook/mockedUseActorList.ts +++ b/python/ray/dashboard/client/src/pages/actor/hook/mockedUseActorList.ts @@ -22,6 +22,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = { placementGroupId: "123", reprName: ",", callSite: "", + labelSelector: {}, }, ACTOR_2: { actorId: "ACTOR_2", @@ -44,6 +45,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = { placementGroupId: "123", reprName: ",", callSite: "", + labelSelector: {}, }, ACTOR_3: { actorId: "ACTOR_3", @@ -66,6 +68,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = { placementGroupId: "123", reprName: ",", callSite: "", + labelSelector: {}, }, ACTOR_4: { actorId: "ACTOR_4", @@ -88,6 +91,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = { placementGroupId: "123", reprName: ",", callSite: "", + labelSelector: {}, }, ACTOR_5: { actorId: "ACTOR_5", @@ -110,6 +114,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = { placementGroupId: "123", reprName: ",", callSite: "", + 
labelSelector: {}, }, }; diff --git a/python/ray/dashboard/client/src/type/actor.ts b/python/ray/dashboard/client/src/type/actor.ts index 87cb0b9ad8e9..52c8527ab94e 100644 --- a/python/ray/dashboard/client/src/type/actor.ts +++ b/python/ray/dashboard/client/src/type/actor.ts @@ -33,6 +33,7 @@ export type Actor = { exitDetail: string; reprName: string; callSite?: string | undefined; + labelSelector: { [key: string]: string } | null; }; export type ActorDetail = { diff --git a/python/ray/dashboard/client/src/type/task.ts b/python/ray/dashboard/client/src/type/task.ts index b4e738ab7cad..95c80ad86b90 100644 --- a/python/ray/dashboard/client/src/type/task.ts +++ b/python/ray/dashboard/client/src/type/task.ts @@ -46,6 +46,7 @@ export type Task = { error_message: string | null; task_log_info: { [key: string]: string | null | number }; call_site: string | null; + label_selector: { [key: string]: string } | null; }; export type ProfilingData = { diff --git a/python/ray/dashboard/modules/node/node_head.py b/python/ray/dashboard/modules/node/node_head.py index fb2949bf3f8d..44ab1ef2f289 100644 --- a/python/ray/dashboard/modules/node/node_head.py +++ b/python/ray/dashboard/modules/node/node_head.py @@ -114,6 +114,7 @@ def _actor_table_data_to_dict(message): "reprName", "placementGroupId", "callSite", + "labelSelector", } light_message = {k: v for (k, v) in orig_message.items() if k in fields} light_message["actorClass"] = orig_message["className"] @@ -134,7 +135,7 @@ def _actor_table_data_to_dict(message): light_message["startTime"] = int(light_message["startTime"]) light_message["endTime"] = int(light_message["endTime"]) light_message["requiredResources"] = dict(message.required_resources) - + light_message["labelSelector"] = dict(message.label_selector) return light_message diff --git a/python/ray/tests/test_state_api.py b/python/ray/tests/test_state_api.py index 0806c49ce203..8c74ece6be0b 100644 --- a/python/ray/tests/test_state_api.py +++ 
b/python/ray/tests/test_state_api.py @@ -2548,6 +2548,71 @@ def verify(): print(list_tasks()) +def test_list_get_tasks_label_selector(ray_start_cluster): + """ + Run a task with a label selector on a node with matching labels. + Verify that the label selector is reported in the task state returned + by get_task. + """ + cluster = ray_start_cluster + cluster.add_node( + num_cpus=2, labels={"ray.io/accelerator-type": "A100", "region": "us-west4"} + ) + ray.init(address=cluster.address) + cluster.wait_for_nodes() + + @ray.remote(label_selector={"region": "us-west4"}) + def foo(): + import time + + time.sleep(5) + + call_ref = foo.remote() + + ray.get(call_ref) + + def verify(): + task = get_task(call_ref) + assert task["label_selector"] == {"region": "us-west4"} + return True + + wait_for_condition(verify) + print(list_tasks()) + + +def test_list_actor_tasks_label_selector(ray_start_cluster): + """ + Create an actor with a label selector on a node with matching labels. + + Verify that the label selector is reported in the actor state returned + by list_actors(detail=True).
+ """ + cluster = ray_start_cluster + cluster.add_node(num_cpus=2, labels={"region": "us-west4"}) + ray.init(address=cluster.address) + cluster.wait_for_nodes() + + @ray.remote(label_selector={"region": "us-west4"}) + class Actor: + def method(self): + import time + + time.sleep(5) + + actor = Actor.remote() + ray.get(actor.method.remote()) + + def verify(): + actors = list_actors(detail=True) + assert len(actors) == 1 + actor = actors[0] + assert actor["label_selector"] == {"region": "us-west4"} + return True + + wait_for_condition(verify) + print(list_actors(detail=True)) + + def test_pg_worker_id_tasks(shutdown_only): ray.init(num_cpus=1) pg = ray.util.placement_group(bundles=[{"CPU": 1}]) diff --git a/python/ray/util/state/common.py b/python/ray/util/state/common.py index 31f132594440..545a87c6048b 100644 --- a/python/ray/util/state/common.py +++ b/python/ray/util/state/common.py @@ -502,6 +502,8 @@ class ActorState(StateSchema): ) #: The call site of the actor creation. call_site: Optional[str] = state_column(detail=True, filterable=False) + #: The label selector for the actor. + label_selector: Optional[dict] = state_column(detail=True, filterable=False) @dataclass(init=not IS_PYDANTIC_2) @@ -797,6 +799,8 @@ class TaskState(StateSchema): is_debugger_paused: Optional[bool] = state_column(detail=True, filterable=True) #: The call site of the task. call_site: Optional[str] = state_column(detail=True, filterable=False) + #: The label selector for the task. 
+ label_selector: Optional[dict] = state_column(detail=True, filterable=False) @dataclass(init=not IS_PYDANTIC_2) @@ -1617,6 +1621,7 @@ def protobuf_to_task_state_dict(message: TaskEvents) -> dict: "parent_task_id", "placement_group_id", "call_site", + "label_selector", ], ), (task_attempt, ["task_id", "attempt_number", "job_id"]), diff --git a/src/ray/gcs/gcs_server/gcs_actor_manager.cc b/src/ray/gcs/gcs_server/gcs_actor_manager.cc index ac59673c1caa..6d21c44c7292 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_actor_manager.cc @@ -274,6 +274,9 @@ void GcsActor::WriteActorExportEvent() const { export_actor_data_ptr->set_repr_name(actor_table_data_.repr_name()); export_actor_data_ptr->mutable_labels()->insert(task_spec_.get()->labels().begin(), task_spec_.get()->labels().end()); + export_actor_data_ptr->mutable_label_selector()->insert( + actor_table_data_.label_selector().begin(), + actor_table_data_.label_selector().end()); RayExportEvent(export_actor_data_ptr).SendEvent(); } diff --git a/src/ray/gcs/gcs_server/gcs_actor_manager.h b/src/ray/gcs/gcs_server/gcs_actor_manager.h index 2db4de232d37..ce1a539750c8 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_manager.h +++ b/src/ray/gcs/gcs_server/gcs_actor_manager.h @@ -145,6 +145,10 @@ class GcsActor { if (task_spec.call_site().size() > 0) { actor_table_data_.set_call_site(task_spec.call_site()); } + if (task_spec.label_selector().size() > 0) { + actor_table_data_.mutable_label_selector()->insert( + task_spec.label_selector().begin(), task_spec.label_selector().end()); + } RefreshMetrics(); export_event_write_enabled_ = IsExportAPIEnabledActor(); } diff --git a/src/ray/gcs/pb_util.h b/src/ray/gcs/pb_util.h index 59bd9c62ddea..045f80771e42 100644 --- a/src/ray/gcs/pb_util.h +++ b/src/ray/gcs/pb_util.h @@ -245,6 +245,11 @@ inline void FillTaskInfo(rpc::TaskInfoEntry *task_info, if (task_spec.GetMessage().call_site().size() > 0) { 
task_info->set_call_site(task_spec.GetMessage().call_site()); } + if (task_spec.GetMessage().label_selector().size() > 0) { + task_info->mutable_label_selector()->insert( + task_spec.GetMessage().label_selector().begin(), + task_spec.GetMessage().label_selector().end()); + } } // Fill task_info for the export API with task specification from task_spec @@ -300,6 +305,11 @@ inline void FillExportTaskInfo(rpc::ExportTaskEventData::TaskInfoEntry *task_inf if (!pg_id.IsNil()) { task_info->set_placement_group_id(pg_id.Binary()); } + if (task_spec.GetMessage().label_selector().size() > 0) { + task_info->mutable_label_selector()->insert( + task_spec.GetMessage().label_selector().begin(), + task_spec.GetMessage().label_selector().end()); + } } /// Generate a RayErrorInfo from ErrorType diff --git a/src/ray/protobuf/common.proto b/src/ray/protobuf/common.proto index e8e779e9e8f1..36b39cde8a91 100644 --- a/src/ray/protobuf/common.proto +++ b/src/ray/protobuf/common.proto @@ -606,6 +606,8 @@ message TaskInfoEntry { // Human readable stacktrace of the task invocation, or actor creation. The exact data // format depends on the language. Only populated if the flag is enabled. optional string call_site = 27; + // The key-value label constraints of the node to schedule this task or actor on. + map label_selector = 28; } message TaskAttempt { diff --git a/src/ray/protobuf/export_actor_data.proto b/src/ray/protobuf/export_actor_data.proto index ea0a3068dafe..b1e0f5467bb3 100644 --- a/src/ray/protobuf/export_actor_data.proto +++ b/src/ray/protobuf/export_actor_data.proto @@ -75,4 +75,6 @@ message ExportActorData { string repr_name = 14; // The key-value labels for task and actor. map labels = 15; + // The label selector for the actor. 
+ map label_selector = 16; } diff --git a/src/ray/protobuf/export_task_event.proto b/src/ray/protobuf/export_task_event.proto index 5a20fd45c7dc..ce594dc29c43 100644 --- a/src/ray/protobuf/export_task_event.proto +++ b/src/ray/protobuf/export_task_event.proto @@ -88,6 +88,8 @@ message ExportTaskEventData { optional bytes placement_group_id = 9; // The key-value labels for task and actor. map labels = 10; + // The key-value label constraints of the node to schedule this task or actor on. + map label_selector = 11; } message ProfileEventEntry { diff --git a/src/ray/protobuf/gcs.proto b/src/ray/protobuf/gcs.proto index e7805db82317..f3acd695f704 100644 --- a/src/ray/protobuf/gcs.proto +++ b/src/ray/protobuf/gcs.proto @@ -157,6 +157,8 @@ message ActorTableData { // format depends on the language. Only populated if // `RAY_record_task_actor_creation_sites` is set to `true`. optional string call_site = 34; + // The label selector of the actor. + map label_selector = 35; } message ErrorTableData {