Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ const MOCK_ACTORS: { [actorId: string]: ActorDetail } = {
},
pid: 25321,
},
labelSelector: {
"test-label-key": "test-label-value",
},
},
ACTOR_2: {
actorId: "ACTOR_2",
Expand Down Expand Up @@ -101,6 +104,7 @@ const MOCK_ACTORS: { [actorId: string]: ActorDetail } = {
},
pid: 25322,
},
labelSelector: {},
},
};

Expand Down Expand Up @@ -184,6 +188,9 @@ describe("ActorTable", () => {

expect(within(actor1Row).getByText("ACTOR_1")).toBeInTheDocument();
expect(within(actor2Row).getByText("ACTOR_2")).toBeInTheDocument();
expect(
screen.queryByText('{ "test-label-key": "test-label-value" }'),
).toBeInTheDocument();

expect(actor2Row.compareDocumentPosition(actor1Row)).toBe(
Node.DOCUMENT_POSITION_FOLLOWING,
Expand Down
41 changes: 25 additions & 16 deletions python/ray/dashboard/client/src/components/ActorTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import Pagination from "@mui/material/Pagination";
import _ from "lodash";
import React, { useMemo, useState } from "react";
import { Link as RouterLink } from "react-router-dom";
import { CodeDialogButtonWithPreview } from "../common/CodeDialogButton";
import { DurationText, getDurationVal } from "../common/DurationText";
import { ActorLink, generateNodeLink } from "../common/links";
import {
Expand Down Expand Up @@ -319,6 +320,10 @@ const ActorTable = ({
</Typography>
),
},
{
label: "Label selector",
helpInfo: <Typography>The label selector of the actor.</Typography>,
},
{
label: "Exit detail",
helpInfo: (
Expand Down Expand Up @@ -550,6 +555,7 @@ const ActorTable = ({
gpus,
processStats,
mem,
labelSelector,
}) => (
<ExpandableTableRow
length={
Expand Down Expand Up @@ -702,23 +708,26 @@ const ActorTable = ({
</Tooltip>
</TableCell>
<TableCell align="center">
<Tooltip
title={Object.entries(requiredResources || {}).map(
([key, val]) => (
<div style={{ margin: 4 }}>
{key}: {val}
</div>
),
)}
arrow
>
<OverflowCollapsibleCell
text={Object.entries(requiredResources || {})
.map(([key, val]) => `${key}: ${val}`)
.join(", ")}
wordBreak="break-all"
{Object.entries(requiredResources || {}).length > 0 ? (
<CodeDialogButtonWithPreview
sx={{ maxWidth: 200 }}
title="Required resources"
code={JSON.stringify(requiredResources, undefined, 2)}
/>
</Tooltip>
) : (
"{}"
)}
</TableCell>
<TableCell align="center">
{Object.entries(labelSelector || {}).length > 0 ? (
<CodeDialogButtonWithPreview
sx={{ maxWidth: 200 }}
title="Label selector"
code={JSON.stringify(labelSelector, undefined, 2)}
/>
) : (
"{}"
)}
</TableCell>
<TableCell align="center">
<OverflowCollapsibleCell text={exitDetail} />
Expand Down
12 changes: 12 additions & 0 deletions python/ray/dashboard/client/src/components/TaskTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ const TaskTable = ({
{ label: "Type" },
{ label: "Placement group ID" },
{ label: "Required resources" },
{ label: "Label selector" },
];

return (
Expand Down Expand Up @@ -229,6 +230,7 @@ const TaskTable = ({
start_time_ms,
end_time_ms,
worker_id,
label_selector,
} = task;
return (
<TableRow key={task_id}>
Expand Down Expand Up @@ -312,6 +314,16 @@ const TaskTable = ({
"{}"
)}
</TableCell>
<TableCell align="center">
{Object.entries(label_selector || {}).length > 0 ? (
<CodeDialogButton
title="Label selector"
code={JSON.stringify(label_selector, undefined, 2)}
/>
) : (
"{}"
)}
</TableCell>
</TableRow>
);
})}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = {
placementGroupId: "123",
reprName: ",",
callSite: "",
labelSelector: {},
},
ACTOR_2: {
actorId: "ACTOR_2",
Expand All @@ -44,6 +45,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = {
placementGroupId: "123",
reprName: ",",
callSite: "",
labelSelector: {},
},
ACTOR_3: {
actorId: "ACTOR_3",
Expand All @@ -66,6 +68,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = {
placementGroupId: "123",
reprName: ",",
callSite: "",
labelSelector: {},
},
ACTOR_4: {
actorId: "ACTOR_4",
Expand All @@ -88,6 +91,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = {
placementGroupId: "123",
reprName: ",",
callSite: "",
labelSelector: {},
},
ACTOR_5: {
actorId: "ACTOR_5",
Expand All @@ -110,6 +114,7 @@ const MOCK_ACTORS: { [actorId: string]: Actor } = {
placementGroupId: "123",
reprName: ",",
callSite: "",
labelSelector: {},
},
};

Expand Down
1 change: 1 addition & 0 deletions python/ray/dashboard/client/src/type/actor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export type Actor = {
exitDetail: string;
reprName: string;
callSite?: string | undefined;
labelSelector: { [key: string]: string } | null;
};

export type ActorDetail = {
Expand Down
1 change: 1 addition & 0 deletions python/ray/dashboard/client/src/type/task.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export type Task = {
error_message: string | null;
task_log_info: { [key: string]: string | null | number };
call_site: string | null;
label_selector: { [key: string]: string } | null;
};

export type ProfilingData = {
Expand Down
3 changes: 2 additions & 1 deletion python/ray/dashboard/modules/node/node_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def _actor_table_data_to_dict(message):
"reprName",
"placementGroupId",
"callSite",
"labelSelector",
}
light_message = {k: v for (k, v) in orig_message.items() if k in fields}
light_message["actorClass"] = orig_message["className"]
Expand All @@ -134,7 +135,7 @@ def _actor_table_data_to_dict(message):
light_message["startTime"] = int(light_message["startTime"])
light_message["endTime"] = int(light_message["endTime"])
light_message["requiredResources"] = dict(message.required_resources)

light_message["labelSelector"] = dict(message.label_selector)
return light_message


Expand Down
65 changes: 65 additions & 0 deletions python/ray/tests/test_state_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2548,6 +2548,71 @@ def verify():
print(list_tasks())


def test_list_get_tasks_label_selector(ray_start_cluster):
"""
Call chain: Driver -> caller -> callee.
Verify that the call site is captured in callee, and it contains string
"caller".
"""
cluster = ray_start_cluster
cluster.add_node(
num_cpus=2, labels={"ray.io/accelerator-type": "A100", "region": "us-west4"}
)
ray.init(address=cluster.address)
cluster.wait_for_nodes()

@ray.remote(label_selector={"region": "us-west4"})
def foo():
import time

time.sleep(5)

call_ref = foo.remote()

ray.get(call_ref)

def verify():
task = get_task(call_ref)
assert task["label_selector"] == {"region": "us-west4"}
return True

wait_for_condition(verify)
print(list_tasks())


def test_list_actor_tasks_label_selector(ray_start_cluster):
"""
Call chain: Driver -> create_actor -> (Actor, Actor.method).

Verify that the call sites are captured in both Actor and Actor.method,
and they contain string "create_actor".
"""
cluster = ray_start_cluster
cluster.add_node(num_cpus=2, labels={"region": "us-west4"})
ray.init(address=cluster.address)
cluster.wait_for_nodes()

@ray.remote(label_selector={"region": "us-west4"})
class Actor:
def method(self):
import time

time.sleep(5)

actor = Actor.remote()
ray.get(actor.method.remote())

def verify():
actors = list_actors(detail=True)
assert len(actors) == 1
actor = actors[0]
assert actor["label_selector"] == {"region": "us-west4"}
return True

wait_for_condition(verify)
print(list_actors(detail=True))


def test_pg_worker_id_tasks(shutdown_only):
ray.init(num_cpus=1)
pg = ray.util.placement_group(bundles=[{"CPU": 1}])
Expand Down
5 changes: 5 additions & 0 deletions python/ray/util/state/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,8 @@ class ActorState(StateSchema):
)
#: The call site of the actor creation.
call_site: Optional[str] = state_column(detail=True, filterable=False)
#: The label selector for the actor.
label_selector: Optional[dict] = state_column(detail=True, filterable=False)


@dataclass(init=not IS_PYDANTIC_2)
Expand Down Expand Up @@ -797,6 +799,8 @@ class TaskState(StateSchema):
is_debugger_paused: Optional[bool] = state_column(detail=True, filterable=True)
#: The call site of the task.
call_site: Optional[str] = state_column(detail=True, filterable=False)
#: The label selector for the task.
label_selector: Optional[dict] = state_column(detail=True, filterable=False)


@dataclass(init=not IS_PYDANTIC_2)
Expand Down Expand Up @@ -1617,6 +1621,7 @@ def protobuf_to_task_state_dict(message: TaskEvents) -> dict:
"parent_task_id",
"placement_group_id",
"call_site",
"label_selector",
],
),
(task_attempt, ["task_id", "attempt_number", "job_id"]),
Expand Down
3 changes: 3 additions & 0 deletions src/ray/gcs/gcs_server/gcs_actor_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,9 @@ void GcsActor::WriteActorExportEvent() const {
export_actor_data_ptr->set_repr_name(actor_table_data_.repr_name());
export_actor_data_ptr->mutable_labels()->insert(task_spec_.get()->labels().begin(),
task_spec_.get()->labels().end());
export_actor_data_ptr->mutable_label_selector()->insert(
actor_table_data_.label_selector().begin(),
actor_table_data_.label_selector().end());

RayExportEvent(export_actor_data_ptr).SendEvent();
}
Expand Down
4 changes: 4 additions & 0 deletions src/ray/gcs/gcs_server/gcs_actor_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ class GcsActor {
if (task_spec.call_site().size() > 0) {
actor_table_data_.set_call_site(task_spec.call_site());
}
if (task_spec.label_selector().size() > 0) {
actor_table_data_.mutable_label_selector()->insert(
task_spec.label_selector().begin(), task_spec.label_selector().end());
}
RefreshMetrics();
export_event_write_enabled_ = IsExportAPIEnabledActor();
}
Expand Down
10 changes: 10 additions & 0 deletions src/ray/gcs/pb_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ inline void FillTaskInfo(rpc::TaskInfoEntry *task_info,
if (task_spec.GetMessage().call_site().size() > 0) {
task_info->set_call_site(task_spec.GetMessage().call_site());
}
if (task_spec.GetMessage().label_selector().size() > 0) {
task_info->mutable_label_selector()->insert(
task_spec.GetMessage().label_selector().begin(),
task_spec.GetMessage().label_selector().end());
}
}

// Fill task_info for the export API with task specification from task_spec
Expand Down Expand Up @@ -300,6 +305,11 @@ inline void FillExportTaskInfo(rpc::ExportTaskEventData::TaskInfoEntry *task_inf
if (!pg_id.IsNil()) {
task_info->set_placement_group_id(pg_id.Binary());
}
if (task_spec.GetMessage().label_selector().size() > 0) {
task_info->mutable_label_selector()->insert(
task_spec.GetMessage().label_selector().begin(),
task_spec.GetMessage().label_selector().end());
}
}

/// Generate a RayErrorInfo from ErrorType
Expand Down
2 changes: 2 additions & 0 deletions src/ray/protobuf/common.proto
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,8 @@ message TaskInfoEntry {
// Human readable stacktrace of the task invocation, or actor creation. The exact data
// format depends on the language. Only populated if the flag is enabled.
optional string call_site = 27;
// The key-value label constraints of the node to schedule this task or actor on.
map<string, string> label_selector = 28;
}

message TaskAttempt {
Expand Down
2 changes: 2 additions & 0 deletions src/ray/protobuf/export_actor_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,6 @@ message ExportActorData {
string repr_name = 14;
// The key-value labels for task and actor.
map<string, string> labels = 15;
// The label selector for the actor.
map<string, string> label_selector = 16;
}
2 changes: 2 additions & 0 deletions src/ray/protobuf/export_task_event.proto
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ message ExportTaskEventData {
optional bytes placement_group_id = 9;
// The key-value labels for task and actor.
map<string, string> labels = 10;
// The key-value label constraints of the node to schedule this task or actor on.
map<string, string> label_selector = 11;
}

message ProfileEventEntry {
Expand Down
2 changes: 2 additions & 0 deletions src/ray/protobuf/gcs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ message ActorTableData {
// format depends on the language. Only populated if
// `RAY_record_task_actor_creation_sites` is set to `true`.
optional string call_site = 34;
// The label selector of the actor.
map<string, string> label_selector = 35;
}

message ErrorTableData {
Expand Down