diff --git a/python/ray/dashboard/modules/reporter/reporter_agent.py b/python/ray/dashboard/modules/reporter/reporter_agent.py index fff076e4cf51..ee493539a7a6 100644 --- a/python/ray/dashboard/modules/reporter/reporter_agent.py +++ b/python/ray/dashboard/modules/reporter/reporter_agent.py @@ -1312,8 +1312,8 @@ def generate_worker_stats_record(self, worker_stats: List[dict]) -> List[Record] for stat in worker_stats: cmdline = stat.get("cmdline") - # All ray processes start with ray:: - if cmdline and len(cmdline) > 0 and cmdline[0].startswith("ray::"): + # collect both worker and driver stats + if cmdline: proc_name = cmdline[0] proc_name_to_stats[proc_name].append(stat) @@ -1323,9 +1323,6 @@ def generate_worker_stats_record(self, worker_stats: List[dict]) -> List[Record] or stat.get("gpu_utilization", 0) > 0 ): gpu_worker_proc_names.add(proc_name) - # We will lose worker stats that don't follow the ray worker proc - # naming convention. Theoretically, there should be no data loss here - # because all worker processes are renamed to ray::. records = [] diff --git a/python/ray/dashboard/modules/reporter/tests/test_reporter.py b/python/ray/dashboard/modules/reporter/tests/test_reporter.py index f281653d7ab6..5e5686f25c70 100644 --- a/python/ray/dashboard/modules/reporter/tests/test_reporter.py +++ b/python/ray/dashboard/modules/reporter/tests/test_reporter.py @@ -844,37 +844,6 @@ def verify_metrics_values( 0, ) - """ - Verify worker names are only reported when they start with ray::. - """ - # Verify if the command doesn't start with ray::, metrics are not reported. - unknown_stats = { - "memory_info": Bunch(rss=55934976, vms=7026937856, pfaults=15354, pageins=0), - "memory_full_info": Bunch( - uss=51428381, rss=55934976, vms=7026937856, pfaults=15354, pageins=0 - ), - "cpu_percent": 6.0, - "num_fds": 8, - "cmdline": ["python mock", "", "", "", "", "", "", "", "", "", "", ""], - "create_time": 1614826391.338613, - "pid": 7175, - "cpu_times": Bunch( - user=0.607899328, - system=0.274044032, - children_user=0.0, - children_system=0.0, - ), - } - test_stats["workers"] = [idle_stats, unknown_stats] - - records = agent._to_records(test_stats, cluster_stats) - uss_records, cpu_records, num_fds_records = get_uss_and_cpu_and_num_fds_records( - records - ) - assert "python mock" not in uss_records - assert "python mock" not in cpu_records - assert "python mock" not in num_fds_records - stats_payload = agent._generate_stats_payload(test_stats) assert stats_payload is not None assert isinstance(stats_payload, str) diff --git a/python/ray/tests/test_metrics_agent.py b/python/ray/tests/test_metrics_agent.py index 4f3814a44a01..2b2829821818 100644 --- a/python/ray/tests/test_metrics_agent.py +++ b/python/ray/tests/test_metrics_agent.py @@ -439,7 +439,7 @@ def verify_node_metrics(): samples = avail_metrics[metric] for sample in samples: components.add(sample.labels["Component"]) - assert components == {"gcs", "raylet", "agent", "ray::IDLE"} + assert components == {"gcs", "raylet", "agent", "ray::IDLE", sys.executable} avail_metrics = set(avail_metrics) @@ -886,6 +886,7 @@ def verify_components(): components.add(sample.labels["Component"]) print(components) assert { + sys.executable, # driver process "raylet", "agent", "ray::Actor",