From 3abb8551ee5510980d338f5a777385c6e055dc1b Mon Sep 17 00:00:00 2001 From: chrisfellowes Date: Thu, 30 Oct 2025 00:30:44 +0000 Subject: [PATCH 01/10] add entrypoint log for jobs Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/job_supervisor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ray/dashboard/modules/job/job_supervisor.py b/python/ray/dashboard/modules/job/job_supervisor.py index 60766ee93935..2a9517d767f7 100644 --- a/python/ray/dashboard/modules/job/job_supervisor.py +++ b/python/ray/dashboard/modules/job/job_supervisor.py @@ -173,6 +173,7 @@ def _exec_entrypoint(self, env: dict, logs_path: str) -> subprocess.Popen: # Open in append mode to avoid overwriting runtime_env setup logs for the # supervisor actor, which are also written to the same file. with open(logs_path, "a") as logs_file: + logs_file.write(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") child_process = subprocess.Popen( self._entrypoint, shell=True, From daa1fdae91f3bda907747d3f703d052a912052bb Mon Sep 17 00:00:00 2001 From: chrisfellowes Date: Thu, 30 Oct 2025 00:46:43 +0000 Subject: [PATCH 02/10] fix test for entrypoint log Signed-off-by: chrisfellowes Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/tests/test_job_manager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index 6e886adf42da..4696275640d0 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -335,8 +335,9 @@ async def test_runtime_env_setup_logged_to_job_driver_logs( gcs_client = ray._private.worker.global_worker.gcs_client job_manager = JobManager(gcs_client, tmp_path) + entrypoint = "echo hello 1" job_id = await job_manager.submit_job( - entrypoint="echo hello 1", submission_id="test_runtime_env_setup_logs" + entrypoint=entrypoint, submission_id="test_runtime_env_setup_logs" ) await async_wait_for_condition( check_job_succeeded, job_manager=job_manager, job_id=job_id @@ -347,10 +348,10 @@ async def test_runtime_env_setup_logged_to_job_driver_logs( ray._private.worker._global_node.get_logs_dir_path(), f"job-driver-{job_id}.log", ) - start_message = "Runtime env is setting up." with open(job_driver_log_path, "r") as f: logs = f.read() - assert start_message in logs + assert "Runtime env is setting up." in logs + assert f"Running entrypoint for job {job_id}: {entrypoint}" in logs @pytest.mark.asyncio From 5fd357d5e04c044e56d6272e1d1cdc8030182616 Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Wed, 29 Oct 2025 18:22:34 -0700 Subject: [PATCH 03/10] use logger Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/job_supervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ray/dashboard/modules/job/job_supervisor.py b/python/ray/dashboard/modules/job/job_supervisor.py index 2a9517d767f7..fe81a43cdd8b 100644 --- a/python/ray/dashboard/modules/job/job_supervisor.py +++ b/python/ray/dashboard/modules/job/job_supervisor.py @@ -173,7 +173,7 @@ def _exec_entrypoint(self, env: dict, logs_path: str) -> subprocess.Popen: # Open in append mode to avoid overwriting runtime_env setup logs for the # supervisor actor, which are also written to the same file. with open(logs_path, "a") as logs_file: - logs_file.write(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") + self._logger.info(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") child_process = subprocess.Popen( self._entrypoint, shell=True, From b7afa9ad986bf915d03b9e689112fb07599e2091 Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Thu, 30 Oct 2025 07:47:16 -0700 Subject: [PATCH 04/10] use correct log file Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/job_supervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ray/dashboard/modules/job/job_supervisor.py b/python/ray/dashboard/modules/job/job_supervisor.py index fe81a43cdd8b..2a9517d767f7 100644 --- a/python/ray/dashboard/modules/job/job_supervisor.py +++ b/python/ray/dashboard/modules/job/job_supervisor.py @@ -173,7 +173,7 @@ def _exec_entrypoint(self, env: dict, logs_path: str) -> subprocess.Popen: # Open in append mode to avoid overwriting runtime_env setup logs for the # supervisor actor, which are also written to the same file. with open(logs_path, "a") as logs_file: - self._logger.info(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") + logs_file.write(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") child_process = subprocess.Popen( self._entrypoint, shell=True, From dd06aed8d712da3863a0c55b2ffad9e2c6eec17a Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Thu, 30 Oct 2025 12:39:30 -0700 Subject: [PATCH 05/10] fix linter Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/job_supervisor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ray/dashboard/modules/job/job_supervisor.py b/python/ray/dashboard/modules/job/job_supervisor.py index 2a9517d767f7..94f03be9b0af 100644 --- a/python/ray/dashboard/modules/job/job_supervisor.py +++ b/python/ray/dashboard/modules/job/job_supervisor.py @@ -173,7 +173,9 @@ def _exec_entrypoint(self, env: dict, logs_path: str) -> subprocess.Popen: # Open in append mode to avoid overwriting runtime_env setup logs for the # supervisor actor, which are also written to the same file. with open(logs_path, "a") as logs_file: - logs_file.write(f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n") + logs_file.write( + f"Running entrypoint for job {self._job_id}: {self._entrypoint}\n" + ) child_process = subprocess.Popen( self._entrypoint, shell=True, From baa2942de90f2e0a0d5f65e31a6b6cbf45432025 Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Fri, 31 Oct 2025 09:03:01 -0500 Subject: [PATCH 06/10] fix Signed-off-by: Edward Oakes --- .../modules/job/tests/test_cli_integration.py | 10 ++++++++-- .../ray/dashboard/modules/job/tests/test_job_agent.py | 2 +- .../dashboard/modules/job/tests/test_job_manager.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/ray/dashboard/modules/job/tests/test_cli_integration.py b/python/ray/dashboard/modules/job/tests/test_cli_integration.py index 872a1c823d57..e837053ba627 100644 --- a/python/ray/dashboard/modules/job/tests/test_cli_integration.py +++ b/python/ray/dashboard/modules/job/tests/test_cli_integration.py @@ -186,7 +186,10 @@ def test_basic_submit(self, ray_start_stop): """Should tail logs and wait for process to exit.""" cmd = "sleep 1 && echo hello && sleep 1 && echo hello" stdout, _ = _run_cmd(f"ray job submit -- bash -c '{cmd}'") - assert stdout.count("hello") == 2 + + # 'hello' should appear four times: twice when we print the entrypoint, then + # two more times in the logs from the `echo`. + assert stdout.count("hello") == 4 assert "succeeded" in stdout def test_submit_no_wait(self, ray_start_stop): @@ -200,7 +203,10 @@ def test_submit_with_logs_instant_job(self, ray_start_stop): """Should exit immediately and print logs even if job returns instantly.""" cmd = "echo hello" stdout, _ = _run_cmd(f"ray job submit -- bash -c '{cmd}'") - assert "hello" in stdout + + # 'hello' should appear twice: once when we print the entrypoint, then + # again from the `echo`. + assert stdout.count("hello") == 2 def test_multiple_ray_init(self, ray_start_stop): cmd = ( diff --git a/python/ray/dashboard/modules/job/tests/test_job_agent.py b/python/ray/dashboard/modules/job/tests/test_job_agent.py index 963ef0b5b078..f179a78aad3a 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_agent.py +++ b/python/ray/dashboard/modules/job/tests/test_job_agent.py @@ -428,7 +428,7 @@ async def test_tail_job_logs_with_echo(job_sdk_client): async for lines in agent_client.tail_job_logs(job_id): print(lines, end="") for line in lines.strip().split("\n"): - if "Runtime env is setting up." in line: + if "Runtime env is setting up." in line or "Running entrypoint for job" in line: continue assert line.split(" ") == ["Hello", str(i)] i += 1 diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index 4696275640d0..b4b6517cc322 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -1041,7 +1041,7 @@ async def _tail_and_assert_logs( i = 0 async for lines in job_manager.tail_job_logs(job_id): assert all( - s == expected_log or "Runtime env" in s + s == expected_log or "Runtime env" in s or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) print(lines, end="") From 535059facc2f880554d6d0c4bffa7faecf059a05 Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Fri, 31 Oct 2025 09:03:56 -0500 Subject: [PATCH 07/10] fix lint Signed-off-by: Edward Oakes --- python/ray/dashboard/modules/job/tests/test_job_agent.py | 5 ++++- python/ray/dashboard/modules/job/tests/test_job_manager.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/ray/dashboard/modules/job/tests/test_job_agent.py b/python/ray/dashboard/modules/job/tests/test_job_agent.py index f179a78aad3a..42259aad1d47 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_agent.py +++ b/python/ray/dashboard/modules/job/tests/test_job_agent.py @@ -428,7 +428,10 @@ async def test_tail_job_logs_with_echo(job_sdk_client): async for lines in agent_client.tail_job_logs(job_id): print(lines, end="") for line in lines.strip().split("\n"): - if "Runtime env is setting up." in line or "Running entrypoint for job" in line: + if ( + "Runtime env is setting up." in line + or "Running entrypoint for job" in line + ): continue assert line.split(" ") == ["Hello", str(i)] i += 1 diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index b4b6517cc322..f3e2d186cc3c 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -1041,7 +1041,9 @@ async def _tail_and_assert_logs( i = 0 async for lines in job_manager.tail_job_logs(job_id): assert all( - s == expected_log or "Runtime env" in s or "Running entrypoint for job" in s + s == expected_log + or "Runtime env" in s + or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) print(lines, end="") From 476cd6dab8ab2d0a7fd224f7716fb07938322b4c Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Fri, 31 Oct 2025 09:02:12 -0700 Subject: [PATCH 08/10] fix test Signed-off-by: Chris Fellowes --- .../ray/dashboard/modules/job/tests/test_job_manager.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index f3e2d186cc3c..3e1ccc7a971c 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -1083,7 +1083,9 @@ async def test_successful_job(self, job_manager): async for lines in job_manager.tail_job_logs(job_id): assert all( - s == "Waiting..." or "Runtime env" in s + s == "Waiting..." + or "Runtime env" + or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) print(lines, end="") @@ -1107,7 +1109,9 @@ async def test_failed_job(self, job_manager): async for lines in job_manager.tail_job_logs(job_id): assert all( - s == "Waiting..." or "Runtime env" in s + s == "Waiting..." + or "Runtime env" + or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) print(lines, end="") From bb4c75813a7a78b5664739ed43f57f8a8f0d75b6 Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Fri, 31 Oct 2025 09:23:09 -0700 Subject: [PATCH 09/10] fix test Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/tests/test_job_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index 3e1ccc7a971c..bf814ddfe33a 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -1084,7 +1084,7 @@ async def test_successful_job(self, job_manager): async for lines in job_manager.tail_job_logs(job_id): assert all( s == "Waiting..." - or "Runtime env" + or "Runtime env" in s or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) @@ -1110,7 +1110,7 @@ async def test_failed_job(self, job_manager): async for lines in job_manager.tail_job_logs(job_id): assert all( s == "Waiting..." - or "Runtime env" + or "Runtime env" in s or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) From 2fbd61278ea1d79b4b5da176b2a7bcfa12e3b3d3 Mon Sep 17 00:00:00 2001 From: Chris Fellowes Date: Fri, 31 Oct 2025 11:08:59 -0700 Subject: [PATCH 10/10] fix test Signed-off-by: Chris Fellowes --- python/ray/dashboard/modules/job/tests/test_job_manager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ray/dashboard/modules/job/tests/test_job_manager.py b/python/ray/dashboard/modules/job/tests/test_job_manager.py index bf814ddfe33a..c27ff903319b 100644 --- a/python/ray/dashboard/modules/job/tests/test_job_manager.py +++ b/python/ray/dashboard/modules/job/tests/test_job_manager.py @@ -1140,7 +1140,10 @@ async def test_stopped_job(self, job_manager): async for lines in job_manager.tail_job_logs(job_id): assert all( - s == "Waiting..." or s == "Terminated" or "Runtime env" in s + s == "Waiting..." + or s == "Terminated" + or "Runtime env" in s + or "Running entrypoint for job" in s for s in lines.strip().split("\n") ) print(lines, end="")