From e18b8cf12fe4a281979b77d9c69a13cfb234d074 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Sat, 16 Aug 2025 04:13:09 -0700 Subject: [PATCH 1/3] allow overlapping sandbox with run_cmd Signed-off-by: SeanNaren --- nemo_skills/pipeline/utils/exp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nemo_skills/pipeline/utils/exp.py b/nemo_skills/pipeline/utils/exp.py index df992b6a09..0e6313027c 100644 --- a/nemo_skills/pipeline/utils/exp.py +++ b/nemo_skills/pipeline/utils/exp.py @@ -187,7 +187,6 @@ def get_executor( ipc_mode="host", volumes=mounts, ntasks_per_node=1, - privileged=bool(os.getenv('NEMO_SKILLS_PRIVILEGED_DOCKER', 0)), # locally we are always asking for all GPUs to be able to select a subset with CUDA_VISIBLE_DEVICES num_gpus=-1 if gpus_per_node is not None else None, network="host", @@ -471,7 +470,7 @@ def add_task( heterogeneous=heterogeneous, het_group=het_group, total_het_groups=total_het_groups, - overlap=server_config is not None, + overlap=(server_config is not None) or with_sandbox, with_ray=with_ray, ) ) @@ -509,7 +508,7 @@ def add_task( heterogeneous=heterogeneous, het_group=het_group, total_het_groups=total_het_groups, - overlap=server_config is not None, + overlap=True, with_ray=with_ray, ) executors.append(sandbox_executor) @@ -643,5 +642,6 @@ def get_nsight_cmd(profile_step_range): f'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/lib/x86_64-linux-gnu" && ' f"export NRL_NSYS_PROFILE_STEP_RANGE={profile_step_range} && " 'export NRL_NSYS_WORKER_PATTERNS="*policy*,*vllm*" && ' - ) - return cmd + + ) + return cmd \ No newline at end of file From 5e9074fe34dd5d70881461ed4d69ab43066d5a82 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Sat, 16 Aug 2025 04:15:28 -0700 Subject: [PATCH 2/3] fix Signed-off-by: SeanNaren --- nemo_skills/pipeline/utils/exp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo_skills/pipeline/utils/exp.py b/nemo_skills/pipeline/utils/exp.py index 0e6313027c..7de347b6d9 100644 --- a/nemo_skills/pipeline/utils/exp.py +++ b/nemo_skills/pipeline/utils/exp.py @@ -187,6 +187,7 @@ def get_executor( ipc_mode="host", volumes=mounts, ntasks_per_node=1, + privileged=bool(os.getenv('NEMO_SKILLS_PRIVILEGED_DOCKER', 0)), # locally we are always asking for all GPUs to be able to select a subset with CUDA_VISIBLE_DEVICES num_gpus=-1 if gpus_per_node is not None else None, network="host", @@ -642,6 +643,5 @@ def get_nsight_cmd(profile_step_range): f'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/lib/x86_64-linux-gnu" && ' f"export NRL_NSYS_PROFILE_STEP_RANGE={profile_step_range} && " 'export NRL_NSYS_WORKER_PATTERNS="*policy*,*vllm*" && ' - - ) + ) return cmd \ No newline at end of file From ed67f8392dc5fd0b7917b4928a696823a17344f2 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Sat, 16 Aug 2025 04:16:04 -0700 Subject: [PATCH 3/3] fix Signed-off-by: SeanNaren --- nemo_skills/pipeline/utils/exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_skills/pipeline/utils/exp.py b/nemo_skills/pipeline/utils/exp.py index 7de347b6d9..51e3140933 100644 --- a/nemo_skills/pipeline/utils/exp.py +++ b/nemo_skills/pipeline/utils/exp.py @@ -644,4 +644,4 @@ def get_nsight_cmd(profile_step_range): f"export NRL_NSYS_PROFILE_STEP_RANGE={profile_step_range} && " 'export NRL_NSYS_WORKER_PATTERNS="*policy*,*vllm*" && ' ) - return cmd \ No newline at end of file + return cmd