From 5ea97da956ef6ffb371a81142532ca992f58e2f4 Mon Sep 17 00:00:00 2001 From: edbeeching Date: Wed, 5 Feb 2025 08:53:03 +0000 Subject: [PATCH 1/2] fix uv env path + details --- slurm/eval_callback.slurm | 5 ++--- slurm/evaluate.slurm | 2 +- slurm/generate.slurm | 2 +- slurm/grpo.slurm | 3 ++- slurm/sft.slurm | 2 +- src/open_r1/utils/upload_details.py | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/slurm/eval_callback.slurm b/slurm/eval_callback.slurm index 093c3d067..bec49ab96 100644 --- a/slurm/eval_callback.slurm +++ b/slurm/eval_callback.slurm @@ -8,8 +8,7 @@ set -x -e source ~/.bashrc -conda activate openr1 - +source openr1/bin/activate TASK_NAME=$1 TASKS=$2 MODEL_ID=$3 @@ -31,7 +30,7 @@ fi LM_EVAL_REPO_ID="open-r1/open-r1-eval-leaderboard" MODEL_NAME=$(echo $MODEL_ID | sed 's/\//_/g') # replaces / with _ -DETAILS_REPO_ID="open-r1//details-$MODEL_NAME" +DETAILS_REPO_ID="open-r1/details-$MODEL_NAME" OUTPUT_DIR="eval_results/$MODEL_ID/$MODEL_REVISION/$TASK_NAME" # We need this flag since we run this script from training jobs that use DeepSpeed and the env vars get progated which causes errors during evaluation ACCELERATE_USE_DEEPSPEED=false diff --git a/slurm/evaluate.slurm b/slurm/evaluate.slurm index 5fe7f8e33..0ca4a8701 100644 --- a/slurm/evaluate.slurm +++ b/slurm/evaluate.slurm @@ -14,7 +14,7 @@ set -x -e source ~/.bashrc -conda activate openr1 +source openr1/bin/activate module load cuda/12.1 echo "START TIME: $(date)" echo "PYTHON ENV: $(which python)" diff --git a/slurm/generate.slurm b/slurm/generate.slurm index c154d64af..9cc9b1cce 100644 --- a/slurm/generate.slurm +++ b/slurm/generate.slurm @@ -129,7 +129,7 @@ export LD_LIBRARY_PATH=.venv/lib/python3.11/site-packages/nvidia/nvjitlink/lib echo "SLURM_JOB_ID: $SLURM_JOB_ID" echo "SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST" -source .venv/bin/activate +source openr1/bin/activate # Getting the node names nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST") diff --git a/slurm/grpo.slurm b/slurm/grpo.slurm index 419af2373..6b858bfa8 100644 --- a/slurm/grpo.slurm +++ b/slurm/grpo.slurm @@ -11,7 +11,8 @@ set -x -e source ~/.bashrc -conda activate openr1 +# conda activate openr1 +source openr1/bin/activate echo "START TIME: $(date)" echo "PYTHON ENV: $(which python)" diff --git a/slurm/sft.slurm b/slurm/sft.slurm index 120f8e8a2..31528cdcb 100644 --- a/slurm/sft.slurm +++ b/slurm/sft.slurm @@ -11,7 +11,7 @@ set -x -e source ~/.bashrc -conda activate openr1 +source openr1/bin/activate echo "START TIME: $(date)" echo "PYTHON ENV: $(which python)" diff --git a/src/open_r1/utils/upload_details.py b/src/open_r1/utils/upload_details.py index 273e48bdb..caa491cfa 100644 --- a/src/open_r1/utils/upload_details.py +++ b/src/open_r1/utils/upload_details.py @@ -39,7 +39,7 @@ class ScriptArguments: def main(): parser = HfArgumentParser(ScriptArguments) - args = parser.parse() + args = parser.parse_args_into_dataclasses()[0] if all(file.endswith(".json") for file in args.data_files): ds = load_dataset("json", data_files=args.data_files) From 9a6086f4c971d5e409ae31ddff4d9a79266f6053 Mon Sep 17 00:00:00 2001 From: lewtun Date: Wed, 5 Feb 2025 23:56:12 +0100 Subject: [PATCH 2/2] Update slurm/grpo.slurm --- slurm/grpo.slurm | 1 - 1 file changed, 1 deletion(-) diff --git a/slurm/grpo.slurm b/slurm/grpo.slurm index 6b858bfa8..8a3d63a32 100644 --- a/slurm/grpo.slurm +++ b/slurm/grpo.slurm @@ -11,7 +11,6 @@ set -x -e source ~/.bashrc -# conda activate openr1 source openr1/bin/activate echo "START TIME: $(date)" echo "PYTHON ENV: $(which python)"