Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions tests/slurm-tests/gpt_oss_python_aime25/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
# limitations under the License.

import argparse
import sys
from pathlib import Path

# Add parent directory to path to import utils
sys.path.insert(0, str(Path(__file__).parents[1]))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For consideration: if there are common utilities, perhaps we can move them under nemo_skills itself and avoid the ugly sys.path manipulation.

from utils import add_common_args, prepare_cluster_config_for_test

from nemo_skills.pipeline.cli import eval, prepare_data, run_cmd, wrap_arguments

Expand Down Expand Up @@ -50,18 +56,22 @@ def eval_gpt_oss_python(workspace, cluster, expname_prefix, wandb_project):

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--workspace", required=True, help="Workspace directory containing all experiment data")
parser.add_argument("--cluster", required=True, help="Cluster name")
parser.add_argument("--expname_prefix", required=True, help="Experiment name prefix")
parser.add_argument("--wandb_project", default="nemo-skills-slurm-ci", help="W&B project name")
add_common_args(parser)

args = parser.parse_args()

# Prepare cluster config with job_dir set to workspace
cluster = prepare_cluster_config_for_test(
args.cluster,
args.workspace,
cluster_config_mode=args.cluster_config_mode,
)

prepare_data(ctx=wrap_arguments("aime25"))

eval_expname = eval_gpt_oss_python(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)
Expand All @@ -71,7 +81,7 @@ def main():

run_cmd(
ctx=wrap_arguments(checker_cmd),
cluster=args.cluster,
cluster=cluster,
expname=args.expname_prefix + "-check-results",
log_dir=f"{args.workspace}/check-results-logs",
run_after=eval_expname,
Expand Down
20 changes: 15 additions & 5 deletions tests/slurm-tests/omr_simple_recipe/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@

import argparse
import subprocess
import sys
from pathlib import Path

# Add parent directory to path to import utils
sys.path.insert(0, str(Path(__file__).parents[1]))
from utils import add_common_args, prepare_cluster_config_for_test

from nemo_skills.pipeline.cli import run_cmd, wrap_arguments


def main():
ap = argparse.ArgumentParser()
ap.add_argument("--cluster", required=True)
ap.add_argument("--workspace", required=True, help="Workspace path")
ap.add_argument("--wandb_project", default="nemo-skills-slurm-ci", help="W&B project name")
ap.add_argument("--expname_prefix", required=True, help="Experiment name prefix used inside the recipe")
add_common_args(ap)
ap.add_argument("--disable_wandb", action="store_true", help="Disable W&B logging in the recipe")
ap.add_argument(
"--backend",
Expand All @@ -33,6 +36,13 @@ def main():
)
args = ap.parse_args()

# Prepare cluster config with job_dir set to workspace
cluster = prepare_cluster_config_for_test(
args.cluster,
args.workspace,
cluster_config_mode=args.cluster_config_mode,
)

cmd = (
f"python -m recipes.openmathreasoning.scripts.simplified_recipe "
f" --cluster {args.cluster} "
Expand All @@ -54,7 +64,7 @@ def main():

run_cmd(
ctx=wrap_arguments(checker_cmd),
cluster=args.cluster,
cluster=cluster,
expname=args.expname_prefix + "-check-results",
log_dir=f"{args.workspace}/check-results-logs",
# these are launched in simplified recipe
Expand Down
26 changes: 18 additions & 8 deletions tests/slurm-tests/qwen3_4b_evals/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
# limitations under the License.

import argparse
import sys
from pathlib import Path

# Add parent directory to path to import utils
sys.path.insert(0, str(Path(__file__).parents[1]))
from utils import add_common_args, prepare_cluster_config_for_test

from nemo_skills.pipeline.cli import eval, prepare_data, run_cmd, wrap_arguments

Expand Down Expand Up @@ -135,33 +141,37 @@ def eval_qwen3_offline_genselect(workspace, cluster, expname_prefix, wandb_proje

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--workspace", required=True, help="Workspace directory containing all experiment data")
parser.add_argument("--cluster", required=True, help="Cluster name, e.g. oci")
parser.add_argument("--expname_prefix", required=True, help="Experiment name prefix")
parser.add_argument("--wandb_project", default="nemo-skills-slurm-ci", help="W&B project name")
add_common_args(parser)

args = parser.parse_args()

# Prepare cluster config with job_dir set to workspace
cluster = prepare_cluster_config_for_test(
args.cluster,
args.workspace,
cluster_config_mode=args.cluster_config_mode,
)

prepare_data(ctx=wrap_arguments("bfcl_v3 aime24"))

bfcl_expname = eval_qwen3_bfcl(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)

# GenSelect Tests
online_genselect_expname = eval_qwen3_online_genselect(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)

offline_genselect_expname = eval_qwen3_offline_genselect(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)
Expand All @@ -171,7 +181,7 @@ def main():

run_cmd(
ctx=wrap_arguments(checker_cmd),
cluster=args.cluster,
cluster=cluster,
expname=args.expname_prefix + "-check-results",
log_dir=f"{args.workspace}/check-results-logs",
run_after=[bfcl_expname, online_genselect_expname, offline_genselect_expname],
Expand Down
22 changes: 16 additions & 6 deletions tests/slurm-tests/qwen3coder_30b_swebench/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
# limitations under the License.

import argparse
import sys
from pathlib import Path

# Add parent directory to path to import utils
sys.path.insert(0, str(Path(__file__).parents[1]))
from utils import add_common_args, prepare_cluster_config_for_test

from nemo_skills.pipeline.cli import eval, prepare_data, run_cmd, wrap_arguments

Expand Down Expand Up @@ -44,14 +50,18 @@ def eval_qwen3coder(workspace, cluster, expname_prefix, wandb_project, agent_fra

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--workspace", required=True, help="Workspace directory containing all experiment data")
parser.add_argument("--cluster", required=True, help="Cluster name, e.g. oci")
parser.add_argument("--expname_prefix", required=True, help="Experiment name prefix")
parser.add_argument("--wandb_project", default="nemo-skills-slurm-ci", help="W&B project name")
add_common_args(parser)
parser.add_argument("--container_formatter", default=None, help="Container formatter for SWE-bench")

args = parser.parse_args()

# Prepare cluster config with job_dir set to workspace
cluster = prepare_cluster_config_for_test(
args.cluster,
args.workspace,
cluster_config_mode=args.cluster_config_mode,
)

if args.container_formatter is None:
prepare_data_args = "swe-bench"
else:
Expand All @@ -64,7 +74,7 @@ def main():

eval_qwen3coder(
workspace=workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=expname_prefix,
wandb_project=args.wandb_project,
agent_framework=agent_framework,
Expand All @@ -79,7 +89,7 @@ def main():

run_cmd(
ctx=wrap_arguments(checker_cmd),
cluster=args.cluster,
cluster=cluster,
expname=f"{expname_prefix}-check-results",
log_dir=f"{workspace}/check-results-logs",
run_after=expname_prefix,
Expand Down
33 changes: 28 additions & 5 deletions tests/slurm-tests/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,39 @@

CLUSTER=$1
RUN_NAME=${2:-$(date +%Y-%m-%d)}
# Parse --cluster_config_mode flag with default 'assert'
CLUSTER_CONFIG_MODE="assert"
POSITIONAL_ARGS=()

while [[ $# -gt 0 ]]; do
case "$1" in
--cluster_config_mode)
CLUSTER_CONFIG_MODE="$2"
shift 2
;;
*)
POSITIONAL_ARGS+=("$1")
shift
;;
esac
done

# Restore positional parameters (so $1 = cluster, $2 = run_name, etc.)
set -- "${POSITIONAL_ARGS[@]}"

CLUSTER=$1
RUN_NAME=${2:-$(date +%Y-%m-%d)}


# TODO: change back to parallel submission after fixing https://github.com/NVIDIA-NeMo/Skills/issues/964

python tests/slurm-tests/gpt_oss_python_aime25/run_test.py --cluster $CLUSTER --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/gpt_oss_python_aime25 --expname_prefix gpt_oss_python_aime25_$RUN_NAME
python tests/slurm-tests/gpt_oss_python_aime25/run_test.py --cluster $CLUSTER --cluster_config_mode $CLUSTER_CONFIG_MODE --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/gpt_oss_python_aime25 --expname_prefix gpt_oss_python_aime25_$RUN_NAME
# sleep 10
python tests/slurm-tests/super_49b_evals/run_test.py --cluster $CLUSTER --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/super_49b_evals --expname_prefix super_49b_evals_$RUN_NAME
python tests/slurm-tests/super_49b_evals/run_test.py --cluster $CLUSTER --cluster_config_mode $CLUSTER_CONFIG_MODE --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/super_49b_evals --expname_prefix super_49b_evals_$RUN_NAME
# sleep 10
python tests/slurm-tests/qwen3_4b_evals/run_test.py --cluster $CLUSTER --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/qwen3_4b_evals --expname_prefix qwen3_4b_evals_$RUN_NAME
python tests/slurm-tests/qwen3_4b_evals/run_test.py --cluster $CLUSTER --cluster_config_mode $CLUSTER_CONFIG_MODE --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/qwen3_4b_evals --expname_prefix qwen3_4b_evals_$RUN_NAME
# sleep 10
python tests/slurm-tests/omr_simple_recipe/run_test.py --cluster $CLUSTER --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/omr_simple_recipe/nemo-rl --expname_prefix omr_simple_recipe_nemo_rl_$RUN_NAME
python tests/slurm-tests/omr_simple_recipe/run_test.py --cluster $CLUSTER --cluster_config_mode $CLUSTER_CONFIG_MODE --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/omr_simple_recipe/nemo-rl --expname_prefix omr_simple_recipe_nemo_rl_$RUN_NAME
# sleep 10
python tests/slurm-tests/qwen3coder_30b_swebench/run_test.py --cluster $CLUSTER --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/qwen3coder_30b_swebench --expname_prefix qwen3coder_30b_swebench_$RUN_NAME --container_formatter '/swe-bench-images/swebench_sweb.eval.x86_64.{instance_id}.sif'
python tests/slurm-tests/qwen3coder_30b_swebench/run_test.py --cluster $CLUSTER --cluster_config_mode $CLUSTER_CONFIG_MODE --workspace /workspace/nemo-skills-slurm-ci/$RUN_NAME/qwen3coder_30b_swebench --expname_prefix qwen3coder_30b_swebench_$RUN_NAME --container_formatter '/swe-bench-images/swebench_sweb.eval.x86_64.{instance_id}.sif'
# wait
26 changes: 18 additions & 8 deletions tests/slurm-tests/super_49b_evals/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
# limitations under the License.

import argparse
import sys
from pathlib import Path

# Add parent directory to path to import utils
sys.path.insert(0, str(Path(__file__).parents[1]))
from utils import add_common_args, prepare_cluster_config_for_test

from nemo_skills.pipeline.cli import eval, prepare_data, run_cmd, wrap_arguments

Expand Down Expand Up @@ -312,29 +318,33 @@ def eval_reasoning_off(workspace, cluster, expname_prefix, wandb_project):

def main():
parser = argparse.ArgumentParser()
parser.add_argument("--workspace", required=True, help="Workspace directory containing all experiment data")
parser.add_argument("--cluster", required=True, help="Cluster name")
parser.add_argument("--expname_prefix", required=True, help="Experiment name prefix")
parser.add_argument("--wandb_project", default="nemo-skills-slurm-ci", help="W&B project name")
add_common_args(parser)

args = parser.parse_args()

# Prepare cluster config with job_dir set to workspace
cluster = prepare_cluster_config_for_test(
args.cluster,
args.workspace,
cluster_config_mode=args.cluster_config_mode,
)

prepare_data(
ctx=wrap_arguments("gpqa mmlu-pro hle livecodebench scicode bfcl_v3 math-500 aime24 aime25"),
)

setup(workspace=args.workspace, cluster=args.cluster, expname_prefix=args.expname_prefix)
setup(workspace=args.workspace, cluster=cluster, expname_prefix=args.expname_prefix)

reasoning_on_expnames = eval_reasoning_on(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)

reasoning_off_expnames = eval_reasoning_off(
workspace=args.workspace,
cluster=args.cluster,
cluster=cluster,
expname_prefix=args.expname_prefix,
wandb_project=args.wandb_project,
)
Expand All @@ -344,7 +354,7 @@ def main():

run_cmd(
ctx=wrap_arguments(checker_cmd),
cluster=args.cluster,
cluster=cluster,
expname=args.expname_prefix + "-check-results",
log_dir=f"{args.workspace}/check-results-logs",
run_after=reasoning_on_expnames + reasoning_off_expnames,
Expand Down
Loading
Loading