From 6fc4d6f6daa07e1f95b70eaa84e611e62db44589 Mon Sep 17 00:00:00 2001 From: Rayen Date: Mon, 5 Jan 2026 20:11:26 +0800 Subject: [PATCH] fix: relax nanov3 nightly test metrics strict (#1712) Signed-off-by: ruit Signed-off-by: NeMo Bot --- tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2-lora.sh | 2 +- tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2-lora.sh b/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2-lora.sh index f20120f158..445dc48b5a 100755 --- a/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2-lora.sh +++ b/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2-lora.sh @@ -33,6 +33,6 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["20"] < 2.03' \ + 'data["train/loss"]["20"] < 2.05' \ 'mean(data["timing/train/total_step_time"], 2) < 18' fi diff --git a/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2.sh b/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2.sh index 90eda7713f..ec0e22bf6b 100755 --- a/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2.sh +++ b/tests/test_suites/llm/sft-nanov3-30BA3B-2n8g-fsdp2.sh @@ -33,6 +33,6 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["20"] < 1.98' \ + 'data["train/loss"]["20"] < 2.05' \ 'mean(data["timing/train/total_step_time"], 2) < 15' fi