diff --git a/tests/test_generation.py b/tests/test_generation.py
index 77a73ca15d..c1e28ade3e 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -29,8 +29,8 @@ def test_eval_gsm8k_api(tmp_path):
     cmd = (
         f"ns eval "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --benchmarks=gsm8k "
         f" --output_dir={tmp_path} "
         f" ++max_samples=2 "
@@ -60,12 +60,12 @@ def test_eval_judge_api(tmp_path):
     cmd = (
         f"ns eval "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --benchmarks=math-500 "
         f" --output_dir={tmp_path} "
-        f" --judge_model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --judge_server_address=https://inference-api.nvidia.com/v1/ "
+        f" --judge_model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --judge_server_address=https://integrate.api.nvidia.com/v1 "
         f" --judge_server_type=openai "
         f" --judge_generation_type=math_judge "
         f" --extra_judge_args='++max_concurrent_requests=1 ++inference.timeout=120 ++server.max_retries=1' "
@@ -96,8 +96,8 @@ def test_fail_on_api_key_env_var(tmp_path):
     cmd = (
         f"ns eval "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --benchmarks=gsm8k "
         f" --output_dir={tmp_path} "
         f" ++max_samples=2 "
@@ -118,8 +118,8 @@ def test_succeed_on_api_key_env_var(tmp_path):
         f"unset NVIDIA_API_KEY && "
         f"ns eval "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --benchmarks=gsm8k "
         f" --output_dir={tmp_path} "
         f" ++max_samples=2 "
@@ -151,8 +151,8 @@ def test_generate_openai_format(tmp_path, format):
     cmd = (
         f"ns generate "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --input_file=/nemo_run/code/tests/data/openai-input-{format}.test "
         f" --output_dir={tmp_path} "
         f" ++prompt_format=openai "
@@ -217,12 +217,12 @@ def test_judge_generations_with_structured_output(tmp_path):
     cmd = (
         f"ns eval "
         f" --server_type=openai "
-        f" --model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --server_address=https://inference-api.nvidia.com/v1/ "
+        f" --model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --server_address=https://integrate.api.nvidia.com/v1 "
         f" --benchmarks=hle "
         f" --output_dir={tmp_path} "
-        f" --judge_model=nvidia/nvidia/nemotron-nano-30b-v3 "
-        f" --judge_server_address=https://inference-api.nvidia.com/v1/ "
+        f" --judge_model=nvidia/nemotron-3-nano-30b-a3b "
+        f" --judge_server_address=https://integrate.api.nvidia.com/v1 "
         f" --judge_server_type=openai "
         f" --metric_type=hle-aa "
        f' --extra_judge_args="++structured_output=HLE_JUDGE_AA ++max_concurrent_requests=1 ++inference.timeout=120 ++server.max_retries=1" '