@@ -1,16 +1,10 @@
 from pathlib import Path
 
 import pytest
-from defs import ci_profiler
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
-from defs.conftest import get_device_memory, get_sm_version, llm_models_root
+from defs.conftest import get_device_memory, get_sm_version
 from defs.trt_test_alternative import check_call
 
-from tensorrt_llm import LLM
-from tensorrt_llm.executor.request import LoRARequest
-from tensorrt_llm.lora_manager import LoraConfig
-from tensorrt_llm.sampling_params import SamplingParams
-
 # skip trt flow cases on post-Blackwell-Ultra
 if get_sm_version() >= 103:
     pytest.skip(
@@ -128,81 +122,3 @@ def test_nemotron_nas_summary_2gpu(nemotron_nas_example_root, llm_venv, |
     ]
 
     venv_mpi_check_call(llm_venv, mpi_cmd, summary_cmd)
-
-
-@pytest.mark.skip_less_device(4)
-@pytest.mark.skip_less_device_memory(80000)
-@pytest.mark.parametrize(
-    "nemotron_nas_model_root",
-    [
-        # "Llama-3_3-Nemotron-Super-49B-v1",
-        f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1"
-    ],
-    indirect=True)
-def test_nemotron_super_49b_real_lora_torch(nemotron_nas_example_root, llm_venv,
-                                            nemotron_nas_model_root,
-                                            llm_datasets_root, llm_rouge_root,
-                                            engine_dir, cmodel_dir):
-    """Run Nemotron Super 49B with real LoRA adapters using LLM-API Torch backend."""
-
-    print("Testing Nemotron Super 49B with real LoRA adapters...")
-
-    lora_adapter_path = f"/home/gvenkatarama/scratch_new/Bugs/TRTLLM/5463720/llama-3.3-nemotron-super-49b-v1_vlora-1a2cb80-v2"
-    print(f"Using real LoRA from: {lora_adapter_path}")
-
-    ci_profiler.start("test_nemotron_real_lora_torch")
-
-    lora_config = LoraConfig(
-        lora_dir=[lora_adapter_path],
-        max_lora_rank=32,  # From adapter_config.json: "r": 32
-        max_loras=1,
-        max_cpu_loras=1,
-    )
-
-    with LLM(
-            model=nemotron_nas_model_root,
-            lora_config=lora_config,
-            tensor_parallel_size=4,
-            dtype="bfloat16",
-            max_batch_size=2,
-            max_input_len=512,
-            max_seq_len=1024,
-            # load_format="dummy",
-            max_beam_width=1) as llm:
-
-        prompts = [
-            "What is the capital of France?",
-            "Explain quantum computing in simple terms."
-        ]
-
-        sampling_params = SamplingParams(max_tokens=50,
-                                         temperature=0.7,
-                                         top_p=0.9)
-
-        lora_request = LoRARequest("nemotron-lora", 0, lora_adapter_path)
-
-        print("Running inference with real LoRA adapter...")
-        outputs_with_lora = llm.generate(
-            prompts, sampling_params, lora_request=[lora_request, lora_request])
-
-        outputs_without_lora = llm.generate(prompts, sampling_params)
-
-        for i, (output_lora, output_no_lora) in enumerate(
-                zip(outputs_with_lora, outputs_without_lora)):
-            print(f"Prompt {i+1}: {prompts[i]}")
-            print(f"Response with LoRA {i+1}: {output_lora.outputs[0].text}")
-            print(
-                f"Response without LoRA {i+1}: {output_no_lora.outputs[0].text}"
-            )
-            print("-" * 50)
-
-        assert len(outputs_with_lora) == 2
-        assert len(outputs_with_lora[0].outputs) > 0
-        assert len(outputs_with_lora[1].outputs) > 0
-        assert len(outputs_with_lora[0].outputs[0].text) > 0
-        assert len(outputs_with_lora[1].outputs[0].text) > 0
-
-    ci_profiler.stop("test_nemotron_real_lora_torch")
-    print(
-        f"test_nemotron_real_lora_torch: {ci_profiler.elapsed_time_in_sec('test_nemotron_real_lora_torch')} sec"
-    )