Skip to content

Commit d0f098d

Browse files
committed
remove changes to test_nemotron_nas.py
Signed-off-by: Venky Ganesh <[email protected]>
1 parent 163e57e commit d0f098d

File tree

1 file changed

+1
-85
lines changed

1 file changed

+1
-85
lines changed

tests/integration/defs/examples/test_nemotron_nas.py

Lines changed: 1 addition & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,10 @@
11
from pathlib import Path
22

33
import pytest
4-
from defs import ci_profiler
54
from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
6-
from defs.conftest import get_device_memory, get_sm_version, llm_models_root
5+
from defs.conftest import get_device_memory, get_sm_version
76
from defs.trt_test_alternative import check_call
87

9-
from tensorrt_llm import LLM
10-
from tensorrt_llm.executor.request import LoRARequest
11-
from tensorrt_llm.lora_manager import LoraConfig
12-
from tensorrt_llm.sampling_params import SamplingParams
13-
148
# skip trt flow cases on post-Blackwell-Ultra
159
if get_sm_version() >= 103:
1610
pytest.skip(
@@ -128,81 +122,3 @@ def test_nemotron_nas_summary_2gpu(nemotron_nas_example_root, llm_venv,
128122
]
129123

130124
venv_mpi_check_call(llm_venv, mpi_cmd, summary_cmd)
131-
132-
133-
@pytest.mark.skip_less_device(4)
134-
@pytest.mark.skip_less_device_memory(80000)
135-
@pytest.mark.parametrize(
136-
"nemotron_nas_model_root",
137-
[
138-
# "Llama-3_3-Nemotron-Super-49B-v1",
139-
f"{llm_models_root()}/nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1"
140-
],
141-
indirect=True)
142-
def test_nemotron_super_49b_real_lora_torch(nemotron_nas_example_root, llm_venv,
143-
nemotron_nas_model_root,
144-
llm_datasets_root, llm_rouge_root,
145-
engine_dir, cmodel_dir):
146-
"""Run Nemotron Super 49B with real LoRA adapters using LLM-API Torch backend."""
147-
148-
print("Testing Nemotron Super 49B with real LoRA adapters...")
149-
150-
lora_adapter_path = f"/home/gvenkatarama/scratch_new/Bugs/TRTLLM/5463720/llama-3.3-nemotron-super-49b-v1_vlora-1a2cb80-v2"
151-
print(f"Using real LoRA from: {lora_adapter_path}")
152-
153-
ci_profiler.start("test_nemotron_real_lora_torch")
154-
155-
lora_config = LoraConfig(
156-
lora_dir=[lora_adapter_path],
157-
max_lora_rank=32, # From adapter_config.json: "r": 32
158-
max_loras=1,
159-
max_cpu_loras=1,
160-
)
161-
162-
with LLM(
163-
model=nemotron_nas_model_root,
164-
lora_config=lora_config,
165-
tensor_parallel_size=4,
166-
dtype="bfloat16",
167-
max_batch_size=2,
168-
max_input_len=512,
169-
max_seq_len=1024,
170-
# load_format="dummy",
171-
max_beam_width=1) as llm:
172-
173-
prompts = [
174-
"What is the capital of France?",
175-
"Explain quantum computing in simple terms."
176-
]
177-
178-
sampling_params = SamplingParams(max_tokens=50,
179-
temperature=0.7,
180-
top_p=0.9)
181-
182-
lora_request = LoRARequest("nemotron-lora", 0, lora_adapter_path)
183-
184-
print("Running inference with real LoRA adapter...")
185-
outputs_with_lora = llm.generate(
186-
prompts, sampling_params, lora_request=[lora_request, lora_request])
187-
188-
outputs_without_lora = llm.generate(prompts, sampling_params)
189-
190-
for i, (output_lora, output_no_lora) in enumerate(
191-
zip(outputs_with_lora, outputs_without_lora)):
192-
print(f"Prompt {i+1}: {prompts[i]}")
193-
print(f"Response with LoRA {i+1}: {output_lora.outputs[0].text}")
194-
print(
195-
f"Response without LoRA {i+1}: {output_no_lora.outputs[0].text}"
196-
)
197-
print("-" * 50)
198-
199-
assert len(outputs_with_lora) == 2
200-
assert len(outputs_with_lora[0].outputs) > 0
201-
assert len(outputs_with_lora[1].outputs) > 0
202-
assert len(outputs_with_lora[0].outputs[0].text) > 0
203-
assert len(outputs_with_lora[1].outputs[0].text) > 0
204-
205-
ci_profiler.stop("test_nemotron_real_lora_torch")
206-
print(
207-
f"test_nemotron_real_lora_torch: {ci_profiler.elapsed_time_in_sec('test_nemotron_real_lora_torch')} sec"
208-
)

0 commit comments

Comments
 (0)