
Commit 5fd27f3

address comments
1 parent 39615c3 commit 5fd27f3

2 files changed: +5, -4 lines changed


tensorrt_llm/_torch/models/modeling_gemma3.py

Lines changed: 4 additions & 3 deletions
@@ -164,21 +164,22 @@ def __init__(self, model_config: ModelConfig[Gemma3TextConfig]):
         self.hidden_size = self.config.hidden_size
         self.intermediate_size = self.config.intermediate_size
         self.dtype = self.config.torch_dtype
+        self.quant_config = model_config.get_quant_config()
         self.gate_proj = Linear(self.hidden_size,
                                 self.intermediate_size,
                                 bias=False,
                                 dtype=self.dtype,
-                                quant_config=model_config.get_quant_config())
+                                quant_config=self.quant_config)
         self.up_proj = Linear(self.hidden_size,
                               self.intermediate_size,
                               bias=False,
                               dtype=self.dtype,
-                              quant_config=model_config.get_quant_config())
+                              quant_config=self.quant_config)
         self.down_proj = Linear(self.intermediate_size,
                                 self.hidden_size,
                                 bias=False,
                                 dtype=self.dtype,
-                                quant_config=model_config.get_quant_config())
+                                quant_config=self.quant_config)
         self.act_fn = ACT2FN[self.config.hidden_activation]

     @torch.inference_mode()
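Read as applied, the constructor now queries the quantization config once and reuses the cached value for the gate, up, and down projections. A minimal sketch of the resulting constructor body, mirroring only what the hunk shows (the enclosing MLP class, its imports, and the earlier assignment of self.config live elsewhere in modeling_gemma3.py and are assumed here):

    # self.config and model_config are assumed to be set up earlier in __init__.
    self.hidden_size = self.config.hidden_size
    self.intermediate_size = self.config.intermediate_size
    self.dtype = self.config.torch_dtype
    # Fetch the quantization config once instead of once per projection layer.
    self.quant_config = model_config.get_quant_config()
    self.gate_proj = Linear(self.hidden_size, self.intermediate_size,
                            bias=False, dtype=self.dtype,
                            quant_config=self.quant_config)
    self.up_proj = Linear(self.hidden_size, self.intermediate_size,
                          bias=False, dtype=self.dtype,
                          quant_config=self.quant_config)
    self.down_proj = Linear(self.intermediate_size, self.hidden_size,
                            bias=False, dtype=self.dtype,
                            quant_config=self.quant_config)
    self.act_fn = ACT2FN[self.config.hidden_activation]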

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 1 deletion
@@ -612,7 +612,7 @@ def test_fp8_prequantized(self):
         kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                         enable_partial_reuse=False,
                                         dtype="fp8")
-        prequantized_model_path = "/home/bbuddharaju/scratch/random/hf_models/gemma-3-1b-it-fp8/"
+        prequantized_model_path = f"{llm_models_root()}/gemma/gemma-3-1b-it-fp8/"
         with LLM(prequantized_model_path,
                  kv_cache_config=kv_cache_config) as llm:
             assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
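For context, the checkpoint path is now derived from the shared model root rather than a user-specific scratch directory, so the test runs anywhere that root is available. A small sketch of the pattern (the import location of llm_models_root and its environment-variable behaviour are assumptions, not shown in this diff):

    # Assumed import: llm_models_root is the integration-test helper that resolves
    # the shared model cache (e.g. from an LLM_MODELS_ROOT-style environment variable).
    from defs.conftest import llm_models_root

    # The prequantized Gemma-3 1B FP8 checkpoint is looked up under that root.
    prequantized_model_path = f"{llm_models_root()}/gemma/gemma-3-1b-it-fp8/"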
