diff --git a/examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py b/examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py
index d5f5d887408..14ebd6e12fc 100644
--- a/examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py
+++ b/examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py
@@ -1,14 +1,11 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import QuantizationModifier
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 MODEL_ID = "Qwen/Qwen3-30B-A3B-Instruct-2507"
 
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID, dtype=torch.bfloat16, trust_remote_code=True
-)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16, trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 recipe = QuantizationModifier(