vllm-project
diff --git a/examples/transform/spinquant_example.py b/examples/transform/spinquant_example.py
Lines changed: 8 additions & 5 deletions
diff --git a/setup.py b/setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/llmcompressor/modifiers/quantization/__init__.py b/src/llmcompressor/modifiers/quantization/__init__.py
Lines changed: 0 additions & 1 deletion
diff --git a/src/llmcompressor/modifiers/quantization/cache.py b/src/llmcompressor/modifiers/quantization/cache.py
Lines changed: 0 additions & 208 deletions
@@ -11,14 +11,17 @@
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
-# NOTE: currently only fused rotations (R1 & R2) are available
-# Learned rotations and online rotations (R3 & R4) will be added
-# in a future release.
+# NOTE: currently only rotations R1, R2, and R4 are available
+# R3 and learned R1/R2 rotations will be added in a future release.
 # Configure the quantization algorithm to run.
 #   * apply spinquant transforms to model to reduce quantization loss
 #   * quantize the weights to 4 bit with group size 128
 recipe = [
-    SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
+    SpinQuantModifier(
+        rotations=["R1", "R2", "R4"],
+        transform_block_size=128,
+        transform_type="hadamard",
+    ),
     QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
 ]
 
@@ -37,6 +40,6 @@
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2-w4a16"
+SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2R4-w4a16"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)
@@ -157,7 +157,7 @@ def localversion_func(version: ScmVersion) -> str:
             "torchvision",
             "librosa==0.11.0",
             "soundfile",
-            "torchcodec",
+            #"torchcodec",
             # linting, formatting, and type checking
             "mypy~=1.10.0",
             "ruff~=0.4.8",
 
@@ -1,5 +1,4 @@
 # ruff: noqa
 
-from .cache import *
 from .gptq import *
 from .quantization import *