Skip to content

Commit 3a5a04a

Browse files
committed
squash
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 727513c commit 3a5a04a

File tree

22 files changed

+935
-1095
lines changed

22 files changed

+935
-1095
lines changed

examples/transform/spinquant_example.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,17 @@
1111
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
1212
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
1313

14-
# NOTE: currently only fused rotations (R1 & R2) are available
15-
# Learned rotations and online rotations (R3 & R4) will be added
16-
# in a future release.
14+
# NOTE: currently only rotations R1, R2, and R4 are available
15+
# R3 and learned R1/R2 rotations will be added in a future release.
1716
# Configure the quantization algorithm to run.
1817
# * apply spinquant transforms to model to reduce quantization loss
1918
# * quantize the weights to 4 bit with group size 128
2019
recipe = [
21-
SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
20+
SpinQuantModifier(
21+
rotations=["R1", "R2", "R4"],
22+
transform_block_size=128,
23+
transform_type="hadamard",
24+
),
2225
QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
2326
]
2427

@@ -37,6 +40,6 @@
3740
print("==========================================\n\n")
3841

3942
# Save to disk compressed.
40-
SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2-w4a16"
43+
SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2R4-w4a16"
4144
model.save_pretrained(SAVE_DIR, save_compressed=True)
4245
tokenizer.save_pretrained(SAVE_DIR)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ def localversion_func(version: ScmVersion) -> str:
157157
"torchvision",
158158
"librosa==0.11.0",
159159
"soundfile",
160-
"torchcodec",
160+
#"torchcodec",
161161
# linting, formatting, and type checking
162162
"mypy~=1.10.0",
163163
"ruff~=0.4.8",
src/llmcompressor/modifiers/quantization/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
# ruff: noqa
22

3-
from .cache import *
43
from .gptq import *
54
from .quantization import *

src/llmcompressor/modifiers/quantization/cache.py

Lines changed: 0 additions & 208 deletions
This file was deleted.

0 commit comments

Comments
 (0)