hybridgroup · deadprogram · Feb 24, 2026 · Feb 24, 2026
diff --git a/pkg/llama/llama.go b/pkg/llama/llama.go
@@ -363,6 +363,7 @@ type ModelQuantizeParams struct {
 	OnlyCopy             uint8 // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 	Pure                 uint8 // quantize all tensors to the default type
 	KeepSplit            uint8 // keep split tensors (bool as uint8)
+	DryRun               uint8 // calculate and show the final quantization size without performing quantization (bool as uint8)
 	IMatrix              *byte // pointer to importance matrix data
 	KvOverrides          *byte // pointer to vector containing overrides
 	TensorTypes          *byte // pointer to vector containing tensor types

diff --git a/pkg/llama/model.go b/pkg/llama/model.go
@@ -21,7 +21,7 @@ var (
 
 	// ffiTypeModelQuantizeParams represents the C struct llama_model_quantize_params
 	ffiTypeModelQuantizeParams = ffi.NewType(&ffi.TypeSint32, &ffi.TypeSint32,
-		&ffi.TypeSint32, &ffi.TypeSint32, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8,
+		&ffi.TypeSint32, &ffi.TypeSint32, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8,
 		&ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer)
 )