Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/llama/llama.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ type ModelQuantizeParams struct {
OnlyCopy uint8 // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
Pure uint8 // quantize all tensors to the default type
KeepSplit uint8 // keep split tensors (bool as uint8)
+ DryRun uint8 // calculate and show the final quantization size without performing quantization (bool as uint8)
IMatrix *byte // pointer to importance matrix data
KvOverrides *byte // pointer to vector containing overrides
TensorTypes *byte // pointer to vector containing tensor types
Expand Down
2 changes: 1 addition & 1 deletion pkg/llama/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var (

// ffiTypeModelQuantizeParams represents the C struct llama_model_quantize_params
ffiTypeModelQuantizeParams = ffi.NewType(&ffi.TypeSint32, &ffi.TypeSint32,
- &ffi.TypeSint32, &ffi.TypeSint32, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8,
+ &ffi.TypeSint32, &ffi.TypeSint32, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8, &ffi.TypeUint8,
&ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer)
)

Expand Down