Skip to content

Commit 2faaef3

Browse files
ikawrakow and Iwan Kawrakow authored
llama : check for 256 divisibility for IQ2_XS, IQ2_XXS (#4950)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 4a3156d commit 2faaef3

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

llama.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8559,7 +8559,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
85598559
//}
85608560
bool convert_incompatible_tensor = false;
85618561
if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
8562-
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
8562+
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
8563+
new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS) {
85638564
int nx = tensor->ne[0];
85648565
int ny = tensor->ne[1];
85658566
if (nx % QK_K != 0) {
@@ -8571,6 +8572,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
85718572
}
85728573
if (convert_incompatible_tensor) {
85738574
switch (new_type) {
8575+
case GGML_TYPE_IQ2_XXS:
8576+
case GGML_TYPE_IQ2_XS:
85748577
case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
85758578
case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
85768579
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;

0 commit comments

Comments (0)