12 changes: 6 additions & 6 deletions ggml/src/ggml-quants.c
@@ -384,7 +384,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; }
}
- if (amax < GROUP_MAX_EPS) { // all zero
+ if (fabsf(amax) < GROUP_MAX_EPS) { // all zero
Contributor:
Didn't we already use fabsf just above? How is this extra fabsf supposed to help?

for (int i = 0; i < n; ++i) {
L[i] = 0;
}
@@ -829,7 +829,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
for (int i = 0; i < n; ++i) {
max = MAX(max, x[i]);
}
- if (!max) { // all zero
+ if (fabsf(max) < GROUP_MAX_EPS) { // all zero
Collaborator:
This is the one-line change which fixes the problem in #12439. The minimal fix I tried is the following:

diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index ac918a60..aac8b120 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -829,7 +829,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
     for (int i = 0; i < n; ++i) {
         max = MAX(max, x[i]);
     }
-    if (!max) { // all zero
+    if (max < GROUP_MAX_EPS) { // all zero
         for (int i = 0; i < n; ++i) { L[i] = 0; }
         return 0.f;
     }

The added fabsf is not necessary here either since max can't be smaller than 0 due to how it's initialized.


The problem was caused by the very small amplitudes of the model weights in the tensor model.layers.42.self_attn.k_proj.weight, which broke the quantization of the sub-block scales. I do not know if this is a general fix, but it does seem to help.
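
As an illustration only (not part of this patch), here is a minimal standalone C sketch of the difference between the two checks. It assumes GROUP_MAX_EPS is 1e-15f, matching its definition near the top of ggml-quants.c, uses 1e-20f as a stand-in for the block maxima in the dumps below, and nmax = 15 is just an illustrative value.

    #include <stdio.h>

    /* Assumed to match the definition near the top of ggml-quants.c. */
    #define GROUP_MAX_EPS 1e-15f

    int main(void) {
        /* Stand-in for the largest weight in one of the problematic blocks. */
        float max = 1.0e-20f;

        /* Old check: only an exact 0.0f counts as "all zero", so this block slips through. */
        printf("!max                -> %d\n", !max);

        /* New check: anything below the epsilon is treated as all zero. */
        printf("max < GROUP_MAX_EPS -> %d\n", max < GROUP_MAX_EPS);

        /* With the old check the block would still be quantized, with a scale on the
         * order of max/nmax (nmax = 15 here), i.e. roughly 1e-21, the kind of value
         * that then causes trouble when the sub-block scales themselves are quantized. */
        printf("max / 15            -> %g\n", max / 15.0f);
        return 0;
    }

Compiling and running this prints 0, 1 and about 6.7e-22, which matches the behaviour described above.
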

Here are the contents of the problematic blocks:
>>> m["model.layers.42.self_attn.k_proj.weight"].reshape(-1, 256)[20].reshape(-1, 16)
tensor([[ 1.4400e-20, -4.8916e-20,  2.3505e-20,  5.9716e-20,  1.0122e-19,
          6.1145e-21,  4.6163e-20,  7.0410e-21, -4.4681e-20, -2.0382e-21,
          3.9387e-20, -1.4908e-19, -8.8515e-20,  5.8869e-20,  3.1552e-20,
         -1.6080e-21],
        [-1.8317e-20, -3.9387e-20, -2.3611e-20,  1.6411e-20,  4.0869e-20,
          1.7682e-20,  3.1128e-20, -4.5528e-20, -1.6835e-20, -3.7693e-20,
         -2.1705e-20, -4.0022e-20,  3.2161e-21,  6.0986e-20, -4.3410e-20,
          3.7058e-20],
        [-5.2940e-20, -6.5222e-20,  4.5528e-20,  6.6969e-21,  1.0853e-20,
          1.4505e-20, -2.8376e-20,  3.0917e-20,  4.9340e-20,  1.0257e-21,
          3.3246e-20,  3.1128e-20, -9.2750e-20,  2.5517e-20, -4.5952e-20,
         -3.3881e-20],
        [-2.7317e-20,  8.6821e-20,  3.4940e-20, -4.4469e-20, -2.8164e-20,
         -2.9011e-20, -4.5740e-20, -2.5729e-20, -5.8445e-20, -7.6656e-20,
         -4.3675e-21, -3.5787e-20,  1.2441e-20, -3.1896e-21,  4.8281e-20,
          3.9599e-20],
        [ 3.0281e-20,  4.5528e-20,  1.0164e-20,  4.3675e-21, -2.1705e-21,
          1.0522e-21,  5.0610e-20,  2.0011e-20, -2.4670e-20,  1.4188e-20,
         -6.0351e-21,  2.3823e-20,  1.9588e-20,  3.5787e-20,  6.6704e-21,
         -5.5057e-20],
        [ 8.2056e-21, -1.6411e-20, -6.0563e-20,  4.0532e-22, -3.2823e-20,
          6.5645e-20, -7.6762e-21,  2.3611e-20, -1.9164e-20,  3.9387e-20,
          3.9811e-20, -4.2521e-19, -3.5364e-20, -1.3500e-20, -2.9646e-20,
          6.0563e-20],
        [-3.7905e-20, -7.9198e-20, -5.9292e-20, -6.7763e-21, -3.0493e-20,
         -1.6200e-20,  1.0800e-20, -1.8529e-20, -6.5645e-20, -2.2658e-20,
          4.9975e-20, -2.2976e-20, -1.7258e-20, -5.0557e-21, -1.6941e-20,
          1.0249e-19],
        [-7.0939e-21, -1.6094e-20,  3.4093e-20,  5.3204e-21,  2.3399e-20,
         -1.9905e-20, -1.9694e-20,  1.0482e-20, -5.9292e-20,  4.9763e-20,
         -2.9858e-20,  5.5904e-20,  1.1435e-20,  3.7269e-20, -5.6116e-21,
          4.9128e-20],
        [-1.4400e-20,  1.6094e-20, -2.1176e-21, -1.0588e-20, -1.1488e-20,
          1.4717e-20, -4.1505e-20,  2.1493e-20,  3.4940e-20,  1.2451e-19,
          4.3940e-21,  6.4375e-20, -3.4517e-20,  5.5904e-20, -2.9911e-21,
         -6.7339e-20],
        [ 4.2775e-20,  4.4046e-20, -3.4093e-20, -2.3082e-20, -2.3399e-20,
          4.1505e-20,  2.0541e-20,  3.2611e-20,  2.6258e-20, -4.5740e-20,
          2.6682e-20,  1.5670e-20, -2.5517e-20,  5.4634e-20, -5.5481e-20,
          1.6517e-19],
        [ 1.6200e-20, -1.0588e-20,  8.2586e-21,  3.3458e-20,  4.5105e-20,
         -1.4294e-20, -2.1705e-20,  3.9387e-20, -1.5776e-20, -3.5152e-20,
          3.0705e-20, -7.0410e-21, -2.0646e-21, -1.4393e-22, -8.7668e-20,
          3.4940e-20],
        [ 6.7763e-20,  4.4893e-20, -5.2304e-20,  9.6138e-20, -9.8997e-21,
         -3.3246e-20, -2.1282e-20,  3.9811e-20,  3.8540e-20, -1.5247e-20,
          5.9028e-21,  3.3670e-20,  3.5999e-20, -2.6046e-20,  2.3929e-20,
         -1.1858e-19],
        [ 5.5904e-20,  2.4352e-20,  2.8460e-19, -1.8529e-21, -1.9588e-21,
          2.9011e-20, -2.0858e-20,  7.7292e-21, -6.6492e-20,  3.7905e-20,
         -2.7740e-20,  4.5740e-20, -3.1764e-20,  3.7481e-20, -2.6867e-21,
         -1.7999e-20],
        [-1.0641e-20, -3.8752e-20,  2.6073e-21,  3.6846e-20,  3.3484e-21,
         -1.5776e-20,  1.7470e-20,  3.1340e-20, -5.3363e-20,  6.0616e-21,
         -3.5205e-21, -3.4146e-21, -1.6094e-20,  1.1276e-20, -4.0863e-22,
         -5.1881e-20],
        [ 6.0616e-21, -2.3293e-20, -1.8846e-20,  3.5787e-20, -1.4929e-20,
         -4.6799e-20, -1.0006e-20, -1.1911e-22, -2.0117e-20, -1.7258e-20,
          8.2056e-21,  1.9270e-20,  4.9340e-20, -1.4611e-20,  1.4717e-20,
          3.4305e-20],
        [-4.9234e-21,  1.0588e-19, -4.0446e-20, -3.0917e-20, -5.2516e-20,
          9.4232e-21,  4.5105e-20, -5.2940e-20,  3.9175e-20, -4.8069e-20,
         -1.2494e-20, -6.8186e-20,  2.3082e-20, -1.4691e-21, -1.3129e-20,
          5.9716e-20]], dtype=torch.bfloat16)
>>> m["model.layers.42.self_attn.k_proj.weight"].reshape(-1, 256)[40].reshape(-1, 16)
tensor([[-6.3527e-21,  7.5809e-20, -2.6787e-20, -5.9292e-20, -1.1435e-19,
         -3.9811e-20, -9.8997e-21, -3.7693e-20,  7.7080e-20, -1.5458e-20,
         -6.6969e-21,  1.2790e-19,  7.9198e-20, -5.6751e-20,  2.2764e-20,
         -5.1881e-21],
        [ 3.8116e-20,  5.6328e-20, -8.4703e-22,  2.1388e-20, -5.2728e-20,
         -2.3082e-20,  3.1631e-21,  4.7315e-22,  6.2257e-20, -2.4035e-20,
         -4.9975e-20,  7.0304e-20, -5.9143e-23, -1.1096e-19,  4.5740e-20,
         -1.8211e-20],
        [ 5.4634e-20,  3.8540e-20, -5.6751e-20, -4.4046e-20, -7.8880e-21,
         -4.6587e-20,  2.0541e-20,  3.1975e-20, -2.7317e-20,  2.9646e-20,
         -1.5776e-20, -3.3034e-20,  8.8515e-20, -1.6729e-20,  7.4539e-20,
         -2.0382e-21],
        [ 4.5740e-20, -9.4021e-20, -1.2335e-20,  4.8175e-21,  4.6587e-20,
          3.5787e-20,  6.6069e-20, -2.0858e-20,  4.9340e-20,  8.4280e-20,
         -5.2940e-20,  4.1293e-20,  3.5364e-20, -8.3645e-21, -1.5948e-21,
         -1.7788e-20],
        [-3.4093e-20, -2.9858e-20, -2.0646e-20,  2.9223e-20,  6.2998e-21,
         -2.0435e-20, -4.6057e-21,  3.7058e-20,  2.7317e-20, -3.9387e-20,
          2.0541e-20, -6.5222e-20, -2.0329e-20,  9.3174e-21, -3.1340e-20,
          1.5352e-20],
        [ 5.7439e-21,  1.6305e-20,  4.9551e-20, -2.6576e-20,  6.3951e-20,
         -3.1340e-20,  8.9336e-22,  1.2904e-21,  5.1087e-21,  3.7905e-20,
         -1.0694e-20, -1.0910e-18,  1.4188e-20, -5.2145e-21,  2.7105e-20,
         -4.5105e-20],
        [ 2.6576e-20,  5.0028e-21,  6.2680e-20,  3.3458e-20, -2.4352e-20,
          9.4232e-21,  9.0527e-21,  8.1527e-21,  4.1928e-20, -1.1435e-20,
         -5.2304e-20,  3.3246e-20, -3.1128e-20, -2.3929e-20,  2.8799e-20,
          1.8127e-19],
        [ 3.5787e-20, -9.3174e-21, -2.1043e-21, -3.2823e-20,  2.4749e-21,
         -1.7073e-21, -1.5670e-20, -2.7529e-20,  6.0986e-20, -4.9551e-20,
          6.6492e-20, -7.4115e-21, -3.7269e-20, -3.8328e-20, -3.7058e-21,
         -2.1917e-20],
        [ 1.0747e-20, -4.1293e-20, -1.9376e-20,  4.1505e-20,  3.8381e-21,
         -1.4188e-20,  3.4093e-20,  1.8211e-20,  2.9117e-21,  1.8529e-21,
          1.2917e-20, -7.4115e-20,  1.1689e-19, -2.1388e-20, -1.9588e-20,
          8.7244e-20],
        [ 6.7498e-21, -2.1599e-20,  5.0187e-20,  4.8281e-20, -1.3976e-20,
         -3.4517e-20, -3.0070e-20, -1.7047e-20, -9.2115e-21,  4.1928e-20,
         -6.8186e-20, -9.2115e-21, -2.4246e-20, -4.1081e-20,  9.1480e-20,
         -1.7364e-19],
        [ 1.4400e-20,  3.7322e-21, -2.4776e-20, -2.8164e-20, -5.9716e-20,
          1.3698e-21, -3.5364e-20, -9.9526e-20,  2.3293e-20,  8.5762e-21,
         -2.1493e-20,  1.2335e-20,  9.8468e-21,  9.5291e-21,  1.0164e-19,
         -9.4444e-20],
        [-8.6821e-20, -2.5623e-20,  4.7646e-20, -5.5057e-20, -5.1034e-20,
         -5.9888e-22, -2.7105e-20, -2.9223e-20, -2.2129e-20, -3.9969e-21,
         -6.4057e-21, -3.2823e-20, -2.5940e-20,  3.4093e-20, -6.8186e-20,
          1.1096e-19],
        [-1.3976e-20, -1.9482e-20, -1.9397e-19, -6.4798e-20,  2.2552e-20,
         -4.7434e-20,  1.1858e-20, -8.4280e-20,  4.1081e-20, -5.2728e-20,
          6.9033e-20, -1.6623e-20,  3.4305e-20, -3.3458e-20,  5.2940e-20,
          4.6163e-20],
        [ 2.1282e-20,  3.8540e-20,  2.3929e-20, -1.0853e-20, -5.3787e-20,
         -1.0059e-21, -2.1282e-20, -2.4564e-20,  2.0011e-20, -1.4228e-21,
         -2.9646e-20, -4.7646e-20, -2.6258e-20,  1.7258e-20,  2.2003e-22,
          7.7927e-20],
        [ 2.9858e-20, -1.0217e-20,  5.2093e-20, -1.9799e-20,  1.0747e-20,
          5.9292e-20, -4.6587e-21,  2.5940e-20,  4.0446e-20,  6.9351e-21,
          5.1087e-21, -2.3188e-20, -4.9551e-20, -1.3870e-20, -1.8105e-20,
         -1.8529e-20],
        [ 1.7788e-20, -8.2162e-20, -1.9058e-20,  3.0917e-20,  1.9376e-20,
         -7.6762e-21, -1.1117e-20,  8.6291e-21, -4.1716e-20,  1.0503e-19,
          4.9234e-21,  5.5057e-20, -3.5364e-20,  2.0011e-20, -3.7481e-20,
         -3.0493e-20]], dtype=torch.bfloat16)

for (int i = 0; i < n; ++i) { L[i] = 0; }
return 0.f;
}
@@ -3021,7 +3021,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS) {
+ if (fabsf(max) < GROUP_MAX_EPS) {
Contributor:
xval contains the absolute values of the model weights, so how is this extra fabsf supposed to help?

scales[ib] = 0;
memset(L, 0, 32);
continue;
@@ -3197,7 +3197,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS) {
+ if (fabsf(max) < GROUP_MAX_EPS) {
scales[ib] = 0;
memset(L, 0, 16);
continue;
@@ -3638,7 +3638,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS_IQ3_XXS) {
+ if (fabsf(max) < GROUP_MAX_EPS_IQ3_XXS) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
@@ -4808,7 +4808,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS_IQ2_S) {
+ if (fabsf(max) < GROUP_MAX_EPS_IQ2_S) {
scales[ib] = 0;
continue;
}