12 changes: 6 additions & 6 deletions ggml/src/ggml-quants.c
@@ -384,7 +384,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; }
}
- if (amax < GROUP_MAX_EPS) { // all zero
+ if (fabsf(amax) < GROUP_MAX_EPS) { // all zero
Contributor:
Didn't we already use fabsf just above? How is this extra fabsf supposed to help?

for (int i = 0; i < n; ++i) {
L[i] = 0;
}
@@ -829,7 +829,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
for (int i = 0; i < n; ++i) {
max = MAX(max, x[i]);
}
- if (!max) { // all zero
+ if (fabsf(max) < GROUP_MAX_EPS) { // all zero
Collaborator:
This is the one-line change which fixes the problem in #12439. The minimal fix I tried is the following:

diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index ac918a60..aac8b120 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -829,7 +829,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
     for (int i = 0; i < n; ++i) {
         max = MAX(max, x[i]);
     }
-    if (!max) { // all zero
+    if (max < GROUP_MAX_EPS) { // all zero
         for (int i = 0; i < n; ++i) { L[i] = 0; }
         return 0.f;
     }

The added fabsf is not necessary here either since max can't be smaller than 0 due to how it's initialized.


The problem was caused by the very small amplitudes of the model weights in the tensor model.layers.42.self_attn.k_proj.weight, which broke the quantization of the sub-block scales. I do not know if this is a general fix, but it does seem to help.
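
As an illustration only (not part of this patch), here is a minimal standalone C sketch of the difference between the two checks. It assumes GROUP_MAX_EPS is 1e-15f, matching its definition near the top of ggml-quants.c, uses 1e-20f as a stand-in for the block maxima in the dumps below, and nmax = 15 is just an illustrative value.

    #include <stdio.h>

    /* Assumed to match the definition near the top of ggml-quants.c. */
    #define GROUP_MAX_EPS 1e-15f

    int main(void) {
        /* Stand-in for the largest weight in one of the problematic blocks. */
        float max = 1.0e-20f;

        /* Old check: only an exact 0.0f counts as "all zero", so this block slips through. */
        printf("!max                -> %d\n", !max);

        /* New check: anything below the epsilon is treated as all zero. */
        printf("max < GROUP_MAX_EPS -> %d\n", max < GROUP_MAX_EPS);

        /* With the old check the block would still be quantized, with a scale on the
         * order of max/nmax (nmax = 15 here), i.e. roughly 1e-21, the kind of value
         * that then causes trouble when the sub-block scales themselves are quantized. */
        printf("max / 15            -> %g\n", max / 15.0f);
        return 0;
    }

Compiling and running this prints 0, 1 and about 6.7e-22, which matches the behaviour described above.
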

Here are the contents of the problematic blocks:
>>> m["model.layers.42.self_attn.k_proj.weight"].reshape(-1, 256)[20].reshape(-1, 16)
tensor([[ 1.4400e-20, -4.8916e-20,  2.3505e-20,  5.9716e-20,  1.0122e-19,
          6.1145e-21,  4.6163e-20,  7.0410e-21, -4.4681e-20, -2.0382e-21,
          3.9387e-20, -1.4908e-19, -8.8515e-20,  5.8869e-20,  3.1552e-20,
         -1.6080e-21],
        [-1.8317e-20, -3.9387e-20, -2.3611e-20,  1.6411e-20,  4.0869e-20,
          1.7682e-20,  3.1128e-20, -4.5528e-20, -1.6835e-20, -3.7693e-20,
         -2.1705e-20, -4.0022e-20,  3.2161e-21,  6.0986e-20, -4.3410e-20,
          3.7058e-20],
        [-5.2940e-20, -6.5222e-20,  4.5528e-20,  6.6969e-21,  1.0853e-20,
          1.4505e-20, -2.8376e-20,  3.0917e-20,  4.9340e-20,  1.0257e-21,
          3.3246e-20,  3.1128e-20, -9.2750e-20,  2.5517e-20, -4.5952e-20,
         -3.3881e-20],
        [-2.7317e-20,  8.6821e-20,  3.4940e-20, -4.4469e-20, -2.8164e-20,
         -2.9011e-20, -4.5740e-20, -2.5729e-20, -5.8445e-20, -7.6656e-20,
         -4.3675e-21, -3.5787e-20,  1.2441e-20, -3.1896e-21,  4.8281e-20,
          3.9599e-20],
        [ 3.0281e-20,  4.5528e-20,  1.0164e-20,  4.3675e-21, -2.1705e-21,
          1.0522e-21,  5.0610e-20,  2.0011e-20, -2.4670e-20,  1.4188e-20,
         -6.0351e-21,  2.3823e-20,  1.9588e-20,  3.5787e-20,  6.6704e-21,
         -5.5057e-20],
        [ 8.2056e-21, -1.6411e-20, -6.0563e-20,  4.0532e-22, -3.2823e-20,
          6.5645e-20, -7.6762e-21,  2.3611e-20, -1.9164e-20,  3.9387e-20,
          3.9811e-20, -4.2521e-19, -3.5364e-20, -1.3500e-20, -2.9646e-20,
          6.0563e-20],
        [-3.7905e-20, -7.9198e-20, -5.9292e-20, -6.7763e-21, -3.0493e-20,
         -1.6200e-20,  1.0800e-20, -1.8529e-20, -6.5645e-20, -2.2658e-20,
          4.9975e-20, -2.2976e-20, -1.7258e-20, -5.0557e-21, -1.6941e-20,
          1.0249e-19],
        [-7.0939e-21, -1.6094e-20,  3.4093e-20,  5.3204e-21,  2.3399e-20,
         -1.9905e-20, -1.9694e-20,  1.0482e-20, -5.9292e-20,  4.9763e-20,
         -2.9858e-20,  5.5904e-20,  1.1435e-20,  3.7269e-20, -5.6116e-21,
          4.9128e-20],
        [-1.4400e-20,  1.6094e-20, -2.1176e-21, -1.0588e-20, -1.1488e-20,
          1.4717e-20, -4.1505e-20,  2.1493e-20,  3.4940e-20,  1.2451e-19,
          4.3940e-21,  6.4375e-20, -3.4517e-20,  5.5904e-20, -2.9911e-21,
         -6.7339e-20],
        [ 4.2775e-20,  4.4046e-20, -3.4093e-20, -2.3082e-20, -2.3399e-20,
          4.1505e-20,  2.0541e-20,  3.2611e-20,  2.6258e-20, -4.5740e-20,
          2.6682e-20,  1.5670e-20, -2.5517e-20,  5.4634e-20, -5.5481e-20,
          1.6517e-19],
        [ 1.6200e-20, -1.0588e-20,  8.2586e-21,  3.3458e-20,  4.5105e-20,
         -1.4294e-20, -2.1705e-20,  3.9387e-20, -1.5776e-20, -3.5152e-20,
          3.0705e-20, -7.0410e-21, -2.0646e-21, -1.4393e-22, -8.7668e-20,
          3.4940e-20],
        [ 6.7763e-20,  4.4893e-20, -5.2304e-20,  9.6138e-20, -9.8997e-21,
         -3.3246e-20, -2.1282e-20,  3.9811e-20,  3.8540e-20, -1.5247e-20,
          5.9028e-21,  3.3670e-20,  3.5999e-20, -2.6046e-20,  2.3929e-20,
         -1.1858e-19],
        [ 5.5904e-20,  2.4352e-20,  2.8460e-19, -1.8529e-21, -1.9588e-21,
          2.9011e-20, -2.0858e-20,  7.7292e-21, -6.6492e-20,  3.7905e-20,
         -2.7740e-20,  4.5740e-20, -3.1764e-20,  3.7481e-20, -2.6867e-21,
         -1.7999e-20],
        [-1.0641e-20, -3.8752e-20,  2.6073e-21,  3.6846e-20,  3.3484e-21,
         -1.5776e-20,  1.7470e-20,  3.1340e-20, -5.3363e-20,  6.0616e-21,
         -3.5205e-21, -3.4146e-21, -1.6094e-20,  1.1276e-20, -4.0863e-22,
         -5.1881e-20],
        [ 6.0616e-21, -2.3293e-20, -1.8846e-20,  3.5787e-20, -1.4929e-20,
         -4.6799e-20, -1.0006e-20, -1.1911e-22, -2.0117e-20, -1.7258e-20,
          8.2056e-21,  1.9270e-20,  4.9340e-20, -1.4611e-20,  1.4717e-20,
          3.4305e-20],
        [-4.9234e-21,  1.0588e-19, -4.0446e-20, -3.0917e-20, -5.2516e-20,
          9.4232e-21,  4.5105e-20, -5.2940e-20,  3.9175e-20, -4.8069e-20,
         -1.2494e-20, -6.8186e-20,  2.3082e-20, -1.4691e-21, -1.3129e-20,
          5.9716e-20]], dtype=torch.bfloat16)
>>> m["model.layers.42.self_attn.k_proj.weight"].reshape(-1, 256)[40].reshape(-1, 16)
tensor([[-6.3527e-21,  7.5809e-20, -2.6787e-20, -5.9292e-20, -1.1435e-19,
         -3.9811e-20, -9.8997e-21, -3.7693e-20,  7.7080e-20, -1.5458e-20,
         -6.6969e-21,  1.2790e-19,  7.9198e-20, -5.6751e-20,  2.2764e-20,
         -5.1881e-21],
        [ 3.8116e-20,  5.6328e-20, -8.4703e-22,  2.1388e-20, -5.2728e-20,
         -2.3082e-20,  3.1631e-21,  4.7315e-22,  6.2257e-20, -2.4035e-20,
         -4.9975e-20,  7.0304e-20, -5.9143e-23, -1.1096e-19,  4.5740e-20,
         -1.8211e-20],
        [ 5.4634e-20,  3.8540e-20, -5.6751e-20, -4.4046e-20, -7.8880e-21,
         -4.6587e-20,  2.0541e-20,  3.1975e-20, -2.7317e-20,  2.9646e-20,
         -1.5776e-20, -3.3034e-20,  8.8515e-20, -1.6729e-20,  7.4539e-20,
         -2.0382e-21],
        [ 4.5740e-20, -9.4021e-20, -1.2335e-20,  4.8175e-21,  4.6587e-20,
          3.5787e-20,  6.6069e-20, -2.0858e-20,  4.9340e-20,  8.4280e-20,
         -5.2940e-20,  4.1293e-20,  3.5364e-20, -8.3645e-21, -1.5948e-21,
         -1.7788e-20],
        [-3.4093e-20, -2.9858e-20, -2.0646e-20,  2.9223e-20,  6.2998e-21,
         -2.0435e-20, -4.6057e-21,  3.7058e-20,  2.7317e-20, -3.9387e-20,
          2.0541e-20, -6.5222e-20, -2.0329e-20,  9.3174e-21, -3.1340e-20,
          1.5352e-20],
        [ 5.7439e-21,  1.6305e-20,  4.9551e-20, -2.6576e-20,  6.3951e-20,
         -3.1340e-20,  8.9336e-22,  1.2904e-21,  5.1087e-21,  3.7905e-20,
         -1.0694e-20, -1.0910e-18,  1.4188e-20, -5.2145e-21,  2.7105e-20,
         -4.5105e-20],
        [ 2.6576e-20,  5.0028e-21,  6.2680e-20,  3.3458e-20, -2.4352e-20,
          9.4232e-21,  9.0527e-21,  8.1527e-21,  4.1928e-20, -1.1435e-20,
         -5.2304e-20,  3.3246e-20, -3.1128e-20, -2.3929e-20,  2.8799e-20,
          1.8127e-19],
        [ 3.5787e-20, -9.3174e-21, -2.1043e-21, -3.2823e-20,  2.4749e-21,
         -1.7073e-21, -1.5670e-20, -2.7529e-20,  6.0986e-20, -4.9551e-20,
          6.6492e-20, -7.4115e-21, -3.7269e-20, -3.8328e-20, -3.7058e-21,
         -2.1917e-20],
        [ 1.0747e-20, -4.1293e-20, -1.9376e-20,  4.1505e-20,  3.8381e-21,
         -1.4188e-20,  3.4093e-20,  1.8211e-20,  2.9117e-21,  1.8529e-21,
          1.2917e-20, -7.4115e-20,  1.1689e-19, -2.1388e-20, -1.9588e-20,
          8.7244e-20],
        [ 6.7498e-21, -2.1599e-20,  5.0187e-20,  4.8281e-20, -1.3976e-20,
         -3.4517e-20, -3.0070e-20, -1.7047e-20, -9.2115e-21,  4.1928e-20,
         -6.8186e-20, -9.2115e-21, -2.4246e-20, -4.1081e-20,  9.1480e-20,
         -1.7364e-19],
        [ 1.4400e-20,  3.7322e-21, -2.4776e-20, -2.8164e-20, -5.9716e-20,
          1.3698e-21, -3.5364e-20, -9.9526e-20,  2.3293e-20,  8.5762e-21,
         -2.1493e-20,  1.2335e-20,  9.8468e-21,  9.5291e-21,  1.0164e-19,
         -9.4444e-20],
        [-8.6821e-20, -2.5623e-20,  4.7646e-20, -5.5057e-20, -5.1034e-20,
         -5.9888e-22, -2.7105e-20, -2.9223e-20, -2.2129e-20, -3.9969e-21,
         -6.4057e-21, -3.2823e-20, -2.5940e-20,  3.4093e-20, -6.8186e-20,
          1.1096e-19],
        [-1.3976e-20, -1.9482e-20, -1.9397e-19, -6.4798e-20,  2.2552e-20,
         -4.7434e-20,  1.1858e-20, -8.4280e-20,  4.1081e-20, -5.2728e-20,
          6.9033e-20, -1.6623e-20,  3.4305e-20, -3.3458e-20,  5.2940e-20,
          4.6163e-20],
        [ 2.1282e-20,  3.8540e-20,  2.3929e-20, -1.0853e-20, -5.3787e-20,
         -1.0059e-21, -2.1282e-20, -2.4564e-20,  2.0011e-20, -1.4228e-21,
         -2.9646e-20, -4.7646e-20, -2.6258e-20,  1.7258e-20,  2.2003e-22,
          7.7927e-20],
        [ 2.9858e-20, -1.0217e-20,  5.2093e-20, -1.9799e-20,  1.0747e-20,
          5.9292e-20, -4.6587e-21,  2.5940e-20,  4.0446e-20,  6.9351e-21,
          5.1087e-21, -2.3188e-20, -4.9551e-20, -1.3870e-20, -1.8105e-20,
         -1.8529e-20],
        [ 1.7788e-20, -8.2162e-20, -1.9058e-20,  3.0917e-20,  1.9376e-20,
         -7.6762e-21, -1.1117e-20,  8.6291e-21, -4.1716e-20,  1.0503e-19,
          4.9234e-21,  5.5057e-20, -3.5364e-20,  2.0011e-20, -3.7481e-20,
         -3.0493e-20]], dtype=torch.bfloat16)

for (int i = 0; i < n; ++i) { L[i] = 0; }
return 0.f;
}
@@ -3021,7 +3021,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS) {
+ if (fabsf(max) < GROUP_MAX_EPS) {
Contributor:
xval contains the absolute values of the model weights, so how is this extra fabsf supposed to help?

scales[ib] = 0;
memset(L, 0, 32);
continue;
@@ -3197,7 +3197,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS) {
+ if (fabsf(max) < GROUP_MAX_EPS) {
scales[ib] = 0;
memset(L, 0, 16);
continue;
@@ -3638,7 +3638,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS_IQ3_XXS) {
+ if (fabsf(max) < GROUP_MAX_EPS_IQ3_XXS) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
@@ -4808,7 +4808,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
- if (max < GROUP_MAX_EPS_IQ2_S) {
+ if (fabsf(max) < GROUP_MAX_EPS_IQ2_S) {
scales[ib] = 0;
continue;
}