@@ -751,8 +751,8 @@ static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * r
751751 y [i ].d = d ;
752752
753753 for (int j = 0 ; j < qk /2 ; ++ j ) {
754- const float x0 = x [i * qk + 0 + j ]* id ;
755- const float x1 = x [i * qk + qk / 2 + j ]* id ;
754+ const float x0 = x [i * qk + 2 * j + 0 ]* id ;
755+ const float x1 = x [i * qk + 2 * j + 1 ]* id ;
756756
757757 const uint8_t xi0 = MIN (15 , (int8_t )(x0 + 8.5f ));
758758 const uint8_t xi1 = MIN (15 , (int8_t )(x1 + 8.5f ));
@@ -792,8 +792,8 @@ static void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * r
792792 y [i ].m = min ;
793793
794794 for (int j = 0 ; j < qk /2 ; ++ j ) {
795- const float x0 = (x [i * qk + 0 + j ] - min )* id ;
796- const float x1 = (x [i * qk + qk / 2 + j ] - min )* id ;
795+ const float x0 = (x [i * qk + 2 * j + 0 ] - min )* id ;
796+ const float x1 = (x [i * qk + 2 * j + 1 ] - min )* id ;
797797
798798 const uint8_t xi0 = MIN (15 , (int8_t )(x0 + 0.5f ));
799799 const uint8_t xi1 = MIN (15 , (int8_t )(x1 + 0.5f ));
@@ -835,8 +835,8 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
835835 uint32_t qh = 0 ;
836836
837837 for (int j = 0 ; j < qk /2 ; ++ j ) {
838- const float x0 = x [i * qk + 0 + j ]* id ;
839- const float x1 = x [i * qk + qk / 2 + j ]* id ;
838+ const float x0 = x [i * qk + 2 * j + 0 ]* id ;
839+ const float x1 = x [i * qk + 2 * j + 1 ]* id ;
840840
841841 const uint8_t xi0 = MIN (31 , (int8_t )(x0 + 16.5f ));
842842 const uint8_t xi1 = MIN (31 , (int8_t )(x1 + 16.5f ));
@@ -883,8 +883,8 @@ static void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * r
883883 uint32_t qh = 0 ;
884884
885885 for (int j = 0 ; j < qk /2 ; ++ j ) {
886- const float x0 = (x [i * qk + 0 + j ] - min )* id ;
887- const float x1 = (x [i * qk + qk / 2 + j ] - min )* id ;
886+ const float x0 = (x [i * qk + 2 * j + 0 ] - min )* id ;
887+ const float x1 = (x [i * qk + 2 * j + 1 ] - min )* id ;
888888
889889 const uint8_t xi0 = (uint8_t )(x0 + 0.5f );
890890 const uint8_t xi1 = (uint8_t )(x1 + 0.5f );
@@ -922,10 +922,12 @@ static void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * r
922922
923923 y [i ].d = d ;
924924
925- for (int j = 0 ; j < QK8_0 ; ++ j ) {
926- const float v0 = x [i * QK8_0 + j ]* id ;
925+ for (int j = 0 ; j < QK8_0 /2 ; ++ j ) {
926+ const float v0 = x [i * QK8_0 + 2 * j + 0 ]* id ;
927+ const float v1 = x [i * QK8_0 + 2 * j + 1 ]* id ;
927928
928- y [i ].qs [j ] = roundf (v0 );
929+ y [i ].qs [ j ] = v0 + 0.5f ;
930+ y [i ].qs [QK8_0 /2 + j ] = v1 + 0.5f ;
929931 }
930932 }
931933}
@@ -943,12 +945,12 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
943945 float32x4_t asrcv [8 ];
944946 float32x4_t amaxv [8 ];
945947
946- for (int l = 0 ; l < 8 ; l ++ ) srcv [l ] = vld1q_f32 (x + i * 32 + 4 * l );
947- for (int l = 0 ; l < 8 ; l ++ ) asrcv [l ] = vabsq_f32 (srcv [l ]);
948+ for (int j = 0 ; j < 8 ; j ++ ) srcv [j ] = vld1q_f32 (x + i * 32 + 4 * j );
949+ for (int j = 0 ; j < 8 ; j ++ ) asrcv [j ] = vabsq_f32 (srcv [j ]);
948950
949- for (int l = 0 ; l < 4 ; l ++ ) amaxv [2 * l ] = vmaxq_f32 (asrcv [2 * l ], asrcv [2 * l + 1 ]);
950- for (int l = 0 ; l < 2 ; l ++ ) amaxv [4 * l ] = vmaxq_f32 (amaxv [4 * l ], amaxv [4 * l + 2 ]);
951- for (int l = 0 ; l < 1 ; l ++ ) amaxv [8 * l ] = vmaxq_f32 (amaxv [8 * l ], amaxv [8 * l + 4 ]);
951+ for (int j = 0 ; j < 4 ; j ++ ) amaxv [2 * j ] = vmaxq_f32 (asrcv [2 * j ], asrcv [2 * j + 1 ]);
952+ for (int j = 0 ; j < 2 ; j ++ ) amaxv [4 * j ] = vmaxq_f32 (amaxv [4 * j ], amaxv [4 * j + 2 ]);
953+ for (int j = 0 ; j < 1 ; j ++ ) amaxv [8 * j ] = vmaxq_f32 (amaxv [8 * j ], amaxv [8 * j + 4 ]);
952954
953955 const float amax = vmaxvq_f32 (amaxv [0 ]);
954956
@@ -957,14 +959,14 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
957959
958960 y [i ].d = d ;
959961
960- for (int l = 0 ; l < 8 ; l ++ ) {
961- const float32x4_t v = vmulq_n_f32 (srcv [l ], id );
962+ for (int j = 0 ; j < 8 ; j ++ ) {
963+ const float32x4_t v = vmulq_n_f32 (srcv [j ], id );
962964 const int32x4_t vi = vcvtnq_s32_f32 (v );
963965
964- y [i ].qs [4 * l + 0 ] = vgetq_lane_s32 (vi , 0 );
965- y [i ].qs [4 * l + 1 ] = vgetq_lane_s32 (vi , 1 );
966- y [i ].qs [4 * l + 2 ] = vgetq_lane_s32 (vi , 2 );
967- y [i ].qs [4 * l + 3 ] = vgetq_lane_s32 (vi , 3 );
966+ y [i ].qs [ 2 * j + 0 ] = vgetq_lane_s32 (vi , 0 );
967+ y [i ].qs [16 + 2 * j + 0 ] = vgetq_lane_s32 (vi , 1 );
968+ y [i ].qs [ 2 * j + 1 ] = vgetq_lane_s32 (vi , 2 );
969+ y [i ].qs [16 + 2 * j + 1 ] = vgetq_lane_s32 (vi , 3 );
968970 }
969971 }
970972#elif defined(__AVX2__ ) || defined(__AVX__ )
@@ -1080,11 +1082,11 @@ static void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * r
10801082 int sum1 = 0 ;
10811083
10821084 for (int j = 0 ; j < QK8_1 /2 ; ++ j ) {
1083- const float v0 = x [i * QK8_1 + j ]* id ;
1084- const float v1 = x [i * QK8_1 + QK8_1 / 2 + j ]* id ;
1085+ const float v0 = x [i * QK8_1 + 2 * j + 0 ]* id ;
1086+ const float v1 = x [i * QK8_1 + 2 * j + 1 ]* id ;
10851087
1086- y [i ].qs [ j ] = roundf ( v0 ) ;
1087- y [i ].qs [QK8_1 /2 + j ] = roundf ( v1 ) ;
1088+ y [i ].qs [ j ] = v0 + 0.5f ;
1089+ y [i ].qs [QK8_1 /2 + j ] = v1 + 0.5f ;
10881090
10891091 sum0 += y [i ].qs [ j ];
10901092 sum1 += y [i ].qs [QK8_1 /2 + j ];
@@ -1129,10 +1131,10 @@ static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int
11291131 const float32x4_t v = vmulq_n_f32 (srcv [j ], id );
11301132 const int32x4_t vi = vcvtnq_s32_f32 (v );
11311133
1132- y [i ].qs [4 * j + 0 ] = vgetq_lane_s32 (vi , 0 );
1133- y [i ].qs [4 * j + 1 ] = vgetq_lane_s32 (vi , 1 );
1134- y [i ].qs [4 * j + 2 ] = vgetq_lane_s32 (vi , 2 );
1135- y [i ].qs [4 * j + 3 ] = vgetq_lane_s32 (vi , 3 );
1134+ y [i ].qs [ 2 * j + 0 ] = vgetq_lane_s32 (vi , 0 );
1135+ y [i ].qs [16 + 2 * j + 0 ] = vgetq_lane_s32 (vi , 1 );
1136+ y [i ].qs [ 2 * j + 1 ] = vgetq_lane_s32 (vi , 2 );
1137+ y [i ].qs [16 + 2 * j + 1 ] = vgetq_lane_s32 (vi , 3 );
11361138
11371139 accv0 = vaddq_s32 (accv0 , vi );
11381140 }
@@ -1142,10 +1144,10 @@ static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int
11421144 const float32x4_t v = vmulq_n_f32 (srcv [j ], id );
11431145 const int32x4_t vi = vcvtnq_s32_f32 (v );
11441146
1145- y [i ].qs [4 * j + 0 ] = vgetq_lane_s32 (vi , 0 );
1146- y [i ].qs [4 * j + 1 ] = vgetq_lane_s32 (vi , 1 );
1147- y [i ].qs [4 * j + 2 ] = vgetq_lane_s32 (vi , 2 );
1148- y [i ].qs [4 * j + 3 ] = vgetq_lane_s32 (vi , 3 );
1147+ y [i ].qs [ 2 * j + 0 ] = vgetq_lane_s32 (vi , 0 );
1148+ y [i ].qs [16 + 2 * j + 0 ] = vgetq_lane_s32 (vi , 1 );
1149+ y [i ].qs [ 2 * j + 1 ] = vgetq_lane_s32 (vi , 2 );
1150+ y [i ].qs [16 + 2 * j + 1 ] = vgetq_lane_s32 (vi , 3 );
11491151
11501152 accv1 = vaddq_s32 (accv1 , vi );
11511153 }
@@ -1271,8 +1273,8 @@ static void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict
12711273 const int x0 = (x [i ].qs [j ] & 0x0F ) - 8 ;
12721274 const int x1 = (x [i ].qs [j ] >> 4 ) - 8 ;
12731275
1274- y [i * qk + j + 0 ] = x0 * d ;
1275- y [i * qk + j + qk / 2 ] = x1 * d ;
1276+ y [i * qk + 2 * j + 0 ] = x0 * d ;
1277+ y [i * qk + 2 * j + 1 ] = x1 * d ;
12761278 }
12771279 }
12781280}
@@ -1292,8 +1294,8 @@ static void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict
12921294 const int x0 = (x [i ].qs [j ] & 0x0F );
12931295 const int x1 = (x [i ].qs [j ] >> 4 );
12941296
1295- y [i * qk + j + 0 ] = x0 * d + m ;
1296- y [i * qk + j + qk / 2 ] = x1 * d + m ;
1297+ y [i * qk + 2 * j + 0 ] = x0 * d + m ;
1298+ y [i * qk + 2 * j + 1 ] = x1 * d + m ;
12971299 }
12981300 }
12991301}
@@ -1318,8 +1320,8 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
13181320 const int32_t x0 = ((x [i ].qs [j ] & 0x0F ) | xh_0 ) - 16 ;
13191321 const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
13201322
1321- y [i * qk + j + 0 ] = x0 * d ;
1322- y [i * qk + j + qk / 2 ] = x1 * d ;
1323+ y [i * qk + 2 * j + 0 ] = x0 * d ;
1324+ y [i * qk + 2 * j + 1 ] = x1 * d ;
13231325 }
13241326 }
13251327}
@@ -1345,8 +1347,8 @@ static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict
13451347 const int x0 = (x [i ].qs [j ] & 0x0F ) | xh_0 ;
13461348 const int x1 = (x [i ].qs [j ] >> 4 ) | xh_1 ;
13471349
1348- y [i * qk + j + 0 ] = x0 * d + m ;
1349- y [i * qk + j + qk / 2 ] = x1 * d + m ;
1350+ y [i * qk + 2 * j + 0 ] = x0 * d + m ;
1351+ y [i * qk + 2 * j + 1 ] = x1 * d + m ;
13501352 }
13511353 }
13521354}
@@ -1363,8 +1365,9 @@ static void dequantize_row_q8_0(const void * restrict vx, float * restrict y, in
13631365 for (int i = 0 ; i < nb ; i ++ ) {
13641366 const float d = x [i ].d ;
13651367
1366- for (int j = 0 ; j < qk ; ++ j ) {
1367- y [i * qk + j ] = x [i ].qs [j ]* d ;
1368+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1369+ y [i * qk + 2 * j + 0 ] = x [i ].qs [j + 0 ]* d ;
1370+ y [i * qk + 2 * j + 1 ] = x [i ].qs [j + qk /2 ]* d ;
13681371 }
13691372 }
13701373}
0 commit comments