@@ -161,7 +161,27 @@ static __device__ void quantize_f32_q8_0_block(const float * __restrict__ x, blo
161161    }
162162}
163163
164+ static  __device__  const  int8_t  iq4nl_index[241 ] = {
165+      0 ,  0 ,  0 ,  0 ,  0 ,  0 ,  0 ,  0 ,  0 ,  0 ,  0 , 16 , 16 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,  1 ,
166+      1 , 17 , 17 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 ,  2 , 18 ,  3 ,  3 ,  3 ,  3 ,  3 ,  3 ,  3 ,  3 ,  3 ,  3 ,
167+      3 ,  3 ,  3 ,  3 ,  3 ,  3 , 19 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 ,  4 , 20 ,  5 ,  5 ,  5 ,  5 ,  5 ,  5 ,  5 ,  5 ,  5 ,  5 ,
168+      5 ,  5 , 21 , 21 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 ,  6 , 22 ,  7 ,  7 ,  7 ,  7 ,  7 ,  7 ,  7 ,  7 ,  7 ,  7 , 23 , 23 ,  8 ,  8 ,  8 ,  8 ,
169+      8 ,  8 ,  8 ,  8 ,  8 ,  8 , 24 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 ,  9 , 25 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 10 , 26 , 26 ,
170+     11 , 11 , 11 , 11 , 11 , 11 , 11 , 11 , 11 , 11 , 11 , 11 , 27 , 27 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 12 , 28 , 13 , 13 , 13 ,
171+     13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 13 , 29 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 , 14 ,
172+     14 , 14 , 14 , 14 , 30 , 15 , 15 , 15 , 15 , 15 , 15 , 15 , 15 , 15 , 15 , 15 , 15 
173+ };
174+ static  __device__  __forceinline__  int  best_index_iq4nl (const  int8_t  * values, float  x) {
175+     int  ix = (int )x - values[0 ];
176+     if  (ix < 0  || ix >= 241 ) return  ix < 0  ? 0  : 15 ;
177+     ix = iq4nl_index[ix];
178+     return  ix < 16  ? ix : x - values[ix-16 ] < values[ix-15 ] - x ? ix-16  : ix-15 ;
179+ }
180+ 
164181static  __device__  void  quantize_f32_iq4_nl_block (const  float  * __restrict__  x, block_iq4_nl * __restrict__  y) {
182+     //  const float * xi = (const float *) cxi;
183+     //  block_iq4_nl * dsti = (block_iq4_nl *) cdsti;
184+ 
165185    float  amax = 0 .0f ;
166186    float  vmax = 0 .0f ;
167187
@@ -176,12 +196,14 @@ static __device__ void quantize_f32_iq4_nl_block(const float * __restrict__ x, b
176196    float  d = vmax / kvalues_iq4nl[0 ];
177197    const  float  id = d ? 1 .0f /d : 0 .0f ;
178198
199+     // dsti->d = d;
200+ 
179201    float  sumqx = 0 , sumq2 = 0 ;
180202    for  (int  j = 0 ; j < QK4_NL/2 ; ++j) {
181203        const  float  x0 = x[0         + j]*id;
182204        const  float  x1 = x[QK4_NL/2  + j]*id;
183-         const  uint8_t  xi0 = best_index_int8 ( 16 ,  kvalues_iq4nl, x0);
184-         const  uint8_t  xi1 = best_index_int8 ( 16 ,  kvalues_iq4nl, x1);
205+         const  uint8_t  xi0 = best_index_iq4nl ( kvalues_iq4nl, x0);
206+         const  uint8_t  xi1 = best_index_iq4nl ( kvalues_iq4nl, x1);
185207        y->qs [j] = xi0 | (xi1 << 4 );
186208        const  float  v0 = kvalues_iq4nl[xi0];
187209        const  float  v1 = kvalues_iq4nl[xi1];
@@ -194,6 +216,41 @@ static __device__ void quantize_f32_iq4_nl_block(const float * __restrict__ x, b
194216    y->d  = sumq2 > 0  ? sumqx/sumq2 : d;
195217}
196218
219+ static  __device__  void  quantize_f32_q6_0_block (const  float  * __restrict__  x, block_q6_0 * __restrict__  y) {
220+     //  const float * xi = (const float *) cxi;
221+     //  block_q6_0 * dsti = (block_q6_0 *) cdsti;
222+ 
223+     float  amax = 0 .0f ;
224+     float  vmax = 0 .0f ;
225+ 
226+     for  (int  j = 0 ; j < QK6_0; ++j) {
227+         const  float  v  = x[j];
228+         const  float  av = fabsf (x[j]);
229+         if  (amax < av) {
230+             amax = av;
231+             vmax = v;
232+         }
233+     }
234+ 
235+     const  float  d  = vmax / -32 ;
236+     const  float  id = d ? 1 .0f /d : 0 .0f ;
237+ 
238+     y->d  = d;
239+     memset (y->qh , 0 , QK6_0/4 );
240+ 
241+     for  (int  j = 0 ; j < QK6_0/2 ; ++j) {
242+         const  float  x0 = x[0        + j]*id;
243+         const  float  x1 = x[QK4_0/2  + j]*id;
244+ 
245+         const  uint8_t  xi0 = min (63 , (int8_t )(x0 + 32 .5f ));
246+         const  uint8_t  xi1 = min (63 , (int8_t )(x1 + 32 .5f ));
247+ 
248+         y->qs [j]  = (xi0 & 0xf ) | ((xi1 & 0xf ) << 4 );
249+         const  uint8_t  h = (xi0 >> 4 ) | ((xi1 >> 4 ) << 2 );
250+         y->qh [j%(QK6_0/4 )] |= (h << 4 *(j/(QK6_0/4 )));
251+     }
252+ }
253+ 
197254//  Wrapper functions for cpy.cu compatibility
198255static  __device__  void  cpy_blck_f32_q4_0 (const  char  * cxi, char  * cdsti) {
199256    quantize_f32_q4_0_block ((const  float  *)cxi, (block_q4_0 *)cdsti);
@@ -211,6 +268,10 @@ static __device__ void cpy_blck_f32_q5_1(const char * cxi, char * cdsti) {
211268    quantize_f32_q5_1_block ((const  float  *)cxi, (block_q5_1 *)cdsti);
212269}
213270
271+ static  __device__  void  cpy_blck_f32_q6_0 (const  char  * cxi, char  * cdsti) {
272+     quantize_f32_q6_0_block ((const  float  *)cxi, (block_q6_0 *)cdsti);
273+ }
274+ 
214275static  __device__  void  cpy_blck_f32_q8_0 (const  char  * cxi, char  * cdsti) {
215276    quantize_f32_q8_0_block ((const  float  *)cxi, (block_q8_0 *)cdsti);
216277}
0 commit comments