2020
2121// Functions to create the interleaved data layout formats
2222
23- // interleave 4 block_q4_0s in blocks of interleave_blcksize
23+ // interleave 4 block_q4_0s in blocks of blck_size_interleave
2424// returns an interleaved block_q4_0x4
2525// in the interleaved block_q4_0x4, place deltas for 4 block_q4_0 blocks
26- // first, then interleave quants from 4 block_q4_0s in blocks of interleave_blcksize
26+ // first, then interleave quants from 4 block_q4_0s in blocks of blck_size_interleave
2727//
2828// - in : an array of block_q4_0 pointers
29- // - interleave_blcksize : the block_q4_0 quants bytes are interleaved in blocks of
30- // interleave_blcksize bytes
29+ // - blck_size_interleave : the block_q4_0 quants bytes are interleaved in blocks of
30+ // blck_size_interleave bytes
3131// - xor_mask : the mask to convert the nibbles in block_q4_0 quants bytes
3232// from bias offset form to pure sign form (this saves subtract
3333// operations during unpacking)
3434//
35- static block_q4_0x4 make_block_q4_0x4 (block_q4_0 * in , unsigned int interleave_blcksize , unsigned int xor_mask ) {
35+ static block_q4_0x4 make_block_q4_0x4 (block_q4_0 * in , unsigned int blck_size_interleave , unsigned int xor_mask ) {
3636 block_q4_0x4 out ;
3737
3838 for (int i = 0 ; i < 4 ; i ++ ) {
3939 out .d [i ] = in [i ].d ;
4040 }
4141
4242 for (int i = 0 ; i < QK4_0 * 2 ; i ++ ) {
43- int src_offset = (i / (4 * interleave_blcksize )) * interleave_blcksize ;
44- int src_id = (i % (4 * interleave_blcksize )) / interleave_blcksize ;
45- src_offset += (i % interleave_blcksize );
43+ int src_offset = (i / (4 * blck_size_interleave )) * blck_size_interleave ;
44+ int src_id = (i % (4 * blck_size_interleave )) / blck_size_interleave ;
45+ src_offset += (i % blck_size_interleave );
4646
4747 out .qs [i ] = in [src_id ].qs [src_offset ] ^ xor_mask ;
4848 }
4949
5050 return out ;
5151}
5252
53- // interleave 8 block_q4_0s in blocks of interleave_blcksize
53+ // interleave 8 block_q4_0s in blocks of blck_size_interleave
5454// returns an interleaved block_q4_0x8
5555// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
56- // first, then interleave quants from 8 block_q4_0s in blocks of interleave_blcksize
57- static block_q4_0x8 make_block_q4_0x8 (block_q4_0 * in , unsigned int interleave_blcksize , unsigned int xor_mask ) {
56+ // first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
57+ static block_q4_0x8 make_block_q4_0x8 (block_q4_0 * in , unsigned int blck_size_interleave , unsigned int xor_mask ) {
5858 block_q4_0x8 out ;
5959
6060 for (int i = 0 ; i < 8 ; i ++ ) {
6161 out .d [i ] = in [i ].d ;
6262 }
6363
6464 for (int i = 0 ; i < QK4_0 * 4 ; i ++ ) {
65- int src_offset = (i / (8 * interleave_blcksize )) * interleave_blcksize ;
66- int src_id = (i % (8 * interleave_blcksize )) / interleave_blcksize ;
67- src_offset += (i % interleave_blcksize );
65+ int src_offset = (i / (8 * blck_size_interleave )) * blck_size_interleave ;
66+ int src_id = (i % (8 * blck_size_interleave )) / blck_size_interleave ;
67+ src_offset += (i % blck_size_interleave );
6868
6969 out .qs [i ] = in [src_id ].qs [src_offset ] ^ xor_mask ;
7070 }
@@ -135,7 +135,7 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
135135 }
136136#else
137137 // scalar
138- const int interleave_blcksize = 4 ;
138+ const int blck_size_interleave = 4 ;
139139 float srcv [4 ][QK8_0 ];
140140 float id [4 ];
141141
@@ -155,12 +155,12 @@ void quantize_q8_0_4x4(const float * restrict x, void * restrict vy, int64_t k)
155155 }
156156
157157 for (int j = 0 ; j < QK8_0 * 4 ; j ++ ) {
158- int src_offset = (j / (4 * interleave_blcksize )) * interleave_blcksize ;
159- int src_id = (j % (4 * interleave_blcksize )) / interleave_blcksize ;
160- src_offset += (j % interleave_blcksize );
158+ int src_offset = (j / (4 * blck_size_interleave )) * blck_size_interleave ;
159+ int src_id = (j % (4 * blck_size_interleave )) / blck_size_interleave ;
160+ src_offset += (j % blck_size_interleave );
161161
162162 float x0 = srcv [src_id ][src_offset ] * id [src_id ];
163- y [i ].qs [j ] = roundf (x0 );;
163+ y [i ].qs [j ] = roundf (x0 );
164164 }
165165 }
166166#endif
@@ -253,7 +253,7 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
253253 }
254254#else
255255 // scalar
256- const int interleave_blcksize = 8 ;
256+ const int blck_size_interleave = 8 ;
257257 float srcv [4 ][QK8_0 ];
258258 float id [4 ];
259259
@@ -273,26 +273,30 @@ void quantize_q8_0_4x8(const float * restrict x, void * restrict vy, int64_t k)
273273 }
274274
275275 for (int j = 0 ; j < QK8_0 * 4 ; j ++ ) {
276- int src_offset = (j / (4 * interleave_blcksize )) * interleave_blcksize ;
277- int src_id = (j % (4 * interleave_blcksize )) / interleave_blcksize ;
278- src_offset += (j % interleave_blcksize );
276+ int src_offset = (j / (4 * blck_size_interleave )) * blck_size_interleave ;
277+ int src_id = (j % (4 * blck_size_interleave )) / blck_size_interleave ;
278+ src_offset += (j % blck_size_interleave );
279279
280280 float x0 = srcv [src_id ][src_offset ] * id [src_id ];
281- y [i ].qs [j ] = roundf (x0 );;
281+ y [i ].qs [j ] = roundf (x0 );
282282 }
283283 }
284284#endif
285285}
286286
287- void quantize_mat_q8_0 (const float * restrict x , void * restrict vy , int64_t nrow , int64_t n_per_row , int64_t interleave_blcksize ) {
287+ void quantize_mat_q8_0 (const float * restrict x , void * restrict vy , int64_t nrow , int64_t n_per_row , int64_t blck_size_interleave ) {
288288 assert (nrow == 4 );
289289 UNUSED (nrow );
290- if (interleave_blcksize == 4 ) quantize_q8_0_4x4 (x , vy , n_per_row );
291- else if (interleave_blcksize == 8 ) quantize_q8_0_4x8 (x , vy , n_per_row );
292- else assert (false);
290+ if (blck_size_interleave == 4 ) {
291+ quantize_q8_0_4x4 (x , vy , n_per_row );
292+ } else if (blck_size_interleave == 8 ) {
293+ quantize_q8_0_4x8 (x , vy , n_per_row );
294+ } else {
295+ assert (false);
296+ }
293297}
294298
295- static size_t quantize_q4_0_nr_bl (const float * restrict src , void * restrict dst , int64_t nrow , int64_t n_per_row , int nrows_interleaved , int interleave_blcksize ) {
299+ static size_t quantize_q4_0_nr_bl (const float * restrict src , void * restrict dst , int64_t nrow , int64_t n_per_row , int nrows_interleaved , int blck_size_interleave ) {
296300 assert (n_per_row % QK4_0 == 0 );
297301 const int nb = n_per_row / QK4_0 ;
298302
@@ -311,15 +315,15 @@ static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict ds
311315 for (int64_t x = 0 ; x < nb ; x ++ ) {
312316
313317 for (int i = 0 ; i < nrows_interleaved ; i ++ ) {
314- quantize_row_q4_0_reference (src + b + i * n_per_row + x * QK4_0 , (block_q4_0 * ) dst_tmp + i , QK4_0 );
318+ quantize_row_q4_0_ref (src + b + i * n_per_row + x * QK4_0 , (block_q4_0 * ) dst_tmp + i , QK4_0 );
315319 }
316320
317321 if (nrows_interleaved == 8 ) {
318- * (block_q4_0x8 * ) out_ptr = make_block_q4_0x8 (dst_tmp , interleave_blcksize , 0x88 );
322+ * (block_q4_0x8 * ) out_ptr = make_block_q4_0x8 (dst_tmp , blck_size_interleave , 0x88 );
319323 out_ptr = (block_q4_0x8 * ) out_ptr + 1 ;
320324 }
321325 else if (nrows_interleaved == 4 ) {
322- * (block_q4_0x4 * ) out_ptr = make_block_q4_0x4 (dst_tmp , interleave_blcksize , 0x88 );
326+ * (block_q4_0x4 * ) out_ptr = make_block_q4_0x4 (dst_tmp , blck_size_interleave , 0x88 );
323327 out_ptr = (block_q4_0x4 * ) out_ptr + 1 ;
324328 }
325329 }
0 commit comments