@@ -301,6 +301,130 @@ float16_t dequantFuncQ6_K(const in decodeBufQ6_K bl, const in uint blockCoords[2
301301 return ret;
302302}
303303
#if defined(DATA_A_IQ2_XXS)
// Buffer reference exposing one IQ2_XXS block; qs elements are read as uint8_t below.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XXS {
    block_iq2_xxs block;
};

// The same block reinterpreted through the packed16 layout; qs elements are
// consumed in pairs as the two halves of a 32-bit word below.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XXS_packed16 {
    block_iq2_xxs_packed16 block;
};

// Decode callback for IQ2_XXS: returns the dequantized value of one element.
//   bl           - reference to the quantized block
//   blockCoords  - not used by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    decodeBufIQ2_XXS_packed16 packed = decodeBufIQ2_XXS_packed16(bl);
    const float16_t d = bl.block.d;
    const uint elem = coordInBlock[1];

    const uint group32 = (elem >> 5) & 0x7; // group of 32 elements, 0..7
    const uint group8 = (elem >> 3) & 0x3;  // group of 8 elements inside it, 0..3

    // Each group of 32 owns 8 bytes of qs: the first four are grid indices,
    // the last four (16-bit words 2 and 3) form the sign/scale word.
    const uint8_t gridIdx = bl.block.qs[8 * group32 + group8];
    const uint wordBase = 4 * group32 + 2;
    const uint signscale = pack32(u16vec2(packed.block.qs[wordBase], packed.block.qs[wordBase + 1]));

    // The top four bits of the sign/scale word are the per-group scale.
    const float16_t dscale = d * 0.25hf * (0.5hf + float16_t(signscale >> 28));

    // Seven stored sign bits per group of 8; bit 7 receives the parity of
    // those seven bits (higher bits of the shifted count are never tested).
    uint signBits = bitfieldExtract(signscale, 7 * int(group8), 7);
    signBits |= bitCount(signBits) << 7;

    // The grid entry is indexed as two 32-bit words; pick the word, then the byte.
    const uint8_t g = unpack8(iq2xxs_grid[gridIdx][(elem >> 2) & 0x1])[elem & 3];

    const bool negative = (signBits & (1 << (elem & 7))) != 0;
    return dscale * float16_t(g) * (negative ? -1.0hf : 1.0hf);
}
#endif
337+
#if defined(DATA_A_IQ2_XS)
// Buffer reference exposing one IQ2_XS block.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2_XS {
    block_iq2_xs block;
};

// Decode callback for IQ2_XS: returns the dequantized value of one element.
//   bl           - reference to the quantized block
//   blockCoords  - not used by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
float16_t dequantFuncIQ2_XS(const in decodeBufIQ2_XS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const float16_t d = bl.block.d;
    const uint elem = coordInBlock[1];

    const uint scaleIdx = (elem >> 5) & 0x7;    // one scale byte per 32 elements, 0..7
    const uint nibbleShift = (elem >> 2) & 0x4; // 0 or 4: which nibble of that byte
    const uint qsIdx = (elem >> 3) & 0x1F;      // one 16-bit entry per 8 elements, 0..31

    const uint16_t q = bl.block.qs[qsIdx];
    const float16_t dscale = d * 0.25hf * (0.5hf + float16_t((bl.block.scales[scaleIdx] >> nibbleShift) & 0xF));

    // Upper 7 bits of the entry are sign bits; bit 7 receives their parity.
    uint signBits = uint(q >> 9);
    signBits |= bitCount(signBits) << 7;

    // Lower 9 bits select the grid entry; pick the 32-bit word, then the byte.
    const uint8_t g = unpack8(iq2xs_grid[q & 0x1FF][(elem >> 2) & 0x1])[elem & 3];

    const bool negative = (signBits & (1 << (elem & 7))) != 0;
    return dscale * float16_t(g) * (negative ? -1.0hf : 1.0hf);
}
#endif
363+
#if defined(DATA_A_IQ3_XXS)
// Buffer reference exposing one IQ3_XXS block; qs elements are read as uint8_t below.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_XXS {
    block_iq3_xxs block;
};

// The same block reinterpreted through the packed16 layout; qs elements are
// consumed in pairs as the two halves of a 32-bit word below.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_XXS_packed16 {
    block_iq3_xxs_packed16 block;
};

// Decode callback for IQ3_XXS: returns the dequantized value of one element.
//   bl           - reference to the quantized block
//   blockCoords  - not used by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
float16_t dequantFuncIQ3_XXS(const in decodeBufIQ3_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    decodeBufIQ3_XXS_packed16 bl16 = decodeBufIQ3_XXS_packed16(bl);
    const float16_t d = bl.block.d;
    const uint idx = coordInBlock[1];

    const uint ib32 = (idx & 0xE0) >> 5; // 0..7, group of 32 elements
    const uint ib8 = (idx & 0x18) >> 3;  // 0..3, group of 8 elements inside ib32
    // Each grid word unpacks to 4 values, so there is one grid index per
    // 4 elements: the shift must be 2 to cover qs[0..63].
    const uint ib4 = (idx & 0xFC) >> 2;  // 0..63
    const uint is16 = QUANT_K / 8 + 2 * ib32; // index in packed16

    const uint8_t qs = bl.block.qs[ib4];
    const uint signscale = pack32(u16vec2(bl16.block.qs[is16], bl16.block.qs[is16+1]));

    // The top four bits of the sign/scale word are the per-group scale.
    const float16_t dscale = d * 0.5hf * (0.5hf + float16_t(signscale >> 28));

    // One 7-bit sign field per 8 elements (the bit tested below is idx & 7),
    // so the field is selected by the group-of-8 index, not the grid index.
    uint sign = bitfieldExtract(signscale, 7 * int(ib8), 7);
    // Bit 7 receives the parity of the seven stored bits.
    sign |= bitCount(sign) << 7;

    const uint8_t g = unpack8(iq3xxs_grid[qs])[idx & 3];

    float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
    return ret;
}
#endif
396+
#if defined(DATA_A_IQ3_S)
// Buffer reference exposing one IQ3_S block.
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3_S {
    block_iq3_s block;
};

// Decode callback for IQ3_S: returns the dequantized value of one element.
//   bl           - reference to the quantized block
//   blockCoords  - not used by this function
//   coordInBlock - coordInBlock[1] is the element index inside the block
float16_t dequantFuncIQ3_S(const in decodeBufIQ3_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
{
    const float16_t d = bl.block.d;
    const uint elem = coordInBlock[1];

    const uint qsIdx = (elem >> 2) & 0x3F;   // one qs entry per 4 elements, 0..63
    const uint qhIdx = (elem >> 5) & 0x7;    // one qh entry per 32 elements, 0..7
    const uint qhBit = qsIdx & 7;            // bit of that qh entry belonging to qsIdx
    const uint signIdx = (elem >> 3) & 0x1F; // one sign entry per 8 elements, 0..31
    const uint scaleIdx = (elem >> 6) & 0x3; // one scale entry per 64 elements, 0..3

    // Each scale entry holds two 4-bit scales; element bit 5 picks the nibble.
    const uint8_t scale = (bl.block.scales[scaleIdx] >> ((elem >> 3) & 0x4)) & uint8_t(0xF);
    const float16_t dscale = d * (1.0hf + float16_t(2 * scale));

    // Grid index = qs entry plus one extra bit from qh, moved to bit 8.
    const uint lowBits = bl.block.qs[qsIdx];
    const uint highBit = (bl.block.qh[qhIdx] << (8 - qhBit)) & 0x100;
    const uint8_t signByte = bl.block.signs[signIdx];

    const uint g = unpack8(iq3s_grid[lowBits | highBit])[elem & 3];

    const bool negative = (signByte & (1 << (elem & 7))) != 0;
    return dscale * float16_t(g) * (negative ? -1.0hf : 1.0hf);
}
#endif
426+
427+
304428#if defined(DATA_A_IQ4_NL)
305429layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ4_NL {
306430 block_iq4_nl block;
@@ -340,6 +464,14 @@ float16_t dequantFuncIQ4_NL(const in decodeBufIQ4_NL bl, const in uint blockCoor
340464#define dequantFuncA dequantFuncQ5_K
341465#elif defined(DATA_A_Q6_K)
342466#define dequantFuncA dequantFuncQ6_K
467+ #elif defined(DATA_A_IQ2_XXS)
468+ #define dequantFuncA dequantFuncIQ2_XXS
469+ #elif defined(DATA_A_IQ2_XS)
470+ #define dequantFuncA dequantFuncIQ2_XS
471+ #elif defined(DATA_A_IQ3_XXS)
472+ #define dequantFuncA dequantFuncIQ3_XXS
473+ #elif defined(DATA_A_IQ3_S)
474+ #define dequantFuncA dequantFuncIQ3_S
343475#elif defined(DATA_A_IQ4_NL)
344476#define dequantFuncA dequantFuncIQ4_NL
345477#endif
0 commit comments