@@ -174,6 +174,51 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
174174}
175175#endif
176176
177+ #if defined(DATA_A_IQ2_S)
// Decodes two adjacent IQ2_S values from block `a_offset + ib`, starting at
// in-block element index `iqs`.
//
// The result is scaled only by the 4-bit sub-block scale (0.25 * (0.5 + scale));
// this function does not read the block's `d` field.
//
// NOTE(review): reads grid bytes `iqs % 4` and `iqs % 4 + 1`, so this presumably
// expects `iqs` to be even - confirm against callers.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint block_idx = a_offset + ib;
    const uint group32   = iqs / 32;   // index into scales[] and qh[]
    const uint group8    = iqs / 8;    // index into qs[] (one grid index per 8 values)
    const uint within8   = iqs % 8;    // position inside the group of 8
    const uint lane      = iqs % 4;    // byte position inside the unpacked grid word

    // Bit 4 of iqs selects the low or the high nibble of the scale byte.
    const uint nibble_shift = 4 * ((iqs / 16) & 1);
    const uint scale = (data_a[block_idx].scales[group32] >> nibble_shift) & 0xf;
    const float db = 0.25 * (0.5 + scale);

    // Grid index: low 8 bits come from qs[], bits 8-9 are a 2-bit field of qh[]
    // located at bit offset 2 * (group8 % 4).
    const uint low_bits = data_a[block_idx].qs[group8];
    const uint high_src = data_a[block_idx].qh[group32];
    const uint high_pos = 2 * (group8 % 4);
    const uint grid_idx = low_bits | ((high_src << (8 - high_pos)) & 0x300);
    const u8vec4 magnitudes = unpack8(iq2s_grid[grid_idx][within8 / 4]);

    // Sign bits are stored in qs[] starting at offset QUANT_K / 8; after the
    // shift, bit 0 and bit 1 belong to the two requested values.
    const uint sign_bits = data_a[block_idx].qs[QUANT_K / 8 + group8] >> within8;
    const float flip0 = ((sign_bits & 1) != 0) ? -1.0 : 1.0;
    const float flip1 = ((sign_bits & 2) != 0) ? -1.0 : 1.0;

    return db * vec2(magnitudes[lane] * flip0, magnitudes[lane + 1] * flip1);
}
// Decodes four adjacent IQ2_S values from block `a_offset + ib`, starting at
// in-block element index `iqs`.
//
// All four bytes of one unpacked grid word are returned, each negated when its
// sign bit is set, and scaled by the 4-bit sub-block scale
// (0.25 * (0.5 + scale)). The block's `d` field is not read here.
//
// NOTE(review): the sign shift uses `iqs % 8` while the grid word is chosen by
// `(iqs % 8) / 4`, so this presumably expects `iqs` to be a multiple of 4 -
// confirm against callers.
vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
    const uint block_idx = a_offset + ib;
    const uint group32   = iqs / 32;   // index into scales[] and qh[]
    const uint group8    = iqs / 8;    // index into qs[] (one grid index per 8 values)
    const uint within8   = iqs % 8;    // position inside the group of 8

    // Bit 4 of iqs selects the low or the high nibble of the scale byte.
    const uint nibble_shift = 4 * ((iqs / 16) & 1);
    const uint scale = (data_a[block_idx].scales[group32] >> nibble_shift) & 0xf;
    const float db = 0.25 * (0.5 + scale);

    // Grid index: low 8 bits come from qs[], bits 8-9 are a 2-bit field of qh[]
    // located at bit offset 2 * (group8 % 4).
    const uint low_bits = data_a[block_idx].qs[group8];
    const uint high_src = data_a[block_idx].qh[group32];
    const uint high_pos = 2 * (group8 % 4);
    const uint grid_idx = low_bits | ((high_src << (8 - high_pos)) & 0x300);
    const u8vec4 magnitudes = unpack8(iq2s_grid[grid_idx][within8 / 4]);

    // Sign bits are stored in qs[] starting at offset QUANT_K / 8; after the
    // shift, bits 0..3 belong to the four requested values.
    const uint sign_bits = data_a[block_idx].qs[QUANT_K / 8 + group8] >> within8;
    const float flip0 = ((sign_bits & 1) != 0) ? -1.0 : 1.0;
    const float flip1 = ((sign_bits & 2) != 0) ? -1.0 : 1.0;
    const float flip2 = ((sign_bits & 4) != 0) ? -1.0 : 1.0;
    const float flip3 = ((sign_bits & 8) != 0) ? -1.0 : 1.0;

    return db * vec4(
        magnitudes.x * flip0,
        magnitudes.y * flip1,
        magnitudes.z * flip2,
        magnitudes.w * flip3
    );
}
220+ #endif
221+
177222#if defined(DATA_A_IQ3_XXS)
178223vec2 dequantize(uint ib, uint iqs, uint a_offset) {
179224 const uint ib4 = iqs / 4;
@@ -276,7 +321,7 @@ vec2 get_dm(uint ib, uint a_offset) {
276321}
277322#endif
278323
279- #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
324+ #if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined( DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
// Returns (d, 0) for block `a_offset + ib`: the block's `d` field converted to
// float in the first component and a constant zero in the second.
// NOTE(review): the second component is presumably the "min" term, unused by
// the formats covered by this #if - confirm against the other get_dm variants.
vec2 get_dm(uint ib, uint a_offset) {
    const float d = float(data_a[a_offset + ib].d);
    return vec2(d, 0);
}
0 commit comments