@@ -129,45 +129,72 @@ namespace x86simdsort {
129129 } \
130130 }
131131
132- #define DISPATCH_KEYVALUE_SORT (TYPE1, TYPE2, ISA ) \
133- static void (CAT(CAT(*internal_kv_qsort_, TYPE1), TYPE2))( \
132+ #define ISA_LIST (...) \
133+ std::initializer_list<std::string_view> \
134+ { \
135+ __VA_ARGS__ \
136+ }
137+
138+ #ifdef __FLT16_MAX__
139+ DISPATCH (qsort, _Float16, ISA_LIST(" avx512_spr" ))
140+ DISPATCH(qselect, _Float16, ISA_LIST(" avx512_spr" ))
141+ DISPATCH(partial_qsort, _Float16, ISA_LIST(" avx512_spr" ))
142+ DISPATCH(argsort, _Float16, ISA_LIST(" none" ))
143+ DISPATCH(argselect, _Float16, ISA_LIST(" none" ))
144+ #endif
145+
146+ #define DISPATCH_ALL (func, ISA_16BIT, ISA_32BIT, ISA_64BIT ) \
147+ DISPATCH (func, uint16_t , ISA_16BIT) \
148+ DISPATCH (func, int16_t , ISA_16BIT) \
149+ DISPATCH (func, float , ISA_32BIT) \
150+ DISPATCH (func, int32_t , ISA_32BIT) \
151+ DISPATCH (func, uint32_t , ISA_32BIT) \
152+ DISPATCH (func, int64_t , ISA_64BIT) \
153+ DISPATCH (func, uint64_t , ISA_64BIT) \
154+ DISPATCH (func, double , ISA_64BIT)
155+
156+ DISPATCH_ALL (qsort,
157+ (ISA_LIST(" avx512_icl" )),
158+ (ISA_LIST(" avx512_skx" , " avx2" )),
159+ (ISA_LIST(" avx512_skx" , " avx2" )))
160+ DISPATCH_ALL (qselect,
161+ (ISA_LIST(" avx512_icl" )),
162+ (ISA_LIST(" avx512_skx" , " avx2" )),
163+ (ISA_LIST(" avx512_skx" , " avx2" )))
164+ DISPATCH_ALL (partial_qsort,
165+ (ISA_LIST(" avx512_icl" )),
166+ (ISA_LIST(" avx512_skx" , " avx2" )),
167+ (ISA_LIST(" avx512_skx" , " avx2" )))
168+ DISPATCH_ALL (argsort,
169+ (ISA_LIST(" none" )),
170+ (ISA_LIST(" avx512_skx" , " avx2" )),
171+ (ISA_LIST(" avx512_skx" , " avx2" )))
172+ DISPATCH_ALL (argselect,
173+ (ISA_LIST(" none" )),
174+ (ISA_LIST(" avx512_skx" , " avx2" )),
175+ (ISA_LIST(" avx512_skx" , " avx2" )))
176+
177+ /* Key-Value methods */
178+ #define DECLARE_ALL_KEYVALUE_METHODS (TYPE1, TYPE2 ) \
179+ static void (CAT(CAT(*internal_keyvalue_qsort_, TYPE1), TYPE2))( \
134180 TYPE1 *, TYPE2 *, size_t , bool , bool ) \
135181 = NULL; \
182+ static void (CAT(CAT(*internal_keyvalue_select_, TYPE1), TYPE2))( \
183+ TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
184+ = NULL; \
185+ static void (CAT(CAT(*internal_keyvalue_partial_sort_, TYPE1), TYPE2))( \
186+ TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
187+ = NULL; \
136188 template <> \
137189 void keyvalue_qsort (TYPE1 *key, \
138190 TYPE2 *val, \
139191 size_t arrsize, \
140192 bool hasnan, \
141193 bool descending) \
142194 { \
143- (CAT (CAT (*internal_kv_qsort_ , TYPE1), TYPE2))( \
195+ (CAT (CAT (*internal_keyvalue_qsort_ , TYPE1), TYPE2))( \
144196 key, val, arrsize, hasnan, descending); \
145197 } \
146- static __attribute__ ((constructor)) void CAT( \
147- CAT (resolve_keyvalue_qsort_, TYPE1), TYPE2)(void ) \
148- { \
149- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
150- = &xss::scalar::keyvalue_qsort<TYPE1, TYPE2>; \
151- __builtin_cpu_init (); \
152- std::string_view preferred_cpu = find_preferred_cpu (ISA); \
153- if constexpr (dispatch_requested (" avx512" , ISA)) { \
154- if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
155- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
156- = &xss::avx512::keyvalue_qsort<TYPE1, TYPE2>; \
157- return ; \
158- } \
159- } \
160- if constexpr (dispatch_requested (" avx2" , ISA)) { \
161- if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
162- CAT (CAT (internal_kv_qsort_, TYPE1), TYPE2) \
163- = &xss::avx2::keyvalue_qsort<TYPE1, TYPE2>; \
164- return ; \
165- } \
166- } \
167- } \
168- static void (CAT(CAT(*internal_kv_select_, TYPE1), TYPE2))( \
169- TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
170- = NULL; \
171198 template <> \
172199 void keyvalue_select (TYPE1 *key, \
173200 TYPE2 *val, \
@@ -176,34 +203,9 @@ namespace x86simdsort {
176203 bool hasnan, \
177204 bool descending) \
178205 { \
179- (CAT (CAT (*internal_kv_select_ , TYPE1), TYPE2))( \
206+ (CAT (CAT (*internal_keyvalue_select_ , TYPE1), TYPE2))( \
180207 key, val, k, arrsize, hasnan, descending); \
181208 } \
182- static __attribute__ ((constructor)) void CAT( \
183- CAT (resolve_keyvalue_select_, TYPE1), TYPE2)(void ) \
184- { \
185- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
186- = &xss::scalar::keyvalue_select<TYPE1, TYPE2>; \
187- __builtin_cpu_init (); \
188- std::string_view preferred_cpu = find_preferred_cpu (ISA); \
189- if constexpr (dispatch_requested (" avx512" , ISA)) { \
190- if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
191- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
192- = &xss::avx512::keyvalue_select<TYPE1, TYPE2>; \
193- return ; \
194- } \
195- } \
196- if constexpr (dispatch_requested (" avx2" , ISA)) { \
197- if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
198- CAT (CAT (internal_kv_select_, TYPE1), TYPE2) \
199- = &xss::avx2::keyvalue_select<TYPE1, TYPE2>; \
200- return ; \
201- } \
202- } \
203- } \
204- static void (CAT(CAT(*internal_kv_partial_sort_, TYPE1), TYPE2))( \
205- TYPE1 *, TYPE2 *, size_t , size_t , bool , bool ) \
206- = NULL; \
207209 template <> \
208210 void keyvalue_partial_sort (TYPE1 *key, \
209211 TYPE2 *val, \
@@ -212,76 +214,39 @@ namespace x86simdsort {
212214 bool hasnan, \
213215 bool descending) \
214216 { \
215- (CAT (CAT (*internal_kv_partial_sort_ , TYPE1), TYPE2))( \
217+ (CAT (CAT (*internal_keyvalue_partial_sort_ , TYPE1), TYPE2))( \
216218 key, val, k, arrsize, hasnan, descending); \
217- } \
219+ }
220+
221+ #define DISPATCH_KV_FUNC (func, TYPE1, TYPE2, ISA ) \
218222 static __attribute__ ((constructor)) void CAT( \
219- CAT (resolve_keyvalue_partial_sort_ , TYPE1), TYPE2)(void ) \
223+ CAT (CAT(CAT(resolve_, func), _) , TYPE1), TYPE2)(void ) \
220224 { \
221- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
222- = &xss::scalar::keyvalue_partial_sort <TYPE1, TYPE2>; \
225+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
226+ = &xss::scalar::func <TYPE1, TYPE2>; \
223227 __builtin_cpu_init (); \
224228 std::string_view preferred_cpu = find_preferred_cpu (ISA); \
225229 if constexpr (dispatch_requested (" avx512" , ISA)) { \
226230 if (preferred_cpu.find (" avx512" ) != std::string_view::npos) { \
227- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
228- = &xss::avx512::keyvalue_partial_sort <TYPE1, TYPE2>; \
231+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
232+ = &xss::avx512::func <TYPE1, TYPE2>; \
229233 return ; \
230234 } \
231235 } \
232236 if constexpr (dispatch_requested (" avx2" , ISA)) { \
233237 if (preferred_cpu.find (" avx2" ) != std::string_view::npos) { \
234- CAT (CAT (internal_kv_partial_sort_ , TYPE1), TYPE2) \
235- = &xss::avx2::keyvalue_partial_sort <TYPE1, TYPE2>; \
238+ CAT (CAT (CAT ( CAT (internal_, func), _) , TYPE1), TYPE2) \
239+ = &xss::avx2::func <TYPE1, TYPE2>; \
236240 return ; \
237241 } \
238242 } \
239243 }
240244
241- #define ISA_LIST (...) \
242- std::initializer_list<std::string_view> \
243- { \
244- __VA_ARGS__ \
245- }
246-
247- #ifdef __FLT16_MAX__
248- DISPATCH (qsort, _Float16, ISA_LIST(" avx512_spr" ))
249- DISPATCH(qselect, _Float16, ISA_LIST(" avx512_spr" ))
250- DISPATCH(partial_qsort, _Float16, ISA_LIST(" avx512_spr" ))
251- DISPATCH(argsort, _Float16, ISA_LIST(" none" ))
252- DISPATCH(argselect, _Float16, ISA_LIST(" none" ))
253- #endif
254-
255- #define DISPATCH_ALL (func, ISA_16BIT, ISA_32BIT, ISA_64BIT ) \
256- DISPATCH (func, uint16_t , ISA_16BIT) \
257- DISPATCH(func, int16_t , ISA_16BIT) \
258- DISPATCH(func, float , ISA_32BIT) \
259- DISPATCH(func, int32_t , ISA_32BIT) \
260- DISPATCH(func, uint32_t , ISA_32BIT) \
261- DISPATCH(func, int64_t , ISA_64BIT) \
262- DISPATCH(func, uint64_t , ISA_64BIT) \
263- DISPATCH(func, double , ISA_64BIT)
264-
265- DISPATCH_ALL(qsort,
266- (ISA_LIST(" avx512_icl" )),
267- (ISA_LIST(" avx512_skx" , " avx2" )),
268- (ISA_LIST(" avx512_skx" , " avx2" )))
269- DISPATCH_ALL(qselect,
270- (ISA_LIST(" avx512_icl" )),
271- (ISA_LIST(" avx512_skx" , " avx2" )),
272- (ISA_LIST(" avx512_skx" , " avx2" )))
273- DISPATCH_ALL(partial_qsort,
274- (ISA_LIST(" avx512_icl" )),
275- (ISA_LIST(" avx512_skx" , " avx2" )),
276- (ISA_LIST(" avx512_skx" , " avx2" )))
277- DISPATCH_ALL(argsort,
278- (ISA_LIST(" none" )),
279- (ISA_LIST(" avx512_skx" , " avx2" )),
280- (ISA_LIST(" avx512_skx" , " avx2" )))
281- DISPATCH_ALL(argselect,
282- (ISA_LIST(" none" )),
283- (ISA_LIST(" avx512_skx" , " avx2" )),
284- (ISA_LIST(" avx512_skx" , " avx2" )))
245+ #define DISPATCH_KEYVALUE_SORT (TYPE1, TYPE2, ISA ) \
246+ DECLARE_ALL_KEYVALUE_METHODS (TYPE1, TYPE2) \
247+ DISPATCH_KV_FUNC(keyvalue_qsort, TYPE1, TYPE2, ISA) \
248+ DISPATCH_KV_FUNC(keyvalue_select, TYPE1, TYPE2, ISA) \
249+ DISPATCH_KV_FUNC(keyvalue_partial_sort, TYPE1, TYPE2, ISA)
285250
286251#define DISPATCH_KEYVALUE_SORT_FORTYPE (type ) \
287252 DISPATCH_KEYVALUE_SORT (type, uint64_t , (ISA_LIST(" avx512_skx" , " avx2" ))) \
0 commit comments