@@ -390,25 +390,21 @@ X86_SIMD_SORT_INLINE type_t get_pivot_64bit(type_t *arr,
390390 const int64_t left,
391391 const int64_t right)
392392{
393- // median of 8x8 elements
393+ // median of 8
394394 int64_t size = (right - left) / 8 ;
395395 using zmm_t = typename vtype::zmm_t ;
396- zmm_t v[8 ];
397- for (int64_t ii = 0 ; ii < 8 ; ++ii) {
398- v[ii] = vtype::loadu (arr + left + ii*size);
399- }
400- COEX<vtype>(v[0 ], v[1 ]); COEX<vtype>(v[2 ], v[3 ]); /* step 1 */
401- COEX<vtype>(v[4 ], v[5 ]); COEX<vtype>(v[6 ], v[7 ]);
402- COEX<vtype>(v[0 ], v[2 ]); COEX<vtype>(v[1 ], v[3 ]); /* step 2 */
403- COEX<vtype>(v[4 ], v[6 ]); COEX<vtype>(v[5 ], v[7 ]);
404- COEX<vtype>(v[0 ], v[4 ]); COEX<vtype>(v[1 ], v[2 ]); /* step 3 */
405- COEX<vtype>(v[5 ], v[6 ]); COEX<vtype>(v[3 ], v[7 ]);
406- COEX<vtype>(v[1 ], v[5 ]); COEX<vtype>(v[2 ], v[6 ]); /* step 4 */
407- COEX<vtype>(v[3 ], v[5 ]); COEX<vtype>(v[2 ], v[4 ]); /* step 5 */
408- COEX<vtype>(v[3 ], v[4 ]); /* step 6 */
396+ __m512i rand_index = _mm512_set_epi64 (left + size,
397+ left + 2 * size,
398+ left + 3 * size,
399+ left + 4 * size,
400+ left + 5 * size,
401+ left + 6 * size,
402+ left + 7 * size,
403+ left + 8 * size);
404+ zmm_t rand_vec = vtype::template i64gather<sizeof (type_t )>(rand_index, arr);
409405 // pivot will never be a nan, since there are no nan's!
410- zmm_t sort = sort_zmm_64bit<vtype>(v[ 3 ] );
406+ zmm_t sort = sort_zmm_64bit<vtype>(rand_vec );
411407 return ((type_t *)&sort)[4 ];
412408}
413409
414- #endif
410+ #endif
0 commit comments