Skip to content

Commit 250dbe5

Browse files
author
Raghuveer Devulapalli
committed
Fix formatting
1 parent 2a4f949 commit 250dbe5

File tree

2 files changed

+49
-41
lines changed

2 files changed

+49
-41
lines changed

src/avx512-64bit-qsort.hpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -580,22 +580,26 @@ X86_SIMD_SORT_INLINE void sort_256_64bit(type_t *arr, int32_t N)
580580
uint64_t combined_mask;
581581
if (N < 192) {
582582
combined_mask = (0x1ull << (N - 128)) - 0x1ull;
583-
load_mask1 = (combined_mask) & 0xFF;
583+
load_mask1 = (combined_mask)&0xFF;
584584
load_mask2 = (combined_mask >> 8) & 0xFF;
585585
load_mask3 = (combined_mask >> 16) & 0xFF;
586586
load_mask4 = (combined_mask >> 24) & 0xFF;
587587
load_mask5 = (combined_mask >> 32) & 0xFF;
588588
load_mask6 = (combined_mask >> 40) & 0xFF;
589589
load_mask7 = (combined_mask >> 48) & 0xFF;
590590
load_mask8 = (combined_mask >> 56) & 0xFF;
591-
load_mask9 = 0x00; load_mask10 = 0x0;
592-
load_mask11 = 0x00; load_mask12 = 0x00;
593-
load_mask13 = 0x00; load_mask14 = 0x00;
594-
load_mask15 = 0x00; load_mask16 = 0x00;
591+
load_mask9 = 0x00;
592+
load_mask10 = 0x0;
593+
load_mask11 = 0x00;
594+
load_mask12 = 0x00;
595+
load_mask13 = 0x00;
596+
load_mask14 = 0x00;
597+
load_mask15 = 0x00;
598+
load_mask16 = 0x00;
595599
}
596600
else {
597601
combined_mask = (0x1ull << (N - 192)) - 0x1ull;
598-
load_mask9 = (combined_mask) & 0xFF;
602+
load_mask9 = (combined_mask)&0xFF;
599603
load_mask10 = (combined_mask >> 8) & 0xFF;
600604
load_mask11 = (combined_mask >> 16) & 0xFF;
601605
load_mask12 = (combined_mask >> 24) & 0xFF;
@@ -714,7 +718,6 @@ X86_SIMD_SORT_INLINE void sort_256_64bit(type_t *arr, int32_t N)
714718
vtype::mask_storeu(arr + 240, load_mask15, zmm[30]);
715719
vtype::mask_storeu(arr + 248, load_mask16, zmm[31]);
716720
}
717-
718721
}
719722

720723
template <typename vtype, typename type_t>

src/avx512-common-qsort.h

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,13 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
269269
type_t *biggest)
270270
{
271271
const int num_unroll = 8;
272-
if (right - left <= 2*num_unroll*vtype::numlanes) {
273-
return partition_avx512<vtype>(arr, left, right, pivot, smallest, biggest);
272+
if (right - left <= 2 * num_unroll * vtype::numlanes) {
273+
return partition_avx512<vtype>(
274+
arr, left, right, pivot, smallest, biggest);
274275
}
275276
/* make array length divisible by 8*vtype::numlanes, shortening the array */
276-
for (int32_t i = ((right - left) % (num_unroll*vtype::numlanes)); i > 0; --i) {
277+
for (int32_t i = ((right - left) % (num_unroll * vtype::numlanes)); i > 0;
278+
--i) {
277279
*smallest = std::min(*smallest, arr[left], comparison_func<vtype>);
278280
*biggest = std::max(*biggest, arr[left], comparison_func<vtype>);
279281
if (!comparison_func<vtype>(arr[left], pivot)) {
@@ -295,17 +297,18 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
295297
// We will now have at least 16 registers' worth of data to process:
296298
// left and right vtype::numlanes values are partitioned at the end
297299
zmm_t vec_left[num_unroll], vec_right[num_unroll];
298-
#pragma GCC unroll 8
300+
#pragma GCC unroll 8
299301
for (int ii = 0; ii < num_unroll; ++ii) {
300-
vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes*ii);
301-
vec_right[ii] = vtype::loadu(arr + (right - vtype::numlanes*(num_unroll-ii)));
302+
vec_left[ii] = vtype::loadu(arr + left + vtype::numlanes * ii);
303+
vec_right[ii] = vtype::loadu(
304+
arr + (right - vtype::numlanes * (num_unroll - ii)));
302305
}
303306
// store points of the vectors
304307
int64_t r_store = right - vtype::numlanes;
305308
int64_t l_store = left;
306309
// indices for loading the elements
307-
left += num_unroll*vtype::numlanes;
308-
right -= num_unroll*vtype::numlanes;
310+
left += num_unroll * vtype::numlanes;
311+
right -= num_unroll * vtype::numlanes;
309312
while (right - left != 0) {
310313
zmm_t curr_vec[num_unroll];
311314
/*
@@ -314,57 +317,59 @@ static inline int64_t partition_avx512_unrolled(type_t *arr,
314317
* otherwise from the left side
315318
*/
316319
if ((r_store + vtype::numlanes) - right < left - l_store) {
317-
right -= num_unroll*vtype::numlanes;
318-
#pragma GCC unroll 8
320+
right -= num_unroll * vtype::numlanes;
321+
#pragma GCC unroll 8
319322
for (int ii = 0; ii < num_unroll; ++ii) {
320-
curr_vec[ii] = vtype::loadu(arr + right + ii*vtype::numlanes);
323+
curr_vec[ii] = vtype::loadu(arr + right + ii * vtype::numlanes);
321324
}
322325
}
323326
else {
324-
#pragma GCC unroll 8
327+
#pragma GCC unroll 8
325328
for (int ii = 0; ii < num_unroll; ++ii) {
326-
curr_vec[ii] = vtype::loadu(arr + left + ii*vtype::numlanes);
329+
curr_vec[ii] = vtype::loadu(arr + left + ii * vtype::numlanes);
327330
}
328-
left += num_unroll*vtype::numlanes;
331+
left += num_unroll * vtype::numlanes;
329332
}
330-
// partition the current vector and save it on both sides of the array
331-
#pragma GCC unroll 8
333+
// partition the current vector and save it on both sides of the array
334+
#pragma GCC unroll 8
332335
for (int ii = 0; ii < num_unroll; ++ii) {
333336
int32_t amount_ge_pivot
334337
= partition_vec<vtype>(arr,
335338
l_store,
336339
r_store + vtype::numlanes,
337340
curr_vec[ii],
338341
pivot_vec,
339-
&min_vec,pick
342+
&min_vec,
340343
&max_vec);
341344
l_store += (vtype::numlanes - amount_ge_pivot);
342345
r_store -= amount_ge_pivot;
343346
}
344347
}
345348

346-
/* partition and save vec_left[8] and vec_right[8] */
347-
#pragma GCC unroll 8
349+
/* partition and save vec_left[8] and vec_right[8] */
350+
#pragma GCC unroll 8
348351
for (int ii = 0; ii < num_unroll; ++ii) {
349-
int32_t amount_ge_pivot = partition_vec<vtype>(arr,
350-
l_store,
351-
r_store + vtype::numlanes,
352-
vec_left[ii],
353-
pivot_vec,
354-
&min_vec,
355-
&max_vec);
352+
int32_t amount_ge_pivot
353+
= partition_vec<vtype>(arr,
354+
l_store,
355+
r_store + vtype::numlanes,
356+
vec_left[ii],
357+
pivot_vec,
358+
&min_vec,
359+
&max_vec);
356360
l_store += (vtype::numlanes - amount_ge_pivot);
357361
r_store -= amount_ge_pivot;
358362
}
359-
#pragma GCC unroll 8
363+
#pragma GCC unroll 8
360364
for (int ii = 0; ii < num_unroll; ++ii) {
361-
int32_t amount_ge_pivot = partition_vec<vtype>(arr,
362-
l_store,
363-
r_store + vtype::numlanes,
364-
vec_right[ii],
365-
pivot_vec,
366-
&min_vec,
367-
&max_vec);
365+
int32_t amount_ge_pivot
366+
= partition_vec<vtype>(arr,
367+
l_store,
368+
r_store + vtype::numlanes,
369+
vec_right[ii],
370+
pivot_vec,
371+
&min_vec,
372+
&max_vec);
368373
l_store += (vtype::numlanes - amount_ge_pivot);
369374
r_store -= amount_ge_pivot;
370375
}

0 commit comments

Comments
 (0)