Skip to content
This repository was archived by the owner on Dec 16, 2021. It is now read-only.

Commit a3205b7

Browse files
committed
Fix SSE2/SSSE3 with large blocks
Closes #5
1 parent 193c938 commit a3205b7

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

fecpp.cpp

+10-2
Original file line number | Diff line number | Diff line change
@@ -173,12 +173,20 @@ void addmul(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
173173
#if defined(FECPP_IS_X86)
174174
if(size >= 16 && has_ssse3())
175175
{
176-
size = addmul_ssse3(z, x, y, size);
176+
const size_t consumed = addmul_ssse3(z, x, y, size);
177+
z += consumed;
178+
x += consumed;
179+
y += consumed;
180+
size -= consumed;
177181
}
178182

179183
if(size >= 64 && has_sse2())
180184
{
181-
size = addmul_sse2(z, x, y, size);
185+
const size_t consumed = addmul_sse2(z, x, y, size);
186+
z += consumed;
187+
x += consumed;
188+
y += consumed;
189+
size -= consumed;
182190
}
183191
#endif
184192

fecpp_sse2.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,8 @@ size_t addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
1515

1616
const size_t y_bits = 32 - __builtin_clz(y);
1717

18+
const size_t consumed = size - (size % 64);
19+
1820
// unrolled out to cache line size
1921
while(size >= 64)
2022
{
@@ -94,7 +96,7 @@ size_t addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
9496
size -= 64;
9597
}
9698

97-
return size;
99+
return consumed;
98100
}
99101

100102
}

fecpp_ssse3.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -541,6 +541,8 @@ size_t addmul_ssse3(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
541541
const __m128i t_lo = _mm_load_si128((const __m128i*)(GFTBL + 32*y));
542542
const __m128i t_hi = _mm_load_si128((const __m128i*)(GFTBL + 32*y + 16));
543543

544+
const size_t consumed = size - (size % 16);
545+
544546
while(size >= 16)
545547
{
546548
const __m128i x_1 = _mm_loadu_si128((const __m128i*)(x));
@@ -565,7 +567,7 @@ size_t addmul_ssse3(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
565567
size -= 16;
566568
}
567569

568-
return size;
570+
return consumed;
569571
}
570572

571573
}

0 commit comments

Comments
 (0)