@@ -72,28 +72,35 @@ static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,
7272
7373// Copies `count` bytes by blocks of `kBlockSize` bytes.
7474// Copies at the start and end of the buffer are unaligned.
75- // Copies in the middle of the buffer are aligned to `kBlockSize `.
75+ // Copies in the middle of the buffer read `src` at addresses aligned to `kAlignment`.
7676//
7777// e.g. with
7878// [12345678123456781234567812345678]
79- // [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80- // [__XXXXXXXX______________________]
81- // [________XXXXXXXX________________]
82- // [________________XXXXXXXX________]
83- // [_____________________XXXXXXXX___]
79+ // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
80+ // [__XXXX___________________________]
81+ // [_____XXXXXXXX____________________]
82+ // [_____________XXXXXXXX____________]
83+ // [_____________________XXXXXXXX____]
84+ // [______________________XXXXXXXX___]
8485//
85- // Precondition: `count > 2 * kBlockSize` for efficiency.
86- // `count >= kBlockSize` for correctness.
87- template <size_t kBlockSize >
86+ // Precondition: `kAlignment <= kBlockSize`
87+ // `count > 2 * kBlockSize` for efficiency.
87+ // `count >= kBlockSize` for correctness (the last block copied is `kBlockSize` bytes wide).
89+ template <size_t kBlockSize , size_t kAlignment = kBlockSize >
8890static void CopyAlignedBlocks (char *__restrict dst, const char *__restrict src,
8991 size_t count) {
90- CopyBlock<kBlockSize >(dst, src); // Copy first block
92+ static_assert (is_power2 (kAlignment ), " kAlignment must be a power of two" );
93+ static_assert (is_power2 (kBlockSize ), " kBlockSize must be a power of two" );
94+ static_assert (kAlignment <= kBlockSize ,
95+ " kAlignment must be less or equal to block size" );
96+ CopyBlock<kAlignment >(dst, src); // Copy first block
9197
9298 // Copy aligned blocks
93- const size_t ofla = offset_from_last_aligned<kBlockSize >(src);
99+ const size_t ofla = offset_from_last_aligned<kAlignment >(src);
94100 const size_t limit = count + ofla - kBlockSize ;
95- for (size_t offset = kBlockSize ; offset < limit; offset += kBlockSize )
96- CopyBlock<kBlockSize >(dst - ofla + offset, src - ofla + offset);
101+ for (size_t offset = kAlignment ; offset < limit; offset += kBlockSize )
102+ CopyBlock<kBlockSize >(dst - ofla + offset,
103+ assume_aligned<kAlignment >(src - ofla + offset));
97104
98105 CopyLastBlock<kBlockSize >(dst, src, count); // Copy last block
99106}
0 commit comments