Skip to content

Commit 225fbd1

Browse files
committed
pass just the width down as a const generic
1 parent 647f191 commit 225fbd1

File tree

1 file changed

+53
-125
lines changed

1 file changed

+53
-125
lines changed

zlib-rs/src/inflate/writer.rs

Lines changed: 53 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,7 @@ impl<'a> Writer<'a> {
8888
) {
8989
match FEATURES {
9090
#[cfg(target_arch = "x86_64")]
91-
CpuFeatures::AVX2 => {
92-
self.extend_from_window_help::<core::arch::x86_64::__m256i>(window, range)
93-
}
91+
CpuFeatures::AVX2 => self.extend_from_window_help::<32>(window, range),
9492
_ => self.extend_from_window_runtime_dispatch(window, range),
9593
}
9694
}
@@ -104,49 +102,49 @@ impl<'a> Writer<'a> {
104102
//
105103
// #[cfg(target_arch = "x86_64")]
106104
// if crate::cpu_features::is_enabled_avx512() {
107-
// return self.extend_from_window_help::<core::arch::x86_64::__m512i>(window, range);
105+
// return self.extend_from_window_help::<64>(window, range);
108106
// }
109107

110108
#[cfg(target_arch = "x86_64")]
111109
if crate::cpu_features::is_enabled_avx2() {
112-
return self.extend_from_window_help::<core::arch::x86_64::__m256i>(window, range);
110+
return self.extend_from_window_help::<32>(window, range);
113111
}
114112

115113
#[cfg(target_arch = "x86_64")]
116114
if crate::cpu_features::is_enabled_sse() {
117-
return self.extend_from_window_help::<core::arch::x86_64::__m128i>(window, range);
115+
return self.extend_from_window_help::<16>(window, range);
118116
}
119117

120118
#[cfg(target_arch = "aarch64")]
121119
if crate::cpu_features::is_enabled_neon() {
122-
return self.extend_from_window_help::<core::arch::aarch64::uint8x16_t>(window, range);
120+
return self.extend_from_window_help::<16>(window, range);
123121
}
124122

125123
#[cfg(target_arch = "wasm32")]
126124
if crate::cpu_features::is_enabled_simd128() {
127-
return self.extend_from_window_help::<core::arch::wasm32::v128>(window, range);
125+
return self.extend_from_window_help::<16>(window, range);
128126
}
129127

130-
self.extend_from_window_help::<u64>(window, range)
128+
self.extend_from_window_help::<8>(window, range)
131129
}
132130

133131
#[inline(always)]
134-
fn extend_from_window_help<C: Chunk>(
132+
fn extend_from_window_help<const N: usize>(
135133
&mut self,
136134
window: &super::window::Window,
137135
range: Range<usize>,
138136
) {
139137
let len = range.end - range.start;
140138

141-
if self.remaining() >= len + core::mem::size_of::<C>() {
139+
if self.remaining() >= len + N {
142140
// SAFETY: we know that our window has at least a core::mem::size_of::<C>() extra bytes
143141
// at the end, making it always safe to perform an (unaligned) Chunk read anywhere in
144142
// the window slice.
145143
//
146144
// The calling function checks for CPU features requirements for C.
147145
unsafe {
148146
let src = window.as_ptr();
149-
Self::copy_chunk_unchecked::<C>(
147+
Self::copy_chunk_unchecked::<N>(
150148
src.wrapping_add(range.start).cast(),
151149
self.next_out(),
152150
len,
@@ -174,9 +172,7 @@ impl<'a> Writer<'a> {
174172
) {
175173
match FEATURES {
176174
#[cfg(target_arch = "x86_64")]
177-
CpuFeatures::AVX2 => {
178-
self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length)
179-
}
175+
CpuFeatures::AVX2 => self.copy_match_help::<32>(offset_from_end, length),
180176
_ => self.copy_match_runtime_dispatch(offset_from_end, length),
181177
}
182178
}
@@ -191,32 +187,31 @@ impl<'a> Writer<'a> {
191187

192188
#[cfg(target_arch = "x86_64")]
193189
if crate::cpu_features::is_enabled_avx2() {
194-
return self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length);
190+
return self.copy_match_help::<32>(offset_from_end, length);
195191
}
196192

197193
#[cfg(target_arch = "x86_64")]
198194
if crate::cpu_features::is_enabled_sse() {
199-
return self.copy_match_help::<core::arch::x86_64::__m128i>(offset_from_end, length);
195+
return self.copy_match_help::<16>(offset_from_end, length);
200196
}
201197

202198
#[cfg(target_arch = "aarch64")]
203199
if crate::cpu_features::is_enabled_neon() {
204-
return self
205-
.copy_match_help::<core::arch::aarch64::uint8x16_t>(offset_from_end, length);
200+
return self.copy_match_help::<16>(offset_from_end, length);
206201
}
207202

208203
#[cfg(target_arch = "wasm32")]
209204
if crate::cpu_features::is_enabled_simd128() {
210-
return self.copy_match_help::<core::arch::wasm32::v128>(offset_from_end, length);
205+
return self.copy_match_help::<16>(offset_from_end, length);
211206
}
212207

213-
self.copy_match_help::<u64>(offset_from_end, length)
208+
self.copy_match_help::<8>(offset_from_end, length)
214209
}
215210

216211
#[inline(always)]
217-
fn copy_match_help<C: Chunk>(&mut self, offset_from_end: usize, length: usize) {
212+
fn copy_match_help<const N: usize>(&mut self, offset_from_end: usize, length: usize) {
218213
let capacity = self.buf.len();
219-
let len = Ord::min(self.filled + length + core::mem::size_of::<C>(), capacity);
214+
let len = Ord::min(self.filled + length + N, capacity);
220215
let buf = &mut self.buf.as_mut_slice()[..len];
221216

222217
let current = self.filled;
@@ -244,12 +239,12 @@ impl<'a> Writer<'a> {
244239
}
245240
}
246241
} else {
247-
Self::copy_chunked_within::<C>(buf, capacity, current, offset_from_end, length)
242+
Self::copy_chunked_within::<N>(buf, capacity, current, offset_from_end, length)
248243
}
249244
}
250245

251246
#[inline(always)]
252-
fn copy_chunked_within<C: Chunk>(
247+
fn copy_chunked_within<const N: usize>(
253248
buf: &mut [MaybeUninit<u8>],
254249
capacity: usize,
255250
current: usize,
@@ -258,10 +253,10 @@ impl<'a> Writer<'a> {
258253
) {
259254
let start = current.checked_sub(offset_from_end).expect("in bounds");
260255

261-
if current + length + core::mem::size_of::<C>() < capacity {
256+
if current + length + N < capacity {
262257
let ptr = buf.as_mut_ptr();
263258
// SAFETY: if statement and checked_sub ensures we stay in bounds.
264-
unsafe { Self::copy_chunk_unchecked::<C>(ptr.add(start), ptr.add(current), length) }
259+
unsafe { Self::copy_chunk_unchecked::<N>(ptr.add(start), ptr.add(current), length) }
265260
} else {
266261
// a full simd copy does not fit in the output buffer
267262
buf.copy_within(start..start + length, current);
@@ -273,29 +268,45 @@ impl<'a> Writer<'a> {
273268
/// `src` must be safe to perform unaligned reads in `core::mem::size_of::<C>()` chunks until
274269
/// `end` is reached. `dst` must be safe to (unalingned) write that number of chunks.
275270
#[inline(always)]
276-
unsafe fn copy_chunk_unchecked<C: Chunk>(
271+
unsafe fn copy_chunk_unchecked<const N: usize>(
277272
mut src: *const MaybeUninit<u8>,
278273
mut dst: *mut MaybeUninit<u8>,
279274
length: usize,
280275
) {
281276
let end = src.add(length);
282277

283-
let chunk = C::load_chunk(src);
284-
C::store_chunk(dst, chunk);
278+
let chunk = load_chunk::<N>(src);
279+
store_chunk::<N>(dst, chunk);
285280

286-
src = src.add(core::mem::size_of::<C>());
287-
dst = dst.add(core::mem::size_of::<C>());
281+
src = src.add(N);
282+
dst = dst.add(N);
288283

289284
while src < end {
290-
let chunk = C::load_chunk(src);
291-
C::store_chunk(dst, chunk);
285+
let chunk = load_chunk::<N>(src);
286+
store_chunk::<N>(dst, chunk);
292287

293-
src = src.add(core::mem::size_of::<C>());
294-
dst = dst.add(core::mem::size_of::<C>());
288+
src = src.add(N);
289+
dst = dst.add(N);
295290
}
296291
}
297292
}
298293

294+
/// # Safety
295+
///
296+
/// Must be valid to read a `[u8; N]` value from `from` with an unaligned read.
297+
#[inline(always)]
298+
unsafe fn load_chunk<const N: usize>(from: *const MaybeUninit<u8>) -> [u8; N] {
299+
core::ptr::read_unaligned(from.cast::<[u8; N]>())
300+
}
301+
302+
/// # Safety
303+
///
304+
/// Must be valid to write a `[u8; N]` value to `out` with an unaligned write.
305+
#[inline(always)]
306+
unsafe fn store_chunk<const N: usize>(out: *mut MaybeUninit<u8>, chunk: [u8; N]) {
307+
core::ptr::write_unaligned(out.cast(), chunk)
308+
}
309+
299310
impl fmt::Debug for Writer<'_> {
300311
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
301312
f.debug_struct("Writer")
@@ -309,80 +320,6 @@ fn slice_to_uninit(slice: &[u8]) -> &[MaybeUninit<u8>] {
309320
unsafe { &*(slice as *const [u8] as *const [MaybeUninit<u8>]) }
310321
}
311322

312-
trait Chunk: Sized {
313-
/// # Safety
314-
///
315-
/// Must be valid to read a `Self::Chunk` value from `from` with an unaligned read.
316-
///
317-
/// Implementations may have CPU feature specific requirements depending on the type.
318-
#[inline(always)]
319-
unsafe fn load_chunk(from: *const MaybeUninit<u8>) -> Self {
320-
core::ptr::read_unaligned(from.cast::<Self>())
321-
}
322-
323-
/// # Safety
324-
///
325-
/// Must be valid to write a `Self::Chunk` value to `out` with an unaligned write.
326-
///
327-
/// Implementations may have CPU feature specific requirements depending on the type.
328-
unsafe fn store_chunk(out: *mut MaybeUninit<u8>, chunk: Self) {
329-
core::ptr::write_unaligned(out.cast(), chunk)
330-
}
331-
}
332-
333-
#[cfg(target_endian = "little")]
334-
impl Chunk for u64 {}
335-
336-
#[cfg(target_endian = "big")]
337-
impl Chunk for u64 {
338-
unsafe fn load_chunk(from: *const MaybeUninit<u8>) -> Self {
339-
u64::to_le(core::ptr::read_unaligned(from.cast()))
340-
}
341-
342-
unsafe fn store_chunk(out: *mut MaybeUninit<u8>, chunk: Self) {
343-
core::ptr::copy_nonoverlapping(
344-
chunk.to_le_bytes().as_ptr().cast(),
345-
out,
346-
core::mem::size_of::<Self>(),
347-
)
348-
}
349-
}
350-
351-
#[cfg(target_arch = "x86_64")]
352-
impl Chunk for core::arch::x86_64::__m128i {}
353-
354-
#[cfg(target_arch = "x86_64")]
355-
impl Chunk for core::arch::x86_64::__m256i {}
356-
357-
#[cfg(target_arch = "x86_64")]
358-
impl Chunk for core::arch::x86_64::__m512i {}
359-
360-
#[cfg(target_arch = "aarch64")]
361-
impl Chunk for core::arch::aarch64::uint8x16_t {
362-
#[inline(always)]
363-
unsafe fn load_chunk(from: *const MaybeUninit<u8>) -> Self {
364-
core::arch::aarch64::vld1q_u8(from.cast())
365-
}
366-
367-
#[inline(always)]
368-
unsafe fn store_chunk(out: *mut MaybeUninit<u8>, chunk: Self) {
369-
core::arch::aarch64::vst1q_u8(out.cast(), chunk)
370-
}
371-
}
372-
373-
#[cfg(target_arch = "wasm32")]
374-
impl Chunk for core::arch::wasm32::v128 {
375-
#[inline(always)]
376-
unsafe fn load_chunk(from: *const MaybeUninit<u8>) -> Self {
377-
core::arch::wasm32::v128_load(from.cast())
378-
}
379-
380-
#[inline(always)]
381-
unsafe fn store_chunk(out: *mut MaybeUninit<u8>, chunk: Self) {
382-
core::arch::wasm32::v128_store(out as *mut Self, chunk)
383-
}
384-
}
385-
386323
#[cfg(test)]
387324
mod test {
388325
use super::*;
@@ -424,15 +361,6 @@ mod test {
424361
let offset_from_end = 17;
425362
let length = 17;
426363

427-
#[cfg(target_arch = "x86_64")]
428-
use core::arch::x86_64::{__m128i, __m256i, __m512i};
429-
430-
#[cfg(target_arch = "aarch64")]
431-
use core::arch::aarch64::uint8x16_t;
432-
433-
#[cfg(target_arch = "wasm32")]
434-
use core::arch::wasm32::v128;
435-
436364
macro_rules! helper {
437365
($func:expr) => {
438366
let mut buf = test_array();
@@ -447,30 +375,30 @@ mod test {
447375

448376
#[cfg(target_arch = "x86_64")]
449377
if crate::cpu_features::is_enabled_avx512() {
450-
helper!(Writer::copy_match_help::<__m512i>);
378+
helper!(Writer::copy_match_help::<64>);
451379
}
452380

453381
#[cfg(target_arch = "x86_64")]
454382
if crate::cpu_features::is_enabled_avx2() {
455-
helper!(Writer::copy_match_help::<__m256i>);
383+
helper!(Writer::copy_match_help::<32>);
456384
}
457385

458386
#[cfg(target_arch = "x86_64")]
459387
if crate::cpu_features::is_enabled_sse() {
460-
helper!(Writer::copy_match_help::<__m128i>);
388+
helper!(Writer::copy_match_help::<16>);
461389
}
462390

463391
#[cfg(target_arch = "aarch64")]
464392
if crate::cpu_features::is_enabled_neon() {
465-
helper!(Writer::copy_match_help::<uint8x16_t>);
393+
helper!(Writer::copy_match_help::<16>);
466394
}
467395

468396
#[cfg(target_arch = "wasm32")]
469397
if crate::cpu_features::is_enabled_simd128() {
470-
helper!(Writer::copy_match_help::<v128>);
398+
helper!(Writer::copy_match_help::<16>);
471399
}
472400

473-
helper!(Writer::copy_match_help::<u64>);
401+
helper!(Writer::copy_match_help::<8>);
474402
}
475403

476404
#[test]

0 commit comments

Comments
 (0)