@@ -88,9 +88,7 @@ impl<'a> Writer<'a> {
8888 ) {
8989 match FEATURES {
9090 #[ cfg( target_arch = "x86_64" ) ]
91- CpuFeatures :: AVX2 => {
92- self . extend_from_window_help :: < core:: arch:: x86_64:: __m256i > ( window, range)
93- }
91+ CpuFeatures :: AVX2 => self . extend_from_window_help :: < 32 > ( window, range) ,
9492 _ => self . extend_from_window_runtime_dispatch ( window, range) ,
9593 }
9694 }
@@ -104,49 +102,49 @@ impl<'a> Writer<'a> {
104102 //
105103 // #[cfg(target_arch = "x86_64")]
106104 // if crate::cpu_features::is_enabled_avx512() {
107- // return self.extend_from_window_help::<core::arch::x86_64::__m512i >(window, range);
105+ // return self.extend_from_window_help::<64 >(window, range);
108106 // }
109107
110108 #[ cfg( target_arch = "x86_64" ) ]
111109 if crate :: cpu_features:: is_enabled_avx2 ( ) {
112- return self . extend_from_window_help :: < core :: arch :: x86_64 :: __m256i > ( window, range) ;
110+ return self . extend_from_window_help :: < 32 > ( window, range) ;
113111 }
114112
115113 #[ cfg( target_arch = "x86_64" ) ]
116114 if crate :: cpu_features:: is_enabled_sse ( ) {
117- return self . extend_from_window_help :: < core :: arch :: x86_64 :: __m128i > ( window, range) ;
115+ return self . extend_from_window_help :: < 16 > ( window, range) ;
118116 }
119117
120118 #[ cfg( target_arch = "aarch64" ) ]
121119 if crate :: cpu_features:: is_enabled_neon ( ) {
122- return self . extend_from_window_help :: < core :: arch :: aarch64 :: uint8x16_t > ( window, range) ;
120+ return self . extend_from_window_help :: < 16 > ( window, range) ;
123121 }
124122
125123 #[ cfg( target_arch = "wasm32" ) ]
126124 if crate :: cpu_features:: is_enabled_simd128 ( ) {
127- return self . extend_from_window_help :: < core :: arch :: wasm32 :: v128 > ( window, range) ;
125+ return self . extend_from_window_help :: < 16 > ( window, range) ;
128126 }
129127
130- self . extend_from_window_help :: < u64 > ( window, range)
128+ self . extend_from_window_help :: < 8 > ( window, range)
131129 }
132130
133131 #[ inline( always) ]
134- fn extend_from_window_help < C : Chunk > (
132+ fn extend_from_window_help < const N : usize > (
135133 & mut self ,
136134 window : & super :: window:: Window ,
137135 range : Range < usize > ,
138136 ) {
139137 let len = range. end - range. start ;
140138
141- if self . remaining ( ) >= len + core :: mem :: size_of :: < C > ( ) {
139+ if self . remaining ( ) >= len + N {
142140 // SAFETY: we know that our window has at least a core::mem::size_of::<C>() extra bytes
143141 // at the end, making it always safe to perform an (unaligned) Chunk read anywhere in
144142 // the window slice.
145143 //
146144 // The calling function checks for CPU features requirements for C.
147145 unsafe {
148146 let src = window. as_ptr ( ) ;
149- Self :: copy_chunk_unchecked :: < C > (
147+ Self :: copy_chunk_unchecked :: < N > (
150148 src. wrapping_add ( range. start ) . cast ( ) ,
151149 self . next_out ( ) ,
152150 len,
@@ -174,9 +172,7 @@ impl<'a> Writer<'a> {
174172 ) {
175173 match FEATURES {
176174 #[ cfg( target_arch = "x86_64" ) ]
177- CpuFeatures :: AVX2 => {
178- self . copy_match_help :: < core:: arch:: x86_64:: __m256i > ( offset_from_end, length)
179- }
175+ CpuFeatures :: AVX2 => self . copy_match_help :: < 32 > ( offset_from_end, length) ,
180176 _ => self . copy_match_runtime_dispatch ( offset_from_end, length) ,
181177 }
182178 }
@@ -191,32 +187,31 @@ impl<'a> Writer<'a> {
191187
192188 #[ cfg( target_arch = "x86_64" ) ]
193189 if crate :: cpu_features:: is_enabled_avx2 ( ) {
194- return self . copy_match_help :: < core :: arch :: x86_64 :: __m256i > ( offset_from_end, length) ;
190+ return self . copy_match_help :: < 32 > ( offset_from_end, length) ;
195191 }
196192
197193 #[ cfg( target_arch = "x86_64" ) ]
198194 if crate :: cpu_features:: is_enabled_sse ( ) {
199- return self . copy_match_help :: < core :: arch :: x86_64 :: __m128i > ( offset_from_end, length) ;
195+ return self . copy_match_help :: < 16 > ( offset_from_end, length) ;
200196 }
201197
202198 #[ cfg( target_arch = "aarch64" ) ]
203199 if crate :: cpu_features:: is_enabled_neon ( ) {
204- return self
205- . copy_match_help :: < core:: arch:: aarch64:: uint8x16_t > ( offset_from_end, length) ;
200+ return self . copy_match_help :: < 16 > ( offset_from_end, length) ;
206201 }
207202
208203 #[ cfg( target_arch = "wasm32" ) ]
209204 if crate :: cpu_features:: is_enabled_simd128 ( ) {
210- return self . copy_match_help :: < core :: arch :: wasm32 :: v128 > ( offset_from_end, length) ;
205+ return self . copy_match_help :: < 16 > ( offset_from_end, length) ;
211206 }
212207
213- self . copy_match_help :: < u64 > ( offset_from_end, length)
208+ self . copy_match_help :: < 8 > ( offset_from_end, length)
214209 }
215210
216211 #[ inline( always) ]
217- fn copy_match_help < C : Chunk > ( & mut self , offset_from_end : usize , length : usize ) {
212+ fn copy_match_help < const N : usize > ( & mut self , offset_from_end : usize , length : usize ) {
218213 let capacity = self . buf . len ( ) ;
219- let len = Ord :: min ( self . filled + length + core :: mem :: size_of :: < C > ( ) , capacity) ;
214+ let len = Ord :: min ( self . filled + length + N , capacity) ;
220215 let buf = & mut self . buf . as_mut_slice ( ) [ ..len] ;
221216
222217 let current = self . filled ;
@@ -244,12 +239,12 @@ impl<'a> Writer<'a> {
244239 }
245240 }
246241 } else {
247- Self :: copy_chunked_within :: < C > ( buf, capacity, current, offset_from_end, length)
242+ Self :: copy_chunked_within :: < N > ( buf, capacity, current, offset_from_end, length)
248243 }
249244 }
250245
251246 #[ inline( always) ]
252- fn copy_chunked_within < C : Chunk > (
247+ fn copy_chunked_within < const N : usize > (
253248 buf : & mut [ MaybeUninit < u8 > ] ,
254249 capacity : usize ,
255250 current : usize ,
@@ -258,10 +253,10 @@ impl<'a> Writer<'a> {
258253 ) {
259254 let start = current. checked_sub ( offset_from_end) . expect ( "in bounds" ) ;
260255
261- if current + length + core :: mem :: size_of :: < C > ( ) < capacity {
256+ if current + length + N < capacity {
262257 let ptr = buf. as_mut_ptr ( ) ;
263258 // SAFETY: if statement and checked_sub ensures we stay in bounds.
264- unsafe { Self :: copy_chunk_unchecked :: < C > ( ptr. add ( start) , ptr. add ( current) , length) }
259+ unsafe { Self :: copy_chunk_unchecked :: < N > ( ptr. add ( start) , ptr. add ( current) , length) }
265260 } else {
266261 // a full simd copy does not fit in the output buffer
267262 buf. copy_within ( start..start + length, current) ;
@@ -273,29 +268,45 @@ impl<'a> Writer<'a> {
273268 /// `src` must be safe to perform unaligned reads in `core::mem::size_of::<C>()` chunks until
274269 /// `end` is reached. `dst` must be safe to (unalingned) write that number of chunks.
275270 #[ inline( always) ]
276- unsafe fn copy_chunk_unchecked < C : Chunk > (
271+ unsafe fn copy_chunk_unchecked < const N : usize > (
277272 mut src : * const MaybeUninit < u8 > ,
278273 mut dst : * mut MaybeUninit < u8 > ,
279274 length : usize ,
280275 ) {
281276 let end = src. add ( length) ;
282277
283- let chunk = C :: load_chunk ( src) ;
284- C :: store_chunk ( dst, chunk) ;
278+ let chunk = load_chunk :: < N > ( src) ;
279+ store_chunk :: < N > ( dst, chunk) ;
285280
286- src = src. add ( core :: mem :: size_of :: < C > ( ) ) ;
287- dst = dst. add ( core :: mem :: size_of :: < C > ( ) ) ;
281+ src = src. add ( N ) ;
282+ dst = dst. add ( N ) ;
288283
289284 while src < end {
290- let chunk = C :: load_chunk ( src) ;
291- C :: store_chunk ( dst, chunk) ;
285+ let chunk = load_chunk :: < N > ( src) ;
286+ store_chunk :: < N > ( dst, chunk) ;
292287
293- src = src. add ( core :: mem :: size_of :: < C > ( ) ) ;
294- dst = dst. add ( core :: mem :: size_of :: < C > ( ) ) ;
288+ src = src. add ( N ) ;
289+ dst = dst. add ( N ) ;
295290 }
296291 }
297292}
298293
294+ /// # Safety
295+ ///
296+ /// Must be valid to read a `[u8; N]` value from `from` with an unaligned read.
297+ #[ inline( always) ]
298+ unsafe fn load_chunk < const N : usize > ( from : * const MaybeUninit < u8 > ) -> [ u8 ; N ] {
299+ core:: ptr:: read_unaligned ( from. cast :: < [ u8 ; N ] > ( ) )
300+ }
301+
302+ /// # Safety
303+ ///
304+ /// Must be valid to write a `[u8; N]` value to `out` with an unaligned write.
305+ #[ inline( always) ]
306+ unsafe fn store_chunk < const N : usize > ( out : * mut MaybeUninit < u8 > , chunk : [ u8 ; N ] ) {
307+ core:: ptr:: write_unaligned ( out. cast ( ) , chunk)
308+ }
309+
299310impl fmt:: Debug for Writer < ' _ > {
300311 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
301312 f. debug_struct ( "Writer" )
@@ -309,80 +320,6 @@ fn slice_to_uninit(slice: &[u8]) -> &[MaybeUninit<u8>] {
309320 unsafe { & * ( slice as * const [ u8 ] as * const [ MaybeUninit < u8 > ] ) }
310321}
311322
312- trait Chunk : Sized {
313- /// # Safety
314- ///
315- /// Must be valid to read a `Self::Chunk` value from `from` with an unaligned read.
316- ///
317- /// Implementations may have CPU feature specific requirements depending on the type.
318- #[ inline( always) ]
319- unsafe fn load_chunk ( from : * const MaybeUninit < u8 > ) -> Self {
320- core:: ptr:: read_unaligned ( from. cast :: < Self > ( ) )
321- }
322-
323- /// # Safety
324- ///
325- /// Must be valid to write a `Self::Chunk` value to `out` with an unaligned write.
326- ///
327- /// Implementations may have CPU feature specific requirements depending on the type.
328- unsafe fn store_chunk ( out : * mut MaybeUninit < u8 > , chunk : Self ) {
329- core:: ptr:: write_unaligned ( out. cast ( ) , chunk)
330- }
331- }
332-
333- #[ cfg( target_endian = "little" ) ]
334- impl Chunk for u64 { }
335-
336- #[ cfg( target_endian = "big" ) ]
337- impl Chunk for u64 {
338- unsafe fn load_chunk ( from : * const MaybeUninit < u8 > ) -> Self {
339- u64:: to_le ( core:: ptr:: read_unaligned ( from. cast ( ) ) )
340- }
341-
342- unsafe fn store_chunk ( out : * mut MaybeUninit < u8 > , chunk : Self ) {
343- core:: ptr:: copy_nonoverlapping (
344- chunk. to_le_bytes ( ) . as_ptr ( ) . cast ( ) ,
345- out,
346- core:: mem:: size_of :: < Self > ( ) ,
347- )
348- }
349- }
350-
351- #[ cfg( target_arch = "x86_64" ) ]
352- impl Chunk for core:: arch:: x86_64:: __m128i { }
353-
354- #[ cfg( target_arch = "x86_64" ) ]
355- impl Chunk for core:: arch:: x86_64:: __m256i { }
356-
357- #[ cfg( target_arch = "x86_64" ) ]
358- impl Chunk for core:: arch:: x86_64:: __m512i { }
359-
360- #[ cfg( target_arch = "aarch64" ) ]
361- impl Chunk for core:: arch:: aarch64:: uint8x16_t {
362- #[ inline( always) ]
363- unsafe fn load_chunk ( from : * const MaybeUninit < u8 > ) -> Self {
364- core:: arch:: aarch64:: vld1q_u8 ( from. cast ( ) )
365- }
366-
367- #[ inline( always) ]
368- unsafe fn store_chunk ( out : * mut MaybeUninit < u8 > , chunk : Self ) {
369- core:: arch:: aarch64:: vst1q_u8 ( out. cast ( ) , chunk)
370- }
371- }
372-
373- #[ cfg( target_arch = "wasm32" ) ]
374- impl Chunk for core:: arch:: wasm32:: v128 {
375- #[ inline( always) ]
376- unsafe fn load_chunk ( from : * const MaybeUninit < u8 > ) -> Self {
377- core:: arch:: wasm32:: v128_load ( from. cast ( ) )
378- }
379-
380- #[ inline( always) ]
381- unsafe fn store_chunk ( out : * mut MaybeUninit < u8 > , chunk : Self ) {
382- core:: arch:: wasm32:: v128_store ( out as * mut Self , chunk)
383- }
384- }
385-
386323#[ cfg( test) ]
387324mod test {
388325 use super :: * ;
@@ -424,15 +361,6 @@ mod test {
424361 let offset_from_end = 17 ;
425362 let length = 17 ;
426363
427- #[ cfg( target_arch = "x86_64" ) ]
428- use core:: arch:: x86_64:: { __m128i, __m256i, __m512i} ;
429-
430- #[ cfg( target_arch = "aarch64" ) ]
431- use core:: arch:: aarch64:: uint8x16_t;
432-
433- #[ cfg( target_arch = "wasm32" ) ]
434- use core:: arch:: wasm32:: v128;
435-
436364 macro_rules! helper {
437365 ( $func: expr) => {
438366 let mut buf = test_array( ) ;
@@ -447,30 +375,30 @@ mod test {
447375
448376 #[ cfg( target_arch = "x86_64" ) ]
449377 if crate :: cpu_features:: is_enabled_avx512 ( ) {
450- helper ! ( Writer :: copy_match_help:: <__m512i >) ;
378+ helper ! ( Writer :: copy_match_help:: <64 >) ;
451379 }
452380
453381 #[ cfg( target_arch = "x86_64" ) ]
454382 if crate :: cpu_features:: is_enabled_avx2 ( ) {
455- helper ! ( Writer :: copy_match_help:: <__m256i >) ;
383+ helper ! ( Writer :: copy_match_help:: <32 >) ;
456384 }
457385
458386 #[ cfg( target_arch = "x86_64" ) ]
459387 if crate :: cpu_features:: is_enabled_sse ( ) {
460- helper ! ( Writer :: copy_match_help:: <__m128i >) ;
388+ helper ! ( Writer :: copy_match_help:: <16 >) ;
461389 }
462390
463391 #[ cfg( target_arch = "aarch64" ) ]
464392 if crate :: cpu_features:: is_enabled_neon ( ) {
465- helper ! ( Writer :: copy_match_help:: <uint8x16_t >) ;
393+ helper ! ( Writer :: copy_match_help:: <16 >) ;
466394 }
467395
468396 #[ cfg( target_arch = "wasm32" ) ]
469397 if crate :: cpu_features:: is_enabled_simd128 ( ) {
470- helper ! ( Writer :: copy_match_help:: <v128 >) ;
398+ helper ! ( Writer :: copy_match_help:: <16 >) ;
471399 }
472400
473- helper ! ( Writer :: copy_match_help:: <u64 >) ;
401+ helper ! ( Writer :: copy_match_help:: <8 >) ;
474402 }
475403
476404 #[ test]
0 commit comments