@@ -41,6 +41,69 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must not exceed WORD_SIZE.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    if load_sz == WORD_SIZE {
+        return *src;
+    }
+
+    let mut i = 0;
+    let mut out = 0usize;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(i).cast::<$ty>() = *src.byte_add(i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+    // (as we handled the full-word case above).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u32 u16 u8);
+    debug_assert!(i == load_sz);
+    out
+}
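To illustrate what `load_aligned_partial` computes, here is a standalone sketch (not part of the patch) that mimics the chunk decomposition in safe code, assuming a `WORD_SIZE` of 8. A `load_sz` of 3 decomposes into a `u16` read at offset 0 followed by a `u8` read at offset 2; `i |= chunk_sz` is enough to advance the offset because the chunk sizes are distinct powers of two drawn from the bits of `load_sz`.

```rust
fn main() {
    // An aligned word's bytes; only the first `load_sz` may be read.
    let buf: [u8; 8] = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];
    let load_sz = 3;
    let mut out = [0u8; 8];
    let mut i = 0;
    // Large chunks first, matching the `load_prefix!(u32 u16 u8)` expansion.
    for chunk_sz in [4, 2, 1] {
        if load_sz & chunk_sz != 0 {
            out[i..i + chunk_sz].copy_from_slice(&buf[i..i + chunk_sz]);
            i |= chunk_sz;
        }
    }
    assert_eq!(i, load_sz);
    // On little-endian this equals the full word with bytes 3..8 zeroed.
    assert_eq!(out, [0x11, 0x22, 0x33, 0, 0, 0, 0, 0]);
}
```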
+
+/// Load `load_sz` many bytes from `src.byte_add(WORD_SIZE - load_sz)`. `src` must be `usize`-aligned.
+/// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had done
+/// a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must not exceed WORD_SIZE.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    if load_sz == WORD_SIZE {
+        return *src;
+    }
+
+    let mut i = 0;
+    let mut out = 0usize;
+    let start_shift = WORD_SIZE - load_sz;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the small reads first, `start_shift + i` has in the
+                // meantime become aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(start_shift + i).cast::<$ty>() = *src.byte_add(start_shift + i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+    // (as we handled the full-word case above).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u8 u16 u32);
+    debug_assert!(i == load_sz);
+    out
+}
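The mirrored variant, sketched the same way (again assuming a `WORD_SIZE` of 8): the bytes come from the *end* of the word, and the small chunks go first so that each read offset `start_shift + i` is aligned for its chunk type (here 5 for the `u8`, then 6 for the `u16`).

```rust
fn main() {
    let buf: [u8; 8] = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];
    let load_sz = 3;
    let start_shift = 8 - load_sz; // first in-bounds byte offset: 5
    let mut out = [0u8; 8];
    let mut i = 0;
    // Small chunks first, matching the `load_prefix!(u8 u16 u32)` expansion.
    for chunk_sz in [1, 2, 4] {
        if load_sz & chunk_sz != 0 {
            let at = start_shift + i;
            out[at..at + chunk_sz].copy_from_slice(&buf[at..at + chunk_sz]);
            i |= chunk_sz;
        }
    }
    assert_eq!(i, load_sz);
    // The loaded bytes end up as the *last* bytes of the result.
    assert_eq!(out, [0, 0, 0, 0, 0, 0x66, 0x77, 0x88]);
}
```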
+
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
     #[inline(always)]
@@ -66,9 +129,12 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+
         let mut dest_usize = dest as *mut usize;
         let dest_end = dest.wrapping_add(n) as *mut usize;
 
@@ -77,29 +143,37 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         let shift = offset * 8;
 
         // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
 
-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
             src_aligned = src_aligned.wrapping_add(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
             #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
             prev_word = cur_word;
 
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
             dest_usize = dest_usize.wrapping_add(1);
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the
+        // `src` side it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
     }
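The loop now stops one word early because the final aligned source word straddles the end of the buffer, so it is fetched with `load_aligned_partial` instead of a plain (out-of-bounds) read. The shift arithmetic itself is unchanged; here is a worked little-endian example with made-up byte values, assuming a 64-bit word and `offset = 1` (so `shift = 8`):

```rust
fn main() {
    let prev_word: u64 = 0x8877_6655_4433_2211; // aligned word k (LE bytes 0x11..0x88)
    let cur_word: u64 = 0xFFEE_DDCC_BBAA_9988; // aligned word k + 1
    let shift = 8; // offset = 1 byte
    // Little-endian branch: drop the stale low byte of `prev_word`,
    // splice the first byte of `cur_word` in at the top.
    let reassembled = prev_word >> shift | cur_word << (64 - shift);
    assert_eq!(reassembled, 0x8888_7766_5544_3322);
}
```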
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@@ -164,40 +238,51 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+
         let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
 
         // Calculate the misalignment offset and shift needed to reassemble value.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);
 
-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
             src_aligned = src_aligned.wrapping_sub(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+            let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
             #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
             prev_word = cur_word;
 
             dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the
+        // `src` side it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
     }
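The backward path mirrors this: the loop stops one word before `dest_start`, and the first (lowest) source word, which is partially out-of-bounds at its front, is fetched with `load_aligned_end_partial`. A worked little-endian example of the mirrored shift, under the same assumptions as above (64-bit word, `offset = 1`, made-up values):

```rust
fn main() {
    let prev_word: u64 = 0x8877_6655_4433_2211; // aligned word k + 1
    let cur_word: u64 = 0xFFEE_DDCC_BBAA_9988; // aligned word k (one word lower)
    let shift = 8; // offset = 1 byte
    // Little-endian branch: the lowest byte of `prev_word` becomes the top
    // byte of the result; the rest comes from the upper bytes of `cur_word`.
    let reassembled = prev_word << (64 - shift) | cur_word >> shift;
    assert_eq!(reassembled, 0x11FF_EEDD_CCBB_AA99);
}
```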
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {