diff --git a/src/librustc/mir/interpret/allocation.rs b/src/librustc/mir/interpret/allocation.rs
index d4ee60eee6310..80fef910cc718 100644
--- a/src/librustc/mir/interpret/allocation.rs
+++ b/src/librustc/mir/interpret/allocation.rs
@@ -101,8 +101,7 @@ impl AllocationExtra<(), ()> for () {
 impl Allocation {
     /// Creates a read-only allocation initialized by the given bytes
     pub fn from_bytes(slice: &[u8], align: Align, extra: Extra) -> Self {
-        let mut undef_mask = UndefMask::new(Size::ZERO);
-        undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
+        let undef_mask = UndefMask::new(Size::from_bytes(slice.len() as u64), true);
         Self {
             bytes: slice.to_owned(),
             relocations: Relocations::new(),
@@ -122,7 +121,7 @@ impl Allocation {
         Allocation {
             bytes: vec![0; size.bytes() as usize],
             relocations: Relocations::new(),
-            undef_mask: UndefMask::new(size),
+            undef_mask: UndefMask::new(size, false),
             align,
             mutability: Mutability::Mutable,
             extra,
@@ -614,8 +613,9 @@ impl DerefMut for Relocations {
 ////////////////////////////////////////////////////////////////////////////////
 type Block = u64;
-const BLOCK_SIZE: u64 = 64;
+/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
+/// is defined. If it is `false`, the byte is undefined.
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
 pub struct UndefMask {
     blocks: Vec<Block>,
     len: Size,
 }
@@ -625,12 +625,14 @@ pub struct UndefMask {
 impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});
 
 impl UndefMask {
-    pub fn new(size: Size) -> Self {
+    pub const BLOCK_SIZE: u64 = 64;
+
+    pub fn new(size: Size, state: bool) -> Self {
         let mut m = UndefMask {
             blocks: vec![],
             len: Size::ZERO,
         };
-        m.grow(size, false);
+        m.grow(size, state);
         m
     }
@@ -644,6 +646,7 @@ impl UndefMask {
             return Err(self.len);
         }
+        // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes())
             .map(|i| Size::from_bytes(i))
             .find(|&i| !self.get(i));
@@ -663,20 +666,63 @@ impl UndefMask {
     }
 
     pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
-        for i in start.bytes()..end.bytes() {
-            self.set(Size::from_bytes(i), new_state);
+        let (blocka, bita) = bit_index(start);
+        let (blockb, bitb) = bit_index(end);
+        if blocka == blockb {
+            // first set all bits but the first `bita`
+            // then unset the last `64 - bitb` bits
+            let range = if bitb == 0 {
+                u64::max_value() << bita
+            } else {
+                (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
+            };
+            if new_state {
+                self.blocks[blocka] |= range;
+            } else {
+                self.blocks[blocka] &= !range;
+            }
+            return;
+        }
+        // across block boundaries
+        if new_state {
+            // set bita..64 to 1
+            self.blocks[blocka] |= u64::max_value() << bita;
+            // set 0..bitb to 1
+            if bitb != 0 {
+                self.blocks[blockb] |= u64::max_value() >> (64 - bitb);
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = u64::max_value();
+            }
+        } else {
+            // set bita..64 to 0
+            self.blocks[blocka] &= !(u64::max_value() << bita);
+            // set 0..bitb to 0
+            if bitb != 0 {
+                self.blocks[blockb] &= !(u64::max_value() >> (64 - bitb));
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0;
+            }
         }
     }
 
     #[inline]
     pub fn get(&self, i: Size) -> bool {
         let (block, bit) = bit_index(i);
-        (self.blocks[block] & 1 << bit) != 0
+        (self.blocks[block] & (1 << bit)) != 0
     }
 
     #[inline]
     pub fn set(&mut self, i: Size, new_state: bool) {
         let (block, bit) = bit_index(i);
+        self.set_bit(block, bit, new_state);
+    }
+
+    #[inline]
+    fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
         if new_state {
             self.blocks[block] |= 1 << bit;
         } else {
@@ -685,11 +731,15 @@ impl UndefMask {
     }
 
     pub fn grow(&mut self, amount: Size, new_state: bool) {
-        let unused_trailing_bits = self.blocks.len() as u64 * BLOCK_SIZE - self.len.bytes();
+        if amount.bytes() == 0 {
+            return;
+        }
+        let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
         if amount.bytes() > unused_trailing_bits {
-            let additional_blocks = amount.bytes() / BLOCK_SIZE + 1;
+            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
             assert_eq!(additional_blocks as usize as u64, additional_blocks);
             self.blocks.extend(
+                // FIXME(oli-obk): optimize this by repeating `new_state as Block`
                 iter::repeat(0).take(additional_blocks as usize),
             );
         }
@@ -702,8 +752,8 @@
 #[inline]
 fn bit_index(bits: Size) -> (usize, usize) {
     let bits = bits.bytes();
-    let a = bits / BLOCK_SIZE;
-    let b = bits % BLOCK_SIZE;
+    let a = bits / UndefMask::BLOCK_SIZE;
+    let b = bits % UndefMask::BLOCK_SIZE;
     assert_eq!(a as usize as u64, a);
     assert_eq!(b as usize as u64, b);
     (a as usize, b as usize)
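As a rough, self-contained illustration of the block-masking idea behind the new `set_range_inbounds` (not part of the patch: plain `u64` byte offsets and a `Vec<u64>` stand in for `Size` and `UndefMask`, and `set_range`/`bit_index` here are illustrative names only):

```rust
const BLOCK_SIZE: u64 = 64;

fn bit_index(byte: u64) -> (usize, usize) {
    ((byte / BLOCK_SIZE) as usize, (byte % BLOCK_SIZE) as usize)
}

/// Sets the bits for bytes `start..end` to `new_state`.
fn set_range(blocks: &mut [u64], start: u64, end: u64, new_state: bool) {
    let (blocka, bita) = bit_index(start);
    let (blockb, bitb) = bit_index(end);
    if blocka == blockb {
        // all affected bits live in a single block: build a mask for bits `bita..bitb`
        let range = if bitb == 0 {
            u64::max_value() << bita
        } else {
            (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
        };
        if new_state {
            blocks[blocka] |= range;
        } else {
            blocks[blocka] &= !range;
        }
        return;
    }
    // the range crosses at least one block boundary
    if new_state {
        // head block: bits `bita..64`
        blocks[blocka] |= u64::max_value() << bita;
        // tail block: bits `0..bitb` (nothing to do if the range ends on a block boundary)
        if bitb != 0 {
            blocks[blockb] |= u64::max_value() >> (64 - bitb);
        }
        // whole blocks in between are overwritten with a single store each
        for block in blocka + 1..blockb {
            blocks[block] = u64::max_value();
        }
    } else {
        blocks[blocka] &= !(u64::max_value() << bita);
        if bitb != 0 {
            blocks[blockb] &= !(u64::max_value() >> (64 - bitb));
        }
        for block in blocka + 1..blockb {
            blocks[block] = 0;
        }
    }
}

fn main() {
    let mut blocks = vec![0u64; 3];
    // mark bytes 10..130 as defined; the range spans all three blocks
    set_range(&mut blocks, 10, 130, true);
    assert_eq!(blocks[0], u64::max_value() << 10);
    assert_eq!(blocks[1], u64::max_value());
    assert_eq!(blocks[2], 0b11);
}
```

Overwriting whole blocks is what makes the cross-block case fast; only the first and last blocks need bit-level masking.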
diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs
index 88b936afaa4c1..6ea200d4e4fad 100644
--- a/src/librustc_mir/interpret/memory.rs
+++ b/src/librustc_mir/interpret/memory.rs
@@ -700,24 +700,29 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // relocations overlapping the edges; those would not be handled correctly).
         let relocations = {
             let relocations = self.get(src.alloc_id)?.relocations(self, src, size);
-            let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
-            for i in 0..length {
-                new_relocations.extend(
-                    relocations
-                    .iter()
-                    .map(|&(offset, reloc)| {
-                        // compute offset for current repetition
-                        let dest_offset = dest.offset + (i * size);
-                        (
-                            // shift offsets from source allocation to destination allocation
-                            offset + dest_offset - src.offset,
-                            reloc,
-                        )
-                    })
-                );
-            }
+            if relocations.is_empty() {
+                // nothing to copy, ignore even the `length` loop
+                Vec::new()
+            } else {
+                let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
+                for i in 0..length {
+                    new_relocations.extend(
+                        relocations
+                        .iter()
+                        .map(|&(offset, reloc)| {
+                            // compute offset for current repetition
+                            let dest_offset = dest.offset + (i * size);
+                            (
+                                // shift offsets from source allocation to destination allocation
+                                offset + dest_offset - src.offset,
+                                reloc,
+                            )
+                        })
+                    );
+                }
 
-            new_relocations
+                new_relocations
+            }
         };
 
         let tcx = self.tcx.tcx;
@@ -784,20 +789,65 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // The bits have to be saved locally before writing to dest in case src and dest overlap.
         assert_eq!(size.bytes() as usize as u64, size.bytes());
 
-        let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
-        let dest_allocation = self.get_mut(dest.alloc_id)?;
+        let undef_mask = &self.get(src.alloc_id)?.undef_mask;
+
+        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
+        // a naive undef mask copying algorithm would repeatedly have to read the undef mask from
+        // the source and write it to the destination. Even if we optimized the memory accesses,
+        // we'd be doing all of this `repeat` times.
+        // Therefore we precompute a compressed version of the undef mask of the source value and
+        // then write it back `repeat` times without computing any more information from the source.
+
+        // a precomputed cache for ranges of defined/undefined bits
+        // 0000010010001110 will become
+        // [5, 1, 2, 1, 3, 3, 1]
+        // where each element toggles the state
+        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
+        let first = undef_mask.get(src.offset);
+        let mut cur_len = 1;
+        let mut cur = first;
+        for i in 1..size.bytes() {
+            // FIXME: optimize to bitshift the current undef block's bits and read the top bit
+            if undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
+                cur_len += 1;
+            } else {
+                ranges.push(cur_len);
+                cur_len = 1;
+                cur = !cur;
+            }
+        }
 
-        for i in 0..size.bytes() {
-            let defined = undef_mask.get(src.offset + Size::from_bytes(i));
+        // now fill in all the data
+        let dest_allocation = self.get_mut(dest.alloc_id)?;
+        // an optimization where we can just overwrite an entire range of definedness bits if
+        // they are going to be uniformly `1` or `0`.
+        if ranges.is_empty() {
+            dest_allocation.undef_mask.set_range_inbounds(
+                dest.offset,
+                dest.offset + size * repeat,
+                first,
+            );
+            return Ok(())
+        }
 
-            for j in 0..repeat {
-                dest_allocation.undef_mask.set(
-                    dest.offset + Size::from_bytes(i + (size.bytes() * j)),
-                    defined
+        // remember to fill in the trailing bits
+        ranges.push(cur_len);
+
+        for mut j in 0..repeat {
+            j *= size.bytes();
+            j += dest.offset.bytes();
+            let mut cur = first;
+            for range in &ranges {
+                let old_j = j;
+                j += range;
+                dest_allocation.undef_mask.set_range_inbounds(
+                    Size::from_bytes(old_j),
+                    Size::from_bytes(j),
+                    cur,
                 );
+                cur = !cur;
            }
         }
-
         Ok(())
     }
 }
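The `copy_undef_mask` comments above describe run-length compressing the source mask into lengths that alternate between the two states, then replaying that encoding `repeat` times. A minimal standalone sketch of that scheme (not part of the patch: `Vec<bool>` stands in for `UndefMask`, and `encode_runs`/`expand_runs` are illustrative names only):

```rust
/// Encodes `mask` as `(first_state, run_lengths)`, where each length toggles the state.
fn encode_runs(mask: &[bool]) -> (bool, Vec<u64>) {
    let first = mask[0]; // assumes a non-empty mask, as in the patch (`size > 0`)
    let mut runs = Vec::new();
    let mut cur = first;
    let mut cur_len = 1u64;
    for &bit in &mask[1..] {
        if bit == cur {
            cur_len += 1;
        } else {
            runs.push(cur_len);
            cur_len = 1;
            cur = !cur;
        }
    }
    // remember to fill in the trailing run
    runs.push(cur_len);
    (first, runs)
}

/// Expands the runs back into a flat mask, repeated `repeat` times.
fn expand_runs(first: bool, runs: &[u64], repeat: u64) -> Vec<bool> {
    let mut out = Vec::new();
    for _ in 0..repeat {
        let mut cur = first;
        for &len in runs {
            out.extend(std::iter::repeat(cur).take(len as usize));
            cur = !cur;
        }
    }
    out
}

fn main() {
    // the `0000010010001110` example from the comment (false = undefined, true = defined)
    let mask = [false, false, false, false, false, true, false, false,
                true, false, false, false, true, true, true, false];
    let (first, runs) = encode_runs(&mask);
    assert!(!first);
    assert_eq!(runs, vec![5, 1, 2, 1, 3, 3, 1]);
    // a single copy round-trips to the original mask
    assert_eq!(expand_runs(first, &runs, 1), mask);
}
```

For a mostly-defined or mostly-undefined source, `runs` stays tiny (often a single entry), so each of the `repeat` copies becomes a handful of `set_range_inbounds` calls instead of `size` per-byte writes.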
diff --git a/src/test/run-pass-fulldeps/undef_mask.rs b/src/test/run-pass-fulldeps/undef_mask.rs
new file mode 100644
index 0000000000000..cf6e6f7231638
--- /dev/null
+++ b/src/test/run-pass-fulldeps/undef_mask.rs
@@ -0,0 +1,26 @@
+// ignore-cross-compile
+// ignore-stage1
+
+#![feature(rustc_private)]
+
+extern crate rustc;
+
+use rustc::mir::interpret::UndefMask;
+use rustc::ty::layout::Size;
+
+fn main() {
+    let mut mask = UndefMask::new(Size::from_bytes(500), false);
+    assert!(!mask.get(Size::from_bytes(499)));
+    mask.set(Size::from_bytes(499), true);
+    assert!(mask.get(Size::from_bytes(499)));
+    mask.set_range_inbounds(Size::from_bytes(100), Size::from_bytes(256), true);
+    for i in 0..100 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+    for i in 100..256 {
+        assert!(mask.get(Size::from_bytes(i)));
+    }
+    for i in 256..499 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+}
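For completeness, a small sketch of the block bookkeeping performed by `UndefMask::grow`, assuming the same 64-bit blocks and the `amount / BLOCK_SIZE + 1` rounding used in the patch; `blocks_to_append` is a hypothetical helper for illustration, not an API from the patch:

```rust
const BLOCK_SIZE: u64 = 64;

/// Returns how many extra 64-bit blocks a `grow(amount)` call appends, given the
/// current length in bytes and the number of already-allocated blocks.
fn blocks_to_append(cur_blocks: u64, cur_len: u64, amount: u64) -> u64 {
    if amount == 0 {
        // the early return added by the patch: growing by zero bytes is a no-op
        return 0;
    }
    // bits already allocated but not yet in use
    let unused_trailing_bits = cur_blocks * BLOCK_SIZE - cur_len;
    if amount > unused_trailing_bits {
        // the patch's (slightly generous) rounding: always at least one new block
        amount / BLOCK_SIZE + 1
    } else {
        0
    }
}

fn main() {
    // 500 bytes as in the new test: ceil(500 / 64) = 8 blocks suffice,
    // and 500 / 64 + 1 also comes out to 8 here
    assert_eq!(blocks_to_append(0, 0, 500), 8);
    // growing by one byte while a block still has unused bits appends nothing
    assert_eq!(blocks_to_append(8, 500, 1), 0);
}
```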