From 104da501288c9b802fe6aa8335cf9fc509958a87 Mon Sep 17 00:00:00 2001 From: Russell Johnston Date: Sat, 23 May 2020 15:26:53 -0700 Subject: [PATCH] Keep entity lists alive during iteration Entity lists are now copy-on-write. This lets the interpreter keep iterators pointing into them while (potentially) mutating them, and preserves GML `with` semantics in the presence of such mutation. In particular, this makes `instance_create` and `instance_destroy` memory safe when called from GML, as described by #40. --- gml/src/back/codegen.rs | 1 + gml/src/back/ssa.rs | 8 ++ gml/src/front/codegen.rs | 8 ++ gml/src/index_map.rs | 57 ------------- gml/src/lib.rs | 3 +- gml/src/rc_vec.rs | 164 +++++++++++++++++++++++++++++++++++++ gml/src/vm/code.rs | 1 + gml/src/vm/instance_map.rs | 62 ++++++++++++++ gml/src/vm/interpreter.rs | 28 ++++++- gml/src/vm/mod.rs | 6 +- gml/src/vm/world.rs | 8 +- gml/tests/interpreter.rs | 91 ++++++++++++++++---- 12 files changed, 355 insertions(+), 82 deletions(-) delete mode 100644 gml/src/index_map.rs create mode 100644 gml/src/rc_vec.rs create mode 100644 gml/src/vm/instance_map.rs diff --git a/gml/src/back/codegen.rs b/gml/src/back/codegen.rs index f14e392..5694060 100644 --- a/gml/src/back/codegen.rs +++ b/gml/src/back/codegen.rs @@ -375,6 +375,7 @@ impl From for code::Op { ssa::Opcode::Return => code::Op::Ret, ssa::Opcode::With => code::Op::With, + ssa::Opcode::ReleaseWith => code::Op::ReleaseWith, ssa::Opcode::ScopeError => code::Op::ScopeError, ssa::Opcode::LoadPointer => code::Op::LoadPointer, ssa::Opcode::NextPointer => code::Op::NextPointer, diff --git a/gml/src/back/ssa.rs b/gml/src/back/ssa.rs index 6842af5..717b352 100644 --- a/gml/src/back/ssa.rs +++ b/gml/src/back/ssa.rs @@ -61,6 +61,7 @@ pub enum Instruction { /// instructions. Parameter, + Nullary { op: Opcode }, Unary { op: Opcode, arg: Value }, UnaryReal { op: Opcode, real: f64 }, UnarySymbol { op: Opcode, symbol: Symbol }, @@ -101,6 +102,8 @@ pub enum Opcode { /// Build an iterator over a scope, producing a tuple of start and end. With, + /// Release the iterator created by the most recent `With`. + ReleaseWith, /// Error that a scope does not exist. ScopeError, LoadPointer, @@ -264,6 +267,7 @@ impl Function { match self.values[value] { Alias { .. } | Project { .. } | Parameter => panic!("finding op of non-instruction"), + Nullary { op, .. } | Unary { op, .. } | UnaryReal { op, .. } | UnarySymbol { op, .. } | @@ -286,6 +290,7 @@ impl Function { Alias { .. } | Project { .. } | Parameter => panic!("finding defs of non-instruction"), // Zero-valued instructions: + Nullary { op: Opcode::ReleaseWith } | Unary { op: Opcode::Release, .. } | Unary { op: Opcode::Return, .. } | Unary { op: Opcode::ScopeError, .. } | @@ -302,6 +307,7 @@ impl Function { Unary { op: Opcode::With, .. } => ValueRange { range: start + 1..start + 3 }, // The common case: single-valued instructions: + Nullary { .. } | Unary { .. } | UnaryReal { .. } | UnarySymbol { .. } | @@ -328,6 +334,7 @@ impl Function { match self.values[value] { Alias { .. } | Project { .. } | Parameter => panic!("finding uses of non-instruction"), + Nullary { .. } => &[], Unary { ref arg, .. } => slice::from_ref(arg), UnaryReal { .. } => &[], UnarySymbol { .. } => &[], @@ -350,6 +357,7 @@ impl Function { match self.values[value] { Alias { .. } | Project { .. } | Parameter => panic!("finding uses of non-instruction"), + Nullary { .. } => &mut [], Unary { ref mut arg, .. } => slice::from_mut(arg), UnaryReal { .. } => &mut [], UnarySymbol { .. } => &mut [], diff --git a/gml/src/front/codegen.rs b/gml/src/front/codegen.rs index 28c9a2a..1bf7bf6 100644 --- a/gml/src/front/codegen.rs +++ b/gml/src/front/codegen.rs @@ -1141,6 +1141,9 @@ impl<'p, 'e> Codegen<'p, 'e> { let exists = self.emit_unary(ssa::Opcode::ExistsEntity, entity, location); self.emit_branch(exists, body_block, cond_block, location); + self.current_block = exit_block; + self.emit_nullary(ssa::Opcode::ReleaseWith, location); + With { cond_block, body_block, exit_block, entity } } @@ -1251,6 +1254,11 @@ impl<'p, 'e> Codegen<'p, 'e> { self.emit_unary_symbol(ssa::Opcode::Constant, string, location) } + fn emit_nullary(&mut self, op: ssa::Opcode, location: usize) -> ssa::Value { + let instruction = ssa::Instruction::Nullary { op }; + self.function.emit_instruction(self.current_block, instruction, location) + } + fn emit_unary(&mut self, op: ssa::Opcode, arg: ssa::Value, location: usize) -> ssa::Value { let instruction = ssa::Instruction::Unary { op, arg }; self.function.emit_instruction(self.current_block, instruction, location) diff --git a/gml/src/index_map.rs b/gml/src/index_map.rs deleted file mode 100644 index 5be8527..0000000 --- a/gml/src/index_map.rs +++ /dev/null @@ -1,57 +0,0 @@ -use std::collections::HashMap; -use std::collections::hash_map::Entry; -use std::ops::Index; -use std::hash::Hash; - -pub struct IndexMap where K: Eq + Hash { - keys: HashMap, - values: Vec, -} - -impl Default for IndexMap where K: Eq + Hash { - fn default() -> Self { - IndexMap { - keys: HashMap::default(), - values: Vec::default(), - } - } -} - -impl IndexMap where K: Eq + Hash { - pub fn values(&self) -> &[V] { - &self.values[..] - } - - pub fn len(&self) -> usize { - self.values.len() - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn contains_key(&self, key: K) -> bool { - self.keys.contains_key(&key) - } - - pub fn insert(&mut self, key: K, value: V) { - match self.keys.entry(key) { - Entry::Occupied(entry) => { - self.values[*entry.get()] = value; - } - - Entry::Vacant(entry) => { - entry.insert(self.values.len()); - self.values.push(value); - } - } - } -} - -impl Index for IndexMap where K: Eq + Hash { - type Output = V; - - fn index(&self, key: K) -> &V { - &self.values[self.keys[&key]] - } -} diff --git a/gml/src/lib.rs b/gml/src/lib.rs index cc7e97d..cc4d723 100644 --- a/gml/src/lib.rs +++ b/gml/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(maybe_uninit_extra)] #![feature(box_patterns)] #![feature(box_syntax)] #![feature(extern_types)] @@ -18,7 +19,7 @@ pub use gml_meta::bind; #[macro_use] mod handle_map; -mod index_map; +mod rc_vec; mod bit_vec; pub mod symbol; diff --git a/gml/src/rc_vec.rs b/gml/src/rc_vec.rs new file mode 100644 index 0000000..d4b3c50 --- /dev/null +++ b/gml/src/rc_vec.rs @@ -0,0 +1,164 @@ +use std::{ops, cmp, iter, mem, ptr, slice}; +use std::marker::PhantomData; +use std::mem::MaybeUninit; +use std::rc::Rc; + +/// A copy-on-write `Vec`. +/// +/// Invoking `clone` on an `RcVec` produces a new `RcVec` using the same array. +/// +/// Attempts to mutate an `RcVec` will first ensure it is unique, invoking `clone` on its elements +/// if there are other `RcVec` objects sharing its array. +pub struct RcVec { + buf: Rc<[MaybeUninit]>, + len: usize, + + _marker: PhantomData<[T]>, +} + +impl Default for RcVec { + /// Create a new, empty, unshared `RcVec`. + fn default() -> Self { + RcVec { buf: Rc::new([]), len: 0, _marker: PhantomData } + } +} + +impl Clone for RcVec { + /// Create a new `RcVec` with the same array, increasing its reference count. + fn clone(&self) -> RcVec { + RcVec { buf: Rc::clone(&self.buf), len: self.len, _marker: PhantomData } + } +} + +impl Drop for RcVec { + fn drop(&mut self) { + // If this is the last reference to the array, drop its elements. + if Rc::strong_count(&self.buf) == 1 { + unsafe { + let buf = slice::from_raw_parts_mut(self.as_mut_ptr(), self.len); + ptr::drop_in_place(buf); + } + } + } +} + +impl ops::Deref for RcVec { + type Target = [T]; + + fn deref(&self) -> &[T] { + // Safety: `self.len` represents the number of initialized elements. + unsafe { slice::from_raw_parts(self.as_ptr(), self.len) } + } +} + +impl RcVec { + /// Construct a new `RcVec` with elements initialized to those of `iter` and capacity `cap`. + /// + /// If `cap` is less than the number of elements yielded by `iter`, the rest are discarded. + pub fn from_iter_with_capacity(iter: I, cap: usize) -> Self where + I: IntoIterator + { + let mut len = 0; + let buf = iter.into_iter() + .inspect(|_| len += 1) + .map(MaybeUninit::new) + .chain(iter::repeat_with(MaybeUninit::uninit)) + .take(cap) + .collect(); + + // Safety: The soundness of `Deref` depends on the value of `len` here. + RcVec { buf, len, _marker: PhantomData } + } + + pub fn len(&self) -> usize { self.len } + + pub fn as_ptr(&self) -> *const T { + // Safety: This immediately "forgets" the duplicate `Rc`. + let ptr = unsafe { Rc::into_raw(ptr::read(&self.buf)) }; + ptr as *const T + } + + pub fn as_mut_ptr(&mut self) -> *mut T { + // Safety: This immediately "forgets" the duplicate `Rc`. + let ptr = unsafe { Rc::into_raw(ptr::read(&self.buf)) }; + ptr as *mut T + } +} + +impl RcVec where T: Clone { + /// Reserve capacity for at least `extra` more elements to be inserted. + /// + /// If the array must be reallocated and there are other `RcVec`s using the same allocation, + /// existing elements will be cloned. + pub fn reserve(&mut self, extra: usize) { + if self.buf.len().wrapping_sub(self.len) >= extra { + return; + } + + // Mimic `Vec`'s behavior: limiting `size` to `isize::MAX` ensures that subsequent + // evaluations of `2 * self.buf.len()` cannot overflow. + let buf_len = self.len.checked_add(extra).expect("capacity overflow"); + let buf_len = cmp::max(buf_len, 2 * self.buf.len()); + let size = buf_len.checked_mul(mem::size_of::()).expect("capacity overflow"); + if mem::size_of::() < 8 && size > isize::MAX as usize { + panic!("capacity overflow"); + } + + // If this is the only reference to the array, move its elements. Otherwise, clone them. + if Rc::strong_count(&self.buf) == 1 { + // Safety: The moved from values are immediately freed without being dropped. + let iter = self.buf.iter().take(self.len).map(|value| unsafe { value.read() }); + *self = Self::from_iter_with_capacity(iter, buf_len); + } else { + *self = Self::from_iter_with_capacity(self.iter().cloned(), buf_len); + } + } + + /// Make a mutable reference into the array. + /// + /// If there are other `RcVec`s sharing the array, `clone` the elements to ensure uniqueness. + pub fn make_mut(&mut self) -> &mut [T] { + // If this is not the only reference to the array, clone it. + if Rc::strong_count(&self.buf) != 1 { + *self = Self::from_iter_with_capacity(self.iter().cloned(), self.buf.len()); + } + + // Safety: The strong count of `self.buf` is now 1. + // `self.len` represents the number of initialized elements. + unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } + } + + /// Append `value` to the end of the array. + pub fn push(&mut self, value: T) { + if self.len == self.buf.len() { + self.reserve(1); + } + + // Safety: If there was excess capacity, then no references have been formed to that memory. + // Otherwise, the strong count of `self.buf` is now 1 and there are no references at all. + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + } + + /// Remove and return the element at position `index`, shifting later elements down. + pub fn remove(&mut self, index: usize) -> T { + let len = self.len(); + assert!(index < len); + + let ret; + self.make_mut(); + + // Safety: The strong count of `self.buf` is now 1, and `index` is in bounds. + unsafe { + let ptr = self.as_mut_ptr().add(index); + ret = ptr::read(ptr); + ptr::copy(ptr.offset(1), ptr, len - index - 1); + } + + self.len -= 1; + ret + } +} diff --git a/gml/src/vm/code.rs b/gml/src/vm/code.rs index 1b3e6c6..d9fec42 100644 --- a/gml/src/vm/code.rs +++ b/gml/src/vm/code.rs @@ -102,6 +102,7 @@ pub enum Op { StoreScope, With, + ReleaseWith, LoadPointer, NextPointer, NePointer, diff --git a/gml/src/vm/instance_map.rs b/gml/src/vm/instance_map.rs new file mode 100644 index 0000000..fc97546 --- /dev/null +++ b/gml/src/vm/instance_map.rs @@ -0,0 +1,62 @@ +use std::mem; +use std::collections::HashMap; +use std::collections::hash_map::Entry; +use std::ops::Index; +use std::hash::Hash; + +use crate::rc_vec::RcVec; + +/// A hash map that preserves insertion order in a copy-on-write array of values. +/// +/// This is designed to map instance IDs to entity handles. Using a copy-on-write `RcVec` for +/// the entity handles allows `with` loops to iterate over them without being invalidated by +/// reallocations or mutations. +pub struct InstanceMap where K: Eq + Hash { + keys: HashMap, + values: RcVec, +} + +impl Default for InstanceMap where K: Eq + Hash { + fn default() -> Self { + InstanceMap { + keys: HashMap::default(), + values: RcVec::default(), + } + } +} + +impl InstanceMap where K: Eq + Hash { + pub fn values(&self) -> &RcVec { &self.values } + + pub fn len(&self) -> usize { self.values.len() } + + pub fn is_empty(&self) -> bool { self.len() == 0 } + + pub fn contains_key(&self, key: K) -> bool { self.keys.contains_key(&key) } +} + +impl InstanceMap where K: Eq + Hash, V: Clone { + pub fn insert(&mut self, key: K, value: V) -> Option { + match self.keys.entry(key) { + Entry::Occupied(entry) => { + let values = self.values.make_mut(); + let old = mem::replace(&mut values[*entry.get()], value); + Some(old) + } + + Entry::Vacant(entry) => { + entry.insert(self.values.len()); + self.values.push(value); + None + } + } + } +} + +impl Index for InstanceMap where K: Eq + Hash { + type Output = V; + + fn index(&self, key: K) -> &V { + &self.values[self.keys[&key]] + } +} diff --git a/gml/src/vm/interpreter.rs b/gml/src/vm/interpreter.rs index 7fbc5dc..9421806 100644 --- a/gml/src/vm/interpreter.rs +++ b/gml/src/vm/interpreter.rs @@ -3,12 +3,14 @@ use std::convert::TryFrom; use std::mem::ManuallyDrop; use crate::symbol::Symbol; +use crate::rc_vec::RcVec; use crate::vm::{code, world}; use crate::vm::{World, Entity, Value, ValueRef, Data, to_i32, to_bool, Array, ArrayRef, Resources}; /// A single thread of GML execution. pub struct Thread { returns: Vec<(Symbol, usize, usize)>, + withs: Vec>, stack: Vec, self_entity: Entity, @@ -111,6 +113,7 @@ impl Default for Thread { fn default() -> Self { Thread { returns: Vec::default(), + withs: Vec::default(), stack: Vec::default(), self_entity: Entity(0), @@ -465,25 +468,44 @@ fn execute_internal<'a>( _ => break ErrorKind::TypeUnary(op, scope.clone()), }; + let mut values = RcVec::default(); let slice = match scope { SELF => slice::from_ref(&thread.self_entity), OTHER => slice::from_ref(&thread.other_entity), - ALL => world(engine).instances.values(), + ALL => { + values = world(engine).instances.values().clone(); + &values[..] + } NOONE => &[], GLOBAL => slice::from_ref(&world::GLOBAL), LOCAL => &[], // TODO: error - object if (0..=100_000).contains(&object) => - &world(engine).objects[&object][..], + object if (0..=100_000).contains(&object) => { + values = world(engine).objects[&object].clone(); + &values[..] + } instance if (100_001..).contains(&instance) => slice::from_ref(&world(engine).instances[instance]), _ => &[], // TODO: error }; + + // Safety: There are two cases to consider here: + // - The iterator points to a single element. It will be dereferenced only once, + // before any operations that can invalidate it. + // - The iterator points into an `RcVec` of entities. The `RcVec`'s refcount is + // incremented above, and decremented by `ReleaseWith`. Any attempt to mutate the + // `RcVec` in between will make a copy of the array, and mutate the copy instead. unsafe { let first = slice.as_ptr() as *mut Entity; let last = first.offset(slice.len() as isize); registers[ptr].iterator = ptr::NonNull::new_unchecked(first); registers[end].iterator = ptr::NonNull::new_unchecked(last); } + + thread.withs.push(values); + } + + (code::Op::ReleaseWith, _, _, _) => { + thread.withs.pop(); } (code::Op::LoadPointer, t, ptr, _) => { diff --git a/gml/src/vm/mod.rs b/gml/src/vm/mod.rs index 0805ac7..3162d11 100644 --- a/gml/src/vm/mod.rs +++ b/gml/src/vm/mod.rs @@ -5,16 +5,18 @@ use crate::symbol::Symbol; pub use crate::vm::interpreter::{Thread, Error, ErrorKind, SELF, OTHER, ALL, NOONE, GLOBAL}; pub use crate::vm::world::World; pub use crate::vm::entity_map::{Entity, EntityAllocator, EntityMap}; +pub use crate::vm::instance_map::InstanceMap; pub use crate::vm::value::{Value, ValueRef, Data, to_i32, to_u32, to_bool}; pub use crate::vm::array::{Array, ArrayRef}; pub mod code; -pub mod debug; pub mod world; -mod interpreter; mod entity_map; +mod instance_map; +mod interpreter; mod value; mod array; +mod debug; pub struct Resources { pub scripts: HashMap, diff --git a/gml/src/vm/world.rs b/gml/src/vm/world.rs index 218dc15..689d912 100644 --- a/gml/src/vm/world.rs +++ b/gml/src/vm/world.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::index_map::IndexMap; +use crate::rc_vec::RcVec; use crate::symbol::Symbol; use crate::vm; @@ -8,8 +8,8 @@ pub struct World { pub entities: vm::EntityAllocator, pub members: vm::EntityMap>, - pub objects: HashMap>, - pub instances: IndexMap, + pub objects: HashMap>, + pub instances: vm::InstanceMap, pub globals: HashSet, } @@ -23,7 +23,7 @@ impl Default for World { members: vm::EntityMap::default(), objects: HashMap::default(), - instances: IndexMap::default(), + instances: vm::InstanceMap::default(), globals: HashSet::default(), }; diff --git a/gml/tests/interpreter.rs b/gml/tests/interpreter.rs index 1809149..399daa9 100644 --- a/gml/tests/interpreter.rs +++ b/gml/tests/interpreter.rs @@ -47,7 +47,7 @@ fn member() -> Result<(), vm::Error> { let mut engine = Engine::default(); let mut thread = vm::Thread::default(); - let entity = engine.world.create_instance(0, 100001); + let (_, entity) = engine.create_instance(); thread.set_self(entity); assert_eq!(thread.execute(&mut engine, &resources, member, vec![])?, vm::Value::from(8)); @@ -86,7 +86,7 @@ fn builtin() -> Result<(), vm::Error> { let mut engine = Engine::default(); let mut thread = vm::Thread::default(); - let entity = engine.world.create_instance(0, 100001); + let (_, entity) = engine.create_instance(); engine.instances.insert(entity, Instance::default()); thread.set_self(entity); @@ -120,7 +120,7 @@ fn global() -> Result<(), vm::Error> { let mut engine = Engine::default(); let mut thread = vm::Thread::default(); - let entity = engine.world.create_instance(0, 100001); + let (_, entity) = engine.create_instance(); thread.set_self(entity); assert_eq!(thread.execute(&mut engine, &resources, global, vec![])?, vm::Value::from(8)); @@ -128,37 +128,70 @@ fn global() -> Result<(), vm::Error> { } #[test] -fn with() -> Result<(), vm::Error> { +fn with_scopes() -> Result<(), vm::Error> { let mut items = HashMap::new(); let with = Symbol::intern("with"); items.insert(with, Item::Script(b"{ - var a, b; - a = 100001 - b = 100002 c = 3 with (all) { n = other.c m = other.c } - with (a) { + with (argument0) { n = 5 } - with (b) { + with (argument1) { m = 13 } - return a.n + b.n + a.m + b.m + return argument0.n + argument1.n + argument0.m + argument1.m }")); let resources = build(&items).unwrap_or_else(|_| panic!()); let mut engine = Engine::default(); let mut thread = vm::Thread::default(); - let a = engine.world.create_instance(0, 100001); - engine.world.create_instance(0, 100002); - thread.set_self(a); + let (a, entity) = engine.create_instance(); + let (b, _) = engine.create_instance(); + thread.set_self(entity); + + let arguments = vec![vm::Value::from(a), vm::Value::from(b)]; + assert_eq!(thread.execute(&mut engine, &resources, with, arguments)?, vm::Value::from(24.0)); + Ok(()) +} + +#[test] +fn with_iterator() -> Result<(), vm::Error> { + let mut items = HashMap::new(); + + let with = Symbol::intern("with"); + items.insert(with, Item::Script(b"{ + with (all) { + c = 3 + var i; + i = create_instance() + i.c = 5 + } + var s; + s = 0 + with (all) { + s += c + } + return s + }")); + + let create_instance = Symbol::intern("create_instance"); + items.insert(create_instance, Item::Native(Engine::native_create_instance, 0, false)); + + let resources = build(&items).unwrap_or_else(|_| panic!()); + let mut engine = Engine::default(); + let mut thread = vm::Thread::default(); + + let (_, entity) = engine.create_instance(); + engine.create_instance(); + thread.set_self(entity); - assert_eq!(thread.execute(&mut engine, &resources, with, vec![])?, vm::Value::from(24.0)); + assert_eq!(thread.execute(&mut engine, &resources, with, vec![])?, vm::Value::from(16.0)); Ok(()) } @@ -408,10 +441,10 @@ fn ffi() -> Result<(), vm::Error> { Ok(()) } -#[derive(Default)] struct Engine { world: vm::World, + next_id: i32, instances: HashMap, global_scalar: i32, @@ -422,6 +455,20 @@ impl vm::world::Api for Engine { fn receivers(&mut self) -> &mut vm::World { &mut self.world } } +impl Default for Engine { + fn default() -> Self { + Engine { + world: vm::World::default(), + + next_id: 100001, + instances: HashMap::default(), + + global_scalar: i32::default(), + global_array: <[f32; 2]>::default(), + } + } +} + impl Engine { fn native_add( &mut self, _resources: &vm::Resources, _entity: vm::Entity, arguments: &[vm::Value] @@ -434,6 +481,20 @@ impl Engine { Ok(value) } + fn native_create_instance( + &mut self, _resources: &vm::Resources, _entity: vm::Entity, _arguments: &[vm::Value] + ) -> Result { + let (id, _) = self.create_instance(); + Ok(vm::Value::from(id)) + } + + fn create_instance(&mut self) -> (i32, vm::Entity) { + let id = self.next_id; + self.next_id += 1; + + (id, self.world.create_instance(0, id)) + } + fn get_global_scalar(&mut self, _: vm::Entity, _: usize) -> vm::Value { vm::Value::from(self.global_scalar) }