From b28c077f3ecf49efa9d1c415281329d0bb9618c5 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Jun 2024 03:37:23 +0000 Subject: [PATCH 1/8] Require object reference to be aligned --- src/util/address.rs | 13 +++++++++++++ src/util/test_util/mock_vm.rs | 2 +- .../tests/mock_tests/mock_test_is_in_mmtk_spaces.rs | 10 ++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/util/address.rs b/src/util/address.rs index c38274a0e1..e6b37882fd 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -509,6 +509,10 @@ impl ObjectReference { /// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::vm::ObjectModel::address_to_ref()`] /// to turn addresses that are from [`crate::vm::ObjectModel::ref_to_address()`] back to object. pub fn from_raw_address(addr: Address) -> Option { + debug_assert!( + addr.is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + "ObjectReference is required to be word aligned" + ); NonZeroUsize::new(addr.0).map(ObjectReference) } @@ -522,6 +526,10 @@ impl ObjectReference { /// adding a positive offset to a non-zero address, we know the result must not be zero. 
pub unsafe fn from_raw_address_unchecked(addr: Address) -> ObjectReference { debug_assert!(!addr.is_zero()); + debug_assert!( + addr.is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + "ObjectReference is required to be word aligned" + ); ObjectReference(NonZeroUsize::new_unchecked(addr.0)) } @@ -560,6 +568,11 @@ impl ObjectReference { use crate::vm::ObjectModel; let obj = VM::VMObjectModel::address_to_ref(addr); debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj); + debug_assert!( + obj.to_raw_address() + .is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + "ObjectReference is required to be word aligned" + ); obj } diff --git a/src/util/test_util/mock_vm.rs b/src/util/test_util/mock_vm.rs index 1183639a53..4e3b66a4e3 100644 --- a/src/util/test_util/mock_vm.rs +++ b/src/util/test_util/mock_vm.rs @@ -28,7 +28,7 @@ use std::sync::Mutex; /// The offset between object reference and the allocation address if we use /// the default mock VM. -pub const DEFAULT_OBJECT_REF_OFFSET: usize = 4; +pub const DEFAULT_OBJECT_REF_OFFSET: usize = crate::util::constants::BYTES_IN_ADDRESS; // To mock static methods, we have to create a static instance of `MockVM`. lazy_static! { diff --git a/src/vm/tests/mock_tests/mock_test_is_in_mmtk_spaces.rs b/src/vm/tests/mock_tests/mock_test_is_in_mmtk_spaces.rs index 97a620f2ea..ad0032d165 100644 --- a/src/vm/tests/mock_tests/mock_test_is_in_mmtk_spaces.rs +++ b/src/vm/tests/mock_tests/mock_test_is_in_mmtk_spaces.rs @@ -38,7 +38,10 @@ pub fn max() { SINGLE_OBJECT.with_fixture(|_fixture| { assert!( !memory_manager::is_in_mmtk_spaces::( - ObjectReference::from_raw_address(Address::MAX).unwrap() + ObjectReference::from_raw_address( + Address::MAX.align_down(crate::util::constants::BYTES_IN_ADDRESS) + ) + .unwrap() ), "Address::MAX should not be in any MMTk spaces." 
); @@ -113,7 +116,10 @@ pub fn negative_offsets() { // It's just a smoke test. It is hard to predict if the addr is still in any space, // but it must not crash. let _ = memory_manager::is_in_mmtk_spaces::( - ObjectReference::from_raw_address(addr).unwrap(), + ObjectReference::from_raw_address( + addr.align_down(crate::util::constants::BYTES_IN_ADDRESS), + ) + .unwrap(), ); } }); From 3ea862abd3199e907e1556346f845d7070cbbf09 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 2 Jul 2024 02:05:13 +0000 Subject: [PATCH 2/8] Update docs --- src/util/address.rs | 13 +++++++---- src/vm/object_model.rs | 52 +++++++++++++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/util/address.rs b/src/util/address.rs index e6b37882fd..13cae85c70 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -486,12 +486,18 @@ use crate::vm::VMBinding; /// `usize`. For the convenience of passing `Option` to and from native (C/C++) /// programs, mmtk-core provides [`crate::util::api_util::NullableObjectReference`]. /// +/// Note that [`ObjectReference`] has to be word aligned. +/// /// [NPO]: https://doc.rust-lang.org/std/option/index.html#representation #[repr(transparent)] #[derive(Copy, Clone, Eq, Hash, PartialOrd, Ord, PartialEq, NoUninit)] pub struct ObjectReference(NonZeroUsize); impl ObjectReference { + /// The required minimal alignment for object reference. If the object reference's raw address is not aligned to this value, + /// you will see an assertion failure in the debug build. + pub const ALIGNMENT: usize = crate::util::constants::BYTES_IN_ADDRESS; + /// Cast the object reference to its raw address. This method is mostly for the convinience of a binding. /// /// MMTk should not make any assumption on the actual location of the address with the object reference. @@ -510,7 +516,7 @@ impl ObjectReference { /// to turn addresses that are from [`crate::vm::ObjectModel::ref_to_address()`] back to object. 
pub fn from_raw_address(addr: Address) -> Option { debug_assert!( - addr.is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + addr.is_aligned_to(Self::ALIGNMENT), "ObjectReference is required to be word aligned" ); NonZeroUsize::new(addr.0).map(ObjectReference) @@ -527,7 +533,7 @@ impl ObjectReference { pub unsafe fn from_raw_address_unchecked(addr: Address) -> ObjectReference { debug_assert!(!addr.is_zero()); debug_assert!( - addr.is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + addr.is_aligned_to(Self::ALIGNMENT), "ObjectReference is required to be word aligned" ); ObjectReference(NonZeroUsize::new_unchecked(addr.0)) @@ -569,8 +575,7 @@ impl ObjectReference { let obj = VM::VMObjectModel::address_to_ref(addr); debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj); debug_assert!( - obj.to_raw_address() - .is_aligned_to(crate::util::constants::BYTES_IN_ADDRESS), + obj.to_raw_address().is_aligned_to(Self::ALIGNMENT), "ObjectReference is required to be word aligned" ); obj diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs index 9449be2564..f63cb53d06 100644 --- a/src/vm/object_model.rs +++ b/src/vm/object_model.rs @@ -27,9 +27,9 @@ use crate::vm::VMBinding; /// /// Note that depending on the selected GC plan, only a subset of the methods provided here will be used. 
/// -/// Side Specs Layout +/// # Side Specs Layout /// -/// Short version +/// ## Short version /// /// * For *global* side metadata: /// * The first spec: VMGlobalXXXSpec::side_first() @@ -38,7 +38,7 @@ use crate::vm::VMBinding; /// * The first spec: VMLocalXXXSpec::side_first() /// * The following specs: VMLocalXXXSpec::side_after(FIRST_LOCAL.as_spec()) /// -/// Detailed explanation +/// ## Detailed explanation /// /// There are two types of side metadata layout in MMTk: /// @@ -57,6 +57,37 @@ use crate::vm::VMBinding; /// and for a third SideMetadataSpec (`LS3`), the `offset` will be `BASE(LS2) + required_metadata_space_per_chunk(LS2)`. /// /// For all other policies, the `offset` starts from zero. This is safe because no two policies ever manage one chunk, so there will be no overlap. +/// +/// # Object Layout Addresses +/// +/// MMTk tries to be general to cope with different language implementations and different object models. Thus it does not assume the internal of the object model. +/// Instead, MMTk only uses the following addresses for an object. If you find the MMTk's approach does not work for your language in practice, you are welcome to submit an issue +/// or engage with MMTk team on Zulip to disucss further. +/// +/// ### Object Reference +/// +/// See [`crate::util::address::ObjectReference`]. This is a special address that represents the object. +/// MMTk refers to an object by its object reference. An object reference cannot be NULL, and has to be +/// word aligned ([`crate::util::address::ObjectReference::ALIGNMENT`]). It is allowed that an object +/// reference is not in the allocated memory for the object. +/// +/// ### Object Start Address +/// +/// The address is returned by an allocation call [`crate::memory_manager::alloc`]. This is the start of the address range of the allocation. +/// [`ObjectModel::ref_to_object_start`] should return this address for a given object. 
+/// +/// ### In-object Address +/// +/// As the object reference address may be outside the allocated memory, and calculating the object start address may +/// be complex, MMTk requires a fixed and efficient in-object address for each object. The in-object address should be a constant +/// offset from the object reference address, and should be inside the allocated memory. MMTk requires the conversion +/// from the object reference to the in-object address ([`ObjectModel::ref_to_address`]) and from the in-object address +/// to the object reference ([`ObjectModel::address_to_ref`]). +/// +/// ### Object header address +/// +/// If a binding allows MMTk to use its header bits for object metadata, they need to supply an object header +/// address ([`ObjectModel::ref_to_header`]). MMTk will access header bits using this address. pub trait ObjectModel { // Per-object Metadata Spec definitions go here // @@ -439,7 +470,7 @@ pub trait ObjectModel { /// Return an address guaranteed to be inside the storage associated /// with an object. The returned address needs to be deterministic /// for an given object. For a given object, the returned address - /// should be a constant offset from the object reference address. + /// *must* be a constant offset from the object reference address. /// /// Note that MMTk may forge an arbitrary address /// directly into a potential object reference, and call this method on the 'object reference'. @@ -453,12 +484,17 @@ pub trait ObjectModel { fn ref_to_address(object: ObjectReference) -> Address; /// Return an object for a given address returned by `ref_to_address()`. - /// This does exactly the opposite of `ref_to_address()`. The argument `addr` has - /// to be an address that is previously returned from `ref_to_address()`. Invoking this method - /// with an unexpected address is undefined behavior. + /// This does exactly the opposite of `ref_to_address()`. 
The returned + /// object reference address *must* be a constant offset from the given address. + /// + /// Note that MMTk may forge an address and call this method with the address. + /// Thus the returned object reference may not always be valid. The binding + /// should simply apply a constant offset the given address, and return + /// it as an object reference, and should not assume the returned object reference + /// is always valid. MMTk is reponsible for using the returned object reference. /// /// Arguments: - /// * `addr`: An address that is returned from `ref_to_address()` + /// * `addr`: An in-object address. fn address_to_ref(addr: Address) -> ObjectReference; /// Dump debugging information for an object. From 6ce0d2ca79e14b64eee321a97c5ae67227f36228 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 10 Jul 2024 04:52:17 +0000 Subject: [PATCH 3/8] Introduce a constant IN_OBJECT_ADDRESS_OFFSET --- benches/mock_bench/sft.rs | 4 +- docs/dummyvm/src/api.rs | 5 +- docs/dummyvm/src/lib.rs | 7 +++ docs/dummyvm/src/object_model.rs | 26 ++++------- docs/userguide/src/migration/prefix.md | 16 +++++++ src/util/address.rs | 32 +++++++------ src/util/test_util/fixtures.rs | 8 ++-- src/util/test_util/mock_vm.rs | 23 ++++------ src/vm/object_model.rs | 46 +++++-------------- .../mock_tests/mock_test_vm_layout_default.rs | 2 +- 10 files changed, 78 insertions(+), 91 deletions(-) diff --git a/benches/mock_bench/sft.rs b/benches/mock_bench/sft.rs index 9788779011..d8a432d69f 100644 --- a/benches/mock_bench/sft.rs +++ b/benches/mock_bench/sft.rs @@ -4,14 +4,12 @@ use criterion::Criterion; use mmtk::memory_manager; use mmtk::util::test_util::fixtures::*; use mmtk::util::test_util::mock_vm::*; -use mmtk::vm::ObjectModel; -use mmtk::vm::VMBinding; use mmtk::AllocationSemantics; pub fn bench(c: &mut Criterion) { let mut fixture = MutatorFixture::create(); let addr = memory_manager::alloc(&mut fixture.mutator, 8, 8, 0, AllocationSemantics::Default); - let obj = 
::VMObjectModel::address_to_ref(addr); + let obj = MockVM::object_start_to_ref(addr); c.bench_function("sft read", |b| { b.iter(|| memory_manager::is_in_mmtk_spaces::(black_box(obj))) diff --git a/docs/dummyvm/src/api.rs b/docs/dummyvm/src/api.rs index 598168d1be..27f7b1f87a 100644 --- a/docs/dummyvm/src/api.rs +++ b/docs/dummyvm/src/api.rs @@ -259,7 +259,6 @@ pub extern "C" fn mmtk_get_malloc_bytes() -> usize { #[cfg(test)] mod tests { use super::*; - use crate::mmtk::vm::ObjectModel; use std::ffi::CString; #[test] @@ -293,8 +292,8 @@ mod tests { let addr = mmtk_alloc(mutator, 16, 8, 0, mmtk::AllocationSemantics::Default); assert!(!addr.is_zero()); - // Turn the allocation address into the object reference - let obj = crate::object_model::VMObjectModel::address_to_ref(addr); + // Turn the allocation address into the object reference. + let obj = DummyVM::object_start_to_ref(addr); // Post allocation mmtk_post_alloc(mutator, obj, 16, mmtk::AllocationSemantics::Default); diff --git a/docs/dummyvm/src/lib.rs b/docs/dummyvm/src/lib.rs index 19a7436b49..7c5341c76d 100644 --- a/docs/dummyvm/src/lib.rs +++ b/docs/dummyvm/src/lib.rs @@ -32,6 +32,13 @@ impl VMBinding for DummyVM { const MAX_ALIGNMENT: usize = 1 << 6; } +impl DummyVM { + pub fn object_start_to_ref(start: Address) -> ObjectReference { + // Safety: start is the allocation result, and it should not be zero with an offset. + unsafe { ObjectReference::from_raw_address_unchecked(start + crate::object_model::OBJECT_REF_OFFSET) } + } +} + pub static SINGLETON: OnceLock>> = OnceLock::new(); fn mmtk() -> &'static MMTK { diff --git a/docs/dummyvm/src/object_model.rs b/docs/dummyvm/src/object_model.rs index dcc5d1ad86..d1747dbf19 100644 --- a/docs/dummyvm/src/object_model.rs +++ b/docs/dummyvm/src/object_model.rs @@ -5,13 +5,16 @@ use mmtk::vm::*; pub struct VMObjectModel {} -// This is the offset from the allocation result to the object reference for the object. 
-// The binding can set this to a different value if the ObjectReference in the VM has an offset from the allocation starting address. -// Many methods like `address_to_ref` and `ref_to_address` use this constant. -// For bindings that this offset is not a constant, you can implement the calculation in the methods, and -// remove this constant. +/// This is the offset from the allocation result to the object reference for the object. +/// For bindings that this offset is not a constant, you can implement the calculation in the method `ref_to_object_start``, and +/// remove this constant. pub const OBJECT_REF_OFFSET: usize = 0; +/// This is the offset from the object reference to an in-object address. The binding needs +/// to guarantee the in-object address is inside the storage associated with the object. +/// It has to be a constant offset. See `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`. +pub const IN_OBJECT_ADDRESS_OFFSET: isize = 0; + // This is the offset from the object reference to the object header. // This value is used in `ref_to_header` where MMTk loads header metadata from. pub const OBJECT_HEADER_OFFSET: usize = 0; @@ -83,18 +86,7 @@ impl ObjectModel for VMObjectModel { object.to_raw_address().sub(OBJECT_HEADER_OFFSET) } - fn ref_to_address(object: ObjectReference) -> Address { - // This method should return an address that is within the allocation. - // Using `ref_to_object_start` is always correct here. - // However, a binding may optimize this method to make it more efficient. - Self::ref_to_object_start(object) - } - - fn address_to_ref(addr: Address) -> ObjectReference { - // This is the reverse operation of `ref_to_address`. - // If the implementation of `ref_to_address` is changed, this implementation needs to be changed accordingly. 
- unsafe { ObjectReference::from_raw_address_unchecked(addr.add(OBJECT_REF_OFFSET)) } - } + const IN_OBJECT_ADDRESS_OFFSET: isize = IN_OBJECT_ADDRESS_OFFSET; fn dump_object(_object: ObjectReference) { unimplemented!() diff --git a/docs/userguide/src/migration/prefix.md b/docs/userguide/src/migration/prefix.md index c32108bba8..818193aa9b 100644 --- a/docs/userguide/src/migration/prefix.md +++ b/docs/userguide/src/migration/prefix.md @@ -30,6 +30,22 @@ Notes for the mmtk-core developers: +## 0.27.0 + +### Introduce `ObjectModel::IN_OBJECT_ADDRESS_OFFSET` + +```admonish tldr +We used to have `ObjectModel::ref_to_address` and `ObjectModel::address_to_ref`, and require +the object reference and the in-object address to have a constant offset. Now, the two methods +are removed, and replaced with a constant `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`. +``` + +API changes: +* trait `ObjectModel` + - The methods `ref_to_address` and `address_to_ref` are removed. + - Users are required to specify `IN_OBJECT_ADDRESS_OFFSET` instead, which is the offset from the object + reference to the in-object address (which was returned in the old `ref_to_address()`). + ## 0.26.0 ### Rename "edge" to "slot" diff --git a/src/util/address.rs b/src/util/address.rs index 13cae85c70..d30edf175e 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -212,6 +212,11 @@ impl Address { Address(self.0 - size) } + /// Apply an signed offset to the address. + pub const fn offset(self, offset: isize) -> Address { + Address((self.0 as isize).wrapping_add(offset) as usize) + } + /// Bitwise 'and' with a mask. pub const fn and(self, mask: usize) -> usize { self.0 & mask @@ -502,7 +507,7 @@ impl ObjectReference { /// /// MMTk should not make any assumption on the actual location of the address with the object reference. /// MMTk should not assume the address returned by this method is in our allocation. 
For the purposes of - /// setting object metadata, MMTk should use [`crate::vm::ObjectModel::ref_to_address()`] or [`crate::vm::ObjectModel::ref_to_header()`]. + /// setting object metadata, MMTk should use [`crate::util::ObjectReference::to_address`] or [`crate::util::ObjectReference::to_header`]. pub fn to_raw_address(self) -> Address { Address(self.0.get()) } @@ -512,8 +517,8 @@ impl ObjectReference { /// /// If `addr` is 0, the result is `None`. /// - /// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::vm::ObjectModel::address_to_ref()`] - /// to turn addresses that are from [`crate::vm::ObjectModel::ref_to_address()`] back to object. + /// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::util::ObjectReference::from_address`] + /// to turn addresses that are from [`crate::util::ObjectReference::to_address`] back to object. pub fn from_raw_address(addr: Address) -> Option { debug_assert!( addr.is_aligned_to(Self::ALIGNMENT), @@ -540,12 +545,11 @@ impl ObjectReference { } /// Get the in-heap address from an object reference. This method is used by MMTk to get an in-heap address - /// for an object reference. This method is syntactic sugar for [`crate::vm::ObjectModel::ref_to_address`]. See the - /// comments on [`crate::vm::ObjectModel::ref_to_address`]. + /// for an object reference. 
pub fn to_address(self) -> Address { use crate::vm::ObjectModel; - let to_address = VM::VMObjectModel::ref_to_address(self); - debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, to_address); + let to_address = Address(self.0.get()).offset(VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET); + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, in-object addr is {}", self, to_address); to_address } @@ -563,17 +567,19 @@ impl ObjectReference { pub fn to_object_start(self) -> Address { use crate::vm::ObjectModel; let object_start = VM::VMObjectModel::ref_to_object_start(self); - debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, object_start); + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_object_start() returns {}", self, object_start); object_start } - /// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`] - /// or [`crate::vm::ObjectModel::ref_to_address`]. This method is syntactic sugar for [`crate::vm::ObjectModel::address_to_ref`]. - /// See the comments on [`crate::vm::ObjectModel::address_to_ref`]. + /// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`]. 
pub fn from_address(addr: Address) -> ObjectReference { use crate::vm::ObjectModel; - let obj = VM::VMObjectModel::address_to_ref(addr); - debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj); + let obj = unsafe { + ObjectReference::from_raw_address_unchecked( + addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET), + ) + }; + debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, the object reference is {}", addr, obj); debug_assert!( obj.to_raw_address().is_aligned_to(Self::ALIGNMENT), "ObjectReference is required to be word aligned" diff --git a/src/util/test_util/fixtures.rs b/src/util/test_util/fixtures.rs index d40ea302aa..f5aed33660 100644 --- a/src/util/test_util/fixtures.rs +++ b/src/util/test_util/fixtures.rs @@ -222,7 +222,6 @@ pub struct SingleObject { impl FixtureContent for SingleObject { fn create() -> Self { - use crate::vm::object_model::ObjectModel; let mut mutator = MutatorFixture::create(); // A relatively small object, typical for Ruby. 
@@ -232,7 +231,7 @@ impl FixtureContent for SingleObject { let addr = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics); assert!(!addr.is_zero()); - let objref = MockVM::address_to_ref(addr); + let objref = MockVM::object_start_to_ref(addr); memory_manager::post_alloc(&mut mutator.mutator, objref, size, semantics); SingleObject { objref, mutator } @@ -257,7 +256,6 @@ pub struct TwoObjects { impl FixtureContent for TwoObjects { fn create() -> Self { - use crate::vm::object_model::ObjectModel; let mut mutator = MutatorFixture::create(); let size = 128; @@ -266,13 +264,13 @@ impl FixtureContent for TwoObjects { let addr1 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics); assert!(!addr1.is_zero()); - let objref1 = MockVM::address_to_ref(addr1); + let objref1 = MockVM::object_start_to_ref(addr1); memory_manager::post_alloc(&mut mutator.mutator, objref1, size, semantics); let addr2 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics); assert!(!addr2.is_zero()); - let objref2 = MockVM::address_to_ref(addr2); + let objref2 = MockVM::object_start_to_ref(addr2); memory_manager::post_alloc(&mut mutator.mutator, objref2, size, semantics); TwoObjects { diff --git a/src/util/test_util/mock_vm.rs b/src/util/test_util/mock_vm.rs index 4e3b66a4e3..4f8d49b1be 100644 --- a/src/util/test_util/mock_vm.rs +++ b/src/util/test_util/mock_vm.rs @@ -231,8 +231,6 @@ pub struct MockVM { MockMethod<(ObjectReference, Address), ObjectReference>, pub ref_to_object_start: MockMethod, pub ref_to_header: MockMethod, - pub ref_to_address: MockMethod, - pub address_to_ref: MockMethod, pub dump_object: MockMethod, // reference glue pub weakref_clear_referent: MockMethod, @@ -304,12 +302,6 @@ impl Default for MockVM { object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET) })), ref_to_header: MockMethod::new_fixed(Box::new(|object| object.to_raw_address())), - ref_to_address: MockMethod::new_fixed(Box::new(|object| { - 
object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET) - })), - address_to_ref: MockMethod::new_fixed(Box::new(|addr| { - ObjectReference::from_raw_address(addr.add(DEFAULT_OBJECT_REF_OFFSET)).unwrap() - })), dump_object: MockMethod::new_unimplemented(), weakref_clear_referent: MockMethod::new_unimplemented(), @@ -531,13 +523,8 @@ impl crate::vm::ObjectModel for MockVM { mock!(ref_to_header(object)) } - fn ref_to_address(object: ObjectReference) -> Address { - mock!(ref_to_address(object)) - } - - fn address_to_ref(addr: Address) -> ObjectReference { - mock!(address_to_ref(addr)) - } + // TODO: This is not mocked. We need a way to deal with it. + const IN_OBJECT_ADDRESS_OFFSET: isize = -(DEFAULT_OBJECT_REF_OFFSET as isize); fn dump_object(object: ObjectReference) { mock!(dump_object(object)) @@ -629,3 +616,9 @@ impl crate::vm::Scanning for MockVM { mock_any!(forward_weak_refs(worker, tracer_context)) } } + +impl MockVM { + pub fn object_start_to_ref(start: Address) -> ObjectReference { + ObjectReference::from_raw_address(start + DEFAULT_OBJECT_REF_OFFSET).unwrap() + } +} diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs index f63cb53d06..92a87eb5e1 100644 --- a/src/vm/object_model.rs +++ b/src/vm/object_model.rs @@ -80,9 +80,8 @@ use crate::vm::VMBinding; /// /// As the object reference address may be outside the allocated memory, and calculating the object start address may /// be complex, MMTk requires a fixed and efficient in-object address for each object. The in-object address should be a constant -/// offset from the object reference address, and should be inside the allocated memory. MMTk requires the conversion -/// from the object reference to the in-object address ([`ObjectModel::ref_to_address`]) and from the in-object address -/// to the object reference ([`ObjectModel::address_to_ref`]). +/// offset from the object reference address, and should be inside the allocated memory. 
MMTk requires the binding to +/// specify the offset from the object reference to the in-object address by [`ObjectModel::IN_OBJECT_ADDRESS_OFFSET`]. /// /// ### Object header address /// @@ -433,8 +432,12 @@ pub trait ObjectModel { /// mature space for generational plans. const VM_WORST_CASE_COPY_EXPANSION: f64 = 1.5; - /// If this is true, the binding guarantees that an object reference's raw address is always equal to the return value of the `ref_to_address` method - /// and the return value of the `ref_to_object_start` method. This is a very strong guarantee, but it is also helpful for MMTk to + /// If this is true, the binding guarantees that the object reference's raw address, + /// the in-object address, and the object start is always the same address. To be precise, + /// 1. an object reference's raw address is always equal to the return value of the `ref_to_object_start` method, + /// 2. `IN_OBJECT_ADDRESS_OFFSET` is 0. + /// + /// This is a very strong guarantee, but it is also helpful for MMTk to /// make some assumptions and optimize for this case. /// If a binding sets this to true, and the related methods return inconsistent results, this is an undefined behavior. MMTk may panic /// if any assertion catches this error, but may also fail silently. @@ -467,35 +470,10 @@ pub trait ObjectModel { /// * `object`: The object to be queried. fn ref_to_header(object: ObjectReference) -> Address; - /// Return an address guaranteed to be inside the storage associated - /// with an object. The returned address needs to be deterministic - /// for an given object. For a given object, the returned address - /// *must* be a constant offset from the object reference address. - /// - /// Note that MMTk may forge an arbitrary address - /// directly into a potential object reference, and call this method on the 'object reference'. 
- /// In that case, the argument `object` may not be a valid object reference, - /// and the implementation of this method should not use any object metadata. - /// - /// MMTk uses this method more frequently than [`crate::vm::ObjectModel::ref_to_object_start`]. - /// - /// Arguments: - /// * `object`: The object to be queried. - fn ref_to_address(object: ObjectReference) -> Address; - - /// Return an object for a given address returned by `ref_to_address()`. - /// This does exactly the opposite of `ref_to_address()`. The returned - /// object reference address *must* be a constant offset from the given address. - /// - /// Note that MMTk may forge an address and call this method with the address. - /// Thus the returned object reference may not always be valid. The binding - /// should simply apply a constant offset the given address, and return - /// it as an object reference, and should not assume the returned object reference - /// is always valid. MMTk is reponsible for using the returned object reference. - /// - /// Arguments: - /// * `addr`: An in-object address. - fn address_to_ref(addr: Address) -> ObjectReference; + /// The offset from the object reference to an in-object address. + /// The binding needs to guarantee that obj_ref.to_raw_address() + IN_OBJECT_ADDRESS_OFFSET + /// is inside the storage associated with the object. + const IN_OBJECT_ADDRESS_OFFSET: isize; /// Dump debugging information for an object. 
/// diff --git a/src/vm/tests/mock_tests/mock_test_vm_layout_default.rs b/src/vm/tests/mock_tests/mock_test_vm_layout_default.rs index f4f4ac5696..0a80ab5fda 100644 --- a/src/vm/tests/mock_tests/mock_test_vm_layout_default.rs +++ b/src/vm/tests/mock_tests/mock_test_vm_layout_default.rs @@ -22,7 +22,7 @@ pub fn test_with_vm_layout(layout: Option) { // Test allocation let addr = memory_manager::alloc(&mut fixture.mutator, 8, 8, 0, AllocationSemantics::Default); - let obj = ::VMObjectModel::address_to_ref(addr); + let obj = MockVM::object_start_to_ref(addr); // Test SFT assert!(memory_manager::is_in_mmtk_spaces::(obj)); // Test mmapper From 80253fdda7c21366abd3f63b180dc2f614c93530 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 10 Jul 2024 05:19:26 +0000 Subject: [PATCH 4/8] Fix missing imports in DummyVM --- docs/dummyvm/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/dummyvm/src/lib.rs b/docs/dummyvm/src/lib.rs index 7c5341c76d..50cf4cd08c 100644 --- a/docs/dummyvm/src/lib.rs +++ b/docs/dummyvm/src/lib.rs @@ -32,6 +32,8 @@ impl VMBinding for DummyVM { const MAX_ALIGNMENT: usize = 1 << 6; } +use mmtk::util::{Address, ObjectReference}; + impl DummyVM { pub fn object_start_to_ref(start: Address) -> ObjectReference { // Safety: start is the allocation result, and it should not be zero with an offset. From c284fe5349144803b4d18f1c1749e74118c5cb3d Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 10 Jul 2024 05:33:39 +0000 Subject: [PATCH 5/8] Fix a few issues in the comments --- src/util/address.rs | 2 +- src/vm/object_model.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/util/address.rs b/src/util/address.rs index d30edf175e..8e6fbd4b58 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -500,7 +500,7 @@ pub struct ObjectReference(NonZeroUsize); impl ObjectReference { /// The required minimal alignment for object reference. 
If the object reference's raw address is not aligned to this value, - /// you will see an assertion failure in the debug build. + /// you will see an assertion failure in the debug build when constructing an object reference instance. pub const ALIGNMENT: usize = crate::util::constants::BYTES_IN_ADDRESS; /// Cast the object reference to its raw address. This method is mostly for the convinience of a binding. diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs index 92a87eb5e1..ff65d02cb9 100644 --- a/src/vm/object_model.rs +++ b/src/vm/object_model.rs @@ -64,28 +64,28 @@ use crate::vm::VMBinding; /// Instead, MMTk only uses the following addresses for an object. If you find the MMTk's approach does not work for your language in practice, you are welcome to submit an issue /// or engage with MMTk team on Zulip to disucss further. /// -/// ### Object Reference +/// ## Object Reference /// /// See [`crate::util::address::ObjectReference`]. This is a special address that represents the object. /// MMTk refers to an object by its object reference. An object reference cannot be NULL, and has to be /// word aligned ([`crate::util::address::ObjectReference::ALIGNMENT`]). It is allowed that an object /// reference is not in the allocated memory for the object. /// -/// ### Object Start Address +/// ## Object Start Address /// -/// The address is returned by an allocation call [`crate::memory_manager::alloc`]. This is the start of the address range of the allocation. +/// This address is returned by an allocation call [`crate::memory_manager::alloc`]. This is the start of the address range of the allocation. /// [`ObjectModel::ref_to_object_start`] should return this address for a given object. /// -/// ### In-object Address +/// ## In-object Address /// /// As the object reference address may be outside the allocated memory, and calculating the object start address may -/// be complex, MMTk requires a fixed and efficient in-object address for each object. 
The in-object address should be a constant -/// offset from the object reference address, and should be inside the allocated memory. MMTk requires the binding to +/// be complex, MMTk requires a fixed and efficient in-object address for each object. The in-object address must be a constant +/// offset from the object reference address, and must be inside the allocated memory. MMTk requires the binding to /// specify the offset from the object reference to the in-object address by [`ObjectModel::IN_OBJECT_ADDRESS_OFFSET`]. /// -/// ### Object header address +/// ## Object header address /// -/// If a binding allows MMTk to use its header bits for object metadata, they need to supply an object header +/// If a binding allows MMTk to use its header bits for object metadata, it needs to supply an object header /// address ([`ObjectModel::ref_to_header`]). MMTk will access header bits using this address. pub trait ObjectModel { // Per-object Metadata Spec definitions go here From 3aee38712d2db9c76ce8709aaae7bc46ac112025 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 10 Jul 2024 06:20:50 +0000 Subject: [PATCH 6/8] Apply cargo fmt to dummyvm --- docs/dummyvm/src/lib.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/dummyvm/src/lib.rs b/docs/dummyvm/src/lib.rs index 50cf4cd08c..17c5abe9a3 100644 --- a/docs/dummyvm/src/lib.rs +++ b/docs/dummyvm/src/lib.rs @@ -37,7 +37,11 @@ use mmtk::util::{Address, ObjectReference}; impl DummyVM { pub fn object_start_to_ref(start: Address) -> ObjectReference { // Safety: start is the allocation result, and it should not be zero with an offset. 
- unsafe { ObjectReference::from_raw_address_unchecked(start + crate::object_model::OBJECT_REF_OFFSET) } + unsafe { + ObjectReference::from_raw_address_unchecked( + start + crate::object_model::OBJECT_REF_OFFSET, + ) + } } } From 29dfce43f3eb1828e8efb967cc7bdf7566df4426 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 10 Jul 2024 06:37:32 +0000 Subject: [PATCH 7/8] Minor changes to ObjectReference based on the review --- src/util/address.rs | 2 +- src/vm/object_model.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/address.rs b/src/util/address.rs index 8e6fbd4b58..d5e720c0ed 100644 --- a/src/util/address.rs +++ b/src/util/address.rs @@ -214,7 +214,7 @@ impl Address { /// Apply an signed offset to the address. pub const fn offset(self, offset: isize) -> Address { - Address((self.0 as isize).wrapping_add(offset) as usize) + Address(self.0.wrapping_add_signed(offset)) } /// Bitwise 'and' with a mask. diff --git a/src/vm/object_model.rs b/src/vm/object_model.rs index ff65d02cb9..c7cc816e1e 100644 --- a/src/vm/object_model.rs +++ b/src/vm/object_model.rs @@ -433,7 +433,7 @@ pub trait ObjectModel { const VM_WORST_CASE_COPY_EXPANSION: f64 = 1.5; /// If this is true, the binding guarantees that the object reference's raw address, - /// the in-object address, and the object start is always the same address. To be precise, + /// the in-object address, and the object start are always the same address. To be precise, /// 1. an object reference's raw address is always equal to the return value of the `ref_to_object_start` method, /// 2. `IN_OBJECT_ADDRESS_OFFSET` is 0. 
/// From 56591c11f44aae213d89707e23c2824bf93e8370 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 11 Jul 2024 03:26:44 +0000 Subject: [PATCH 8/8] Minor updates to the docs on is_mmtk_object --- docs/userguide/src/migration/prefix.md | 5 ++++- src/memory_manager.rs | 10 ++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/docs/userguide/src/migration/prefix.md b/docs/userguide/src/migration/prefix.md index 818193aa9b..05fa6f91d9 100644 --- a/docs/userguide/src/migration/prefix.md +++ b/docs/userguide/src/migration/prefix.md @@ -44,7 +44,10 @@ API changes: * trait `ObjectModel` - The methods `ref_to_address` and `address_to_ref` are removed. - Users are required to specify `IN_OBJECT_ADDRESS_OFFSET` instead, which is the offset from the object - reference to the in-object address (which was returned in the old `ref_to_address()`). + reference to the in-object address (the in-object address was the return value of the old `ref_to_address()`). +* type `ObjectReference` + - Add a constant `ALIGNMENT` which is equal to the word size. All object references should be at least aligned + to the word size. This is checked in debug builds when an `ObjectReference` is constructed. ## 0.26.0 diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 6f41a3951b..7b3a495fa3 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -597,7 +597,6 @@ pub fn is_live_object(object: ObjectReference) -> bool { /// It is the byte granularity of the valid object (VO) bit. /// 3. Return false otherwise. This function never panics. /// -/// Case 2 means **this function is imprecise for misaligned addresses**. /// This function uses the "valid object (VO) bits" side metadata, i.e. a bitmap. /// For space efficiency, each bit of the bitmap governs a small region of memory.
/// The size of a region is currently defined as the [minimum object size](crate::util::constants::MIN_OBJECT_SIZE), @@ -606,13 +605,8 @@ pub fn is_live_object(object: ObjectReference) -> bool { /// The alignment of a region is also the region size. /// If a VO bit is `1`, the bitmap cannot tell which address within the 4-byte or 8-byte region /// is the valid object reference. -/// Therefore, if the input `addr` is not properly aligned, but is close to a valid object -/// reference, this function may still return true. -/// -/// For the reason above, the VM **must check if `addr` is properly aligned** before calling this -/// function. For most VMs, valid object references are always aligned to the word size, so -/// checking `addr.is_aligned_to(BYTES_IN_WORD)` should usually work. If you are paranoid, you can -/// always check against [`crate::util::is_mmtk_object::VO_BIT_REGION_SIZE`]. +/// Therefore, if this method returns true, the binding can compute the object reference by +/// aligning the address to [`crate::util::ObjectReference::ALIGNMENT`]. /// /// This function is useful for conservative root scanning. The VM can iterate through all words in /// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses