Skip to content

Commit a3a72f8

Browse files
authored
Require object reference to be aligned (#1159)
This PR adds some assumptions about object references, and about the conversion between an object reference and its in-object address. To be specific, this PR requires the following: * Object references need to be word aligned * There must be a constant offset between the object reference and the in-object address. With these requirements, we can do the following computation to find the object reference for an internal pointer: 1. Use `ref_to_address` with the internal pointer to get the in-object address for the internal pointer. 2. Find the last VO bit that is set. 3. Calculate the data address for the bit. This address is aligned to the VO bit region. 4. Use `address_to_ref` with the data address above. The return value is an object reference, but may not be a valid one. 5. Use the word alignment to find the actual object reference. 6. Get the object size from the object reference, and make sure that the internal pointer is actually pointing into the object. Step 1 is necessary if the internal pointer is before the in-object address. If we are concerned about the invalid object reference in `ref_to_address` and `address_to_ref`, we could consider removing the two functions and using a constant offset instead.
1 parent 6cae51c commit a3a72f8

File tree

12 files changed

+147
-97
lines changed

12 files changed

+147
-97
lines changed

benches/mock_bench/sft.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,12 @@ use criterion::Criterion;
44
use mmtk::memory_manager;
55
use mmtk::util::test_util::fixtures::*;
66
use mmtk::util::test_util::mock_vm::*;
7-
use mmtk::vm::ObjectModel;
8-
use mmtk::vm::VMBinding;
97
use mmtk::AllocationSemantics;
108

119
pub fn bench(c: &mut Criterion) {
1210
let mut fixture = MutatorFixture::create();
1311
let addr = memory_manager::alloc(&mut fixture.mutator, 8, 8, 0, AllocationSemantics::Default);
14-
let obj = <MockVM as VMBinding>::VMObjectModel::address_to_ref(addr);
12+
let obj = MockVM::object_start_to_ref(addr);
1513

1614
c.bench_function("sft read", |b| {
1715
b.iter(|| memory_manager::is_in_mmtk_spaces::<MockVM>(black_box(obj)))

docs/dummyvm/src/api.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,6 @@ pub extern "C" fn mmtk_get_malloc_bytes() -> usize {
259259
#[cfg(test)]
260260
mod tests {
261261
use super::*;
262-
use crate::mmtk::vm::ObjectModel;
263262
use std::ffi::CString;
264263

265264
#[test]
@@ -293,8 +292,8 @@ mod tests {
293292
let addr = mmtk_alloc(mutator, 16, 8, 0, mmtk::AllocationSemantics::Default);
294293
assert!(!addr.is_zero());
295294

296-
// Turn the allocation address into the object reference
297-
let obj = crate::object_model::VMObjectModel::address_to_ref(addr);
295+
// Turn the allocation address into the object reference.
296+
let obj = DummyVM::object_start_to_ref(addr);
298297

299298
// Post allocation
300299
mmtk_post_alloc(mutator, obj, 16, mmtk::AllocationSemantics::Default);

docs/dummyvm/src/lib.rs

+13
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,19 @@ impl VMBinding for DummyVM {
3232
const MAX_ALIGNMENT: usize = 1 << 6;
3333
}
3434

35+
use mmtk::util::{Address, ObjectReference};
36+
37+
impl DummyVM {
38+
pub fn object_start_to_ref(start: Address) -> ObjectReference {
39+
// Safety: start is the allocation result, and it should not be zero with an offset.
40+
unsafe {
41+
ObjectReference::from_raw_address_unchecked(
42+
start + crate::object_model::OBJECT_REF_OFFSET,
43+
)
44+
}
45+
}
46+
}
47+
3548
pub static SINGLETON: OnceLock<Box<MMTK<DummyVM>>> = OnceLock::new();
3649

3750
fn mmtk() -> &'static MMTK<DummyVM> {

docs/dummyvm/src/object_model.rs

+9-17
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@ use mmtk::vm::*;
55

66
pub struct VMObjectModel {}
77

8-
// This is the offset from the allocation result to the object reference for the object.
9-
// The binding can set this to a different value if the ObjectReference in the VM has an offset from the allocation starting address.
10-
// Many methods like `address_to_ref` and `ref_to_address` use this constant.
11-
// For bindings that this offset is not a constant, you can implement the calculation in the methods, and
12-
// remove this constant.
8+
/// This is the offset from the allocation result to the object reference for the object.
9+
/// For bindings where this offset is not a constant, you can implement the calculation in the method `ref_to_object_start`, and
10+
/// remove this constant.
1311
pub const OBJECT_REF_OFFSET: usize = 0;
1412

13+
/// This is the offset from the object reference to an in-object address. The binding needs
14+
/// to guarantee the in-object address is inside the storage associated with the object.
15+
/// It has to be a constant offset. See `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
16+
pub const IN_OBJECT_ADDRESS_OFFSET: isize = 0;
17+
1518
// This is the offset from the object reference to the object header.
1619
// This value is used in `ref_to_header` where MMTk loads header metadata from.
1720
pub const OBJECT_HEADER_OFFSET: usize = 0;
@@ -83,18 +86,7 @@ impl ObjectModel<DummyVM> for VMObjectModel {
8386
object.to_raw_address().sub(OBJECT_HEADER_OFFSET)
8487
}
8588

86-
fn ref_to_address(object: ObjectReference) -> Address {
87-
// This method should return an address that is within the allocation.
88-
// Using `ref_to_object_start` is always correct here.
89-
// However, a binding may optimize this method to make it more efficient.
90-
Self::ref_to_object_start(object)
91-
}
92-
93-
fn address_to_ref(addr: Address) -> ObjectReference {
94-
// This is the reverse operation of `ref_to_address`.
95-
// If the implementation of `ref_to_address` is changed, this implementation needs to be changed accordingly.
96-
unsafe { ObjectReference::from_raw_address_unchecked(addr.add(OBJECT_REF_OFFSET)) }
97-
}
89+
const IN_OBJECT_ADDRESS_OFFSET: isize = IN_OBJECT_ADDRESS_OFFSET;
9890

9991
fn dump_object(_object: ObjectReference) {
10092
unimplemented!()

docs/userguide/src/migration/prefix.md

+19
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,25 @@ Notes for the mmtk-core developers:
3030

3131
<!-- Insert new versions here -->
3232

33+
## 0.27.0
34+
35+
### Introduce `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`
36+
37+
```admonish tldr
38+
We used to have `ObjectModel::ref_to_address` and `ObjectModel::address_to_ref`, and require
39+
the object reference and the in-object address to have a constant offset. Now, the two methods
40+
are removed, and replaced with a constant `ObjectModel::IN_OBJECT_ADDRESS_OFFSET`.
41+
```
42+
43+
API changes:
44+
* trait `ObjectModel`
45+
- The methods `ref_to_address` and `address_to_ref` are removed.
46+
- Users are required to specify `IN_OBJECT_ADDRESS_OFFSET` instead, which is the offset from the object
47+
reference to the in-object address (the in-object address was the return value for the old `ref_to_address()`).
48+
* type `ObjectReference`
49+
- Add a constant `ALIGNMENT` which is equal to the word size. All object references should be at least aligned
50+
to the word size. This is checked in debug builds when an `ObjectReference` is constructed.
51+
3352
## 0.26.0
3453

3554
### Rename "edge" to "slot"

src/memory_manager.rs

+2-8
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,6 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
597597
/// It is the byte granularity of the valid object (VO) bit.
598598
/// 3. Return false otherwise. This function never panics.
599599
///
600-
/// Case 2 means **this function is imprecise for misaligned addresses**.
601600
/// This function uses the "valid object (VO) bits" side metadata, i.e. a bitmap.
602601
/// For space efficiency, each bit of the bitmap governs a small region of memory.
603602
/// The size of a region is currently defined as the [minimum object size](crate::util::constants::MIN_OBJECT_SIZE),
@@ -606,13 +605,8 @@ pub fn is_live_object<VM: VMBinding>(object: ObjectReference) -> bool {
606605
/// The alignment of a region is also the region size.
607606
/// If a VO bit is `1`, the bitmap cannot tell which address within the 4-byte or 8-byte region
608607
/// is the valid object reference.
609-
/// Therefore, if the input `addr` is not properly aligned, but is close to a valid object
610-
/// reference, this function may still return true.
611-
///
612-
/// For the reason above, the VM **must check if `addr` is properly aligned** before calling this
613-
/// function. For most VMs, valid object references are always aligned to the word size, so
614-
/// checking `addr.is_aligned_to(BYTES_IN_WORD)` should usually work. If you are paranoid, you can
615-
/// always check against [`crate::util::is_mmtk_object::VO_BIT_REGION_SIZE`].
608+
/// Therefore, if this method returns true, the binding can compute the object reference by
609+
/// aligning the address to [`crate::util::ObjectReference::ALIGNMENT`].
616610
///
617611
/// This function is useful for conservative root scanning. The VM can iterate through all words in
618612
/// a stack, filter out zeros, misaligned words, obviously out-of-range words (such as addresses

src/util/address.rs

+37-13
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ impl Address {
212212
Address(self.0 - size)
213213
}
214214

215+
/// Apply a signed offset to the address.
216+
pub const fn offset(self, offset: isize) -> Address {
217+
Address(self.0.wrapping_add_signed(offset))
218+
}
219+
215220
/// Bitwise 'and' with a mask.
216221
pub const fn and(self, mask: usize) -> usize {
217222
self.0 & mask
@@ -486,17 +491,23 @@ use crate::vm::VMBinding;
486491
/// `usize`. For the convenience of passing `Option<ObjectReference>` to and from native (C/C++)
487492
/// programs, mmtk-core provides [`crate::util::api_util::NullableObjectReference`].
488493
///
494+
/// Note that [`ObjectReference`] has to be word aligned.
495+
///
489496
/// [NPO]: https://doc.rust-lang.org/std/option/index.html#representation
490497
#[repr(transparent)]
491498
#[derive(Copy, Clone, Eq, Hash, PartialOrd, Ord, PartialEq, NoUninit)]
492499
pub struct ObjectReference(NonZeroUsize);
493500

494501
impl ObjectReference {
502+
/// The required minimal alignment for object reference. If the object reference's raw address is not aligned to this value,
503+
/// you will see an assertion failure in the debug build when constructing an object reference instance.
504+
pub const ALIGNMENT: usize = crate::util::constants::BYTES_IN_ADDRESS;
505+
495506
/// Cast the object reference to its raw address. This method is mostly for the convenience of a binding.
496507
///
497508
/// MMTk should not make any assumption on the actual location of the address with the object reference.
498509
/// MMTk should not assume the address returned by this method is in our allocation. For the purposes of
499-
/// setting object metadata, MMTk should use [`crate::vm::ObjectModel::ref_to_address()`] or [`crate::vm::ObjectModel::ref_to_header()`].
510+
/// setting object metadata, MMTk should use [`crate::util::ObjectReference::to_address`] or [`crate::util::ObjectReference::to_header`].
500511
pub fn to_raw_address(self) -> Address {
501512
Address(self.0.get())
502513
}
@@ -506,9 +517,13 @@ impl ObjectReference {
506517
///
507518
/// If `addr` is 0, the result is `None`.
508519
///
509-
/// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::vm::ObjectModel::address_to_ref()`]
510-
/// to turn addresses that are from [`crate::vm::ObjectModel::ref_to_address()`] back to object.
520+
/// MMTk should not assume an arbitrary address can be turned into an object reference. MMTk can use [`crate::util::ObjectReference::from_address`]
521+
/// to turn addresses that are from [`crate::util::ObjectReference::to_address`] back to object.
511522
pub fn from_raw_address(addr: Address) -> Option<ObjectReference> {
523+
debug_assert!(
524+
addr.is_aligned_to(Self::ALIGNMENT),
525+
"ObjectReference is required to be word aligned"
526+
);
512527
NonZeroUsize::new(addr.0).map(ObjectReference)
513528
}
514529

@@ -522,16 +537,19 @@ impl ObjectReference {
522537
/// adding a positive offset to a non-zero address, we know the result must not be zero.
523538
pub unsafe fn from_raw_address_unchecked(addr: Address) -> ObjectReference {
524539
debug_assert!(!addr.is_zero());
540+
debug_assert!(
541+
addr.is_aligned_to(Self::ALIGNMENT),
542+
"ObjectReference is required to be word aligned"
543+
);
525544
ObjectReference(NonZeroUsize::new_unchecked(addr.0))
526545
}
527546

528547
/// Get the in-heap address from an object reference. This method is used by MMTk to get an in-heap address
529-
/// for an object reference. This method is syntactic sugar for [`crate::vm::ObjectModel::ref_to_address`]. See the
530-
/// comments on [`crate::vm::ObjectModel::ref_to_address`].
548+
/// for an object reference.
531549
pub fn to_address<VM: VMBinding>(self) -> Address {
532550
use crate::vm::ObjectModel;
533-
let to_address = VM::VMObjectModel::ref_to_address(self);
534-
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, to_address);
551+
let to_address = Address(self.0.get()).offset(VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET);
552+
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || to_address == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, in-object addr is {}", self, to_address);
535553
to_address
536554
}
537555

@@ -549,17 +567,23 @@ impl ObjectReference {
549567
pub fn to_object_start<VM: VMBinding>(self) -> Address {
550568
use crate::vm::ObjectModel;
551569
let object_start = VM::VMObjectModel::ref_to_object_start(self);
552-
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_address() returns {}", self, object_start);
570+
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || object_start == self.to_raw_address(), "The binding claims unified object reference address, but for object reference {}, ref_to_object_start() returns {}", self, object_start);
553571
object_start
554572
}
555573

556-
/// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`]
557-
/// or [`crate::vm::ObjectModel::ref_to_address`]. This method is syntactic sugar for [`crate::vm::ObjectModel::address_to_ref`].
558-
/// See the comments on [`crate::vm::ObjectModel::address_to_ref`].
574+
/// Get the object reference from an address that is returned from [`crate::util::address::ObjectReference::to_address`].
559575
pub fn from_address<VM: VMBinding>(addr: Address) -> ObjectReference {
560576
use crate::vm::ObjectModel;
561-
let obj = VM::VMObjectModel::address_to_ref(addr);
562-
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, address_to_ref() returns {}", addr, obj);
577+
let obj = unsafe {
578+
ObjectReference::from_raw_address_unchecked(
579+
addr.offset(-VM::VMObjectModel::IN_OBJECT_ADDRESS_OFFSET),
580+
)
581+
};
582+
debug_assert!(!VM::VMObjectModel::UNIFIED_OBJECT_REFERENCE_ADDRESS || addr == obj.to_raw_address(), "The binding claims unified object reference address, but for address {}, the object reference is {}", addr, obj);
583+
debug_assert!(
584+
obj.to_raw_address().is_aligned_to(Self::ALIGNMENT),
585+
"ObjectReference is required to be word aligned"
586+
);
563587
obj
564588
}
565589

src/util/test_util/fixtures.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ pub struct SingleObject {
222222

223223
impl FixtureContent for SingleObject {
224224
fn create() -> Self {
225-
use crate::vm::object_model::ObjectModel;
226225
let mut mutator = MutatorFixture::create();
227226

228227
// A relatively small object, typical for Ruby.
@@ -232,7 +231,7 @@ impl FixtureContent for SingleObject {
232231
let addr = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
233232
assert!(!addr.is_zero());
234233

235-
let objref = MockVM::address_to_ref(addr);
234+
let objref = MockVM::object_start_to_ref(addr);
236235
memory_manager::post_alloc(&mut mutator.mutator, objref, size, semantics);
237236

238237
SingleObject { objref, mutator }
@@ -257,7 +256,6 @@ pub struct TwoObjects {
257256

258257
impl FixtureContent for TwoObjects {
259258
fn create() -> Self {
260-
use crate::vm::object_model::ObjectModel;
261259
let mut mutator = MutatorFixture::create();
262260

263261
let size = 128;
@@ -266,13 +264,13 @@ impl FixtureContent for TwoObjects {
266264
let addr1 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
267265
assert!(!addr1.is_zero());
268266

269-
let objref1 = MockVM::address_to_ref(addr1);
267+
let objref1 = MockVM::object_start_to_ref(addr1);
270268
memory_manager::post_alloc(&mut mutator.mutator, objref1, size, semantics);
271269

272270
let addr2 = memory_manager::alloc(&mut mutator.mutator, size, 8, 0, semantics);
273271
assert!(!addr2.is_zero());
274272

275-
let objref2 = MockVM::address_to_ref(addr2);
273+
let objref2 = MockVM::object_start_to_ref(addr2);
276274
memory_manager::post_alloc(&mut mutator.mutator, objref2, size, semantics);
277275

278276
TwoObjects {

src/util/test_util/mock_vm.rs

+9-16
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use std::sync::Mutex;
2828

2929
/// The offset between object reference and the allocation address if we use
3030
/// the default mock VM.
31-
pub const DEFAULT_OBJECT_REF_OFFSET: usize = 4;
31+
pub const DEFAULT_OBJECT_REF_OFFSET: usize = crate::util::constants::BYTES_IN_ADDRESS;
3232

3333
// To mock static methods, we have to create a static instance of `MockVM`.
3434
lazy_static! {
@@ -231,8 +231,6 @@ pub struct MockVM {
231231
MockMethod<(ObjectReference, Address), ObjectReference>,
232232
pub ref_to_object_start: MockMethod<ObjectReference, Address>,
233233
pub ref_to_header: MockMethod<ObjectReference, Address>,
234-
pub ref_to_address: MockMethod<ObjectReference, Address>,
235-
pub address_to_ref: MockMethod<Address, ObjectReference>,
236234
pub dump_object: MockMethod<ObjectReference, ()>,
237235
// reference glue
238236
pub weakref_clear_referent: MockMethod<ObjectReference, ()>,
@@ -304,12 +302,6 @@ impl Default for MockVM {
304302
object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET)
305303
})),
306304
ref_to_header: MockMethod::new_fixed(Box::new(|object| object.to_raw_address())),
307-
ref_to_address: MockMethod::new_fixed(Box::new(|object| {
308-
object.to_raw_address().sub(DEFAULT_OBJECT_REF_OFFSET)
309-
})),
310-
address_to_ref: MockMethod::new_fixed(Box::new(|addr| {
311-
ObjectReference::from_raw_address(addr.add(DEFAULT_OBJECT_REF_OFFSET)).unwrap()
312-
})),
313305
dump_object: MockMethod::new_unimplemented(),
314306

315307
weakref_clear_referent: MockMethod::new_unimplemented(),
@@ -531,13 +523,8 @@ impl crate::vm::ObjectModel<MockVM> for MockVM {
531523
mock!(ref_to_header(object))
532524
}
533525

534-
fn ref_to_address(object: ObjectReference) -> Address {
535-
mock!(ref_to_address(object))
536-
}
537-
538-
fn address_to_ref(addr: Address) -> ObjectReference {
539-
mock!(address_to_ref(addr))
540-
}
526+
// TODO: This is not mocked. We need a way to deal with it.
527+
const IN_OBJECT_ADDRESS_OFFSET: isize = -(DEFAULT_OBJECT_REF_OFFSET as isize);
541528

542529
fn dump_object(object: ObjectReference) {
543530
mock!(dump_object(object))
@@ -629,3 +616,9 @@ impl crate::vm::Scanning<MockVM> for MockVM {
629616
mock_any!(forward_weak_refs(worker, tracer_context))
630617
}
631618
}
619+
620+
impl MockVM {
621+
pub fn object_start_to_ref(start: Address) -> ObjectReference {
622+
ObjectReference::from_raw_address(start + DEFAULT_OBJECT_REF_OFFSET).unwrap()
623+
}
624+
}

0 commit comments

Comments
 (0)