Commit 47562b7

Let MarkSweepSpace use BlockPageResource (#1150)
Let `MarkSweepSpace` use the `BlockPageResource` instead of the raw `FreeListPageResource`. This solves a problem where multiple GC workers contend for the mutex `FreeListPageResource::sync` when calling `FreeListPageResource::release_pages` concurrently during the `Release` stage. We flush the `BlockPageResource` in `MarkSweepSpace::end_of_gc`.

To monitor the performance, we added a pair of USDT trace points before and after the call to `Plan::end_of_gc`, and we now count the invocation of `end_of_gc` towards the GC time.

We also changed `BlockQueue` so that it uses `MaybeUninit` to hold uninitialized elements instead of relying on `B::from_aligned_address(Address::ZERO)`, because the `Block` type used by `MarkSweepSpace` is based on `NonZeroUsize` and cannot be zero.

Fixes: #1145

1 parent 1095465 commit 47562b7
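
To see why this helps, consider the batching pattern in isolation: instead of taking the shared free-list lock once per released block, each GC worker buffers blocks locally and merges them into the global pool once per GC. The sketch below is a minimal illustration of that pattern with hypothetical types (`Block`, `GlobalPool`), not MMTk's actual API:

```rust
use std::sync::{Arc, Mutex};
use std::thread;

type Block = usize; // stand-in for a real block address

struct GlobalPool {
    // Analogous to `FreeListPageResource::sync`: a single shared mutex.
    freed: Mutex<Vec<Block>>,
}

impl GlobalPool {
    // One lock acquisition per batch, not one per block.
    fn flush(&self, local: &mut Vec<Block>) {
        self.freed.lock().unwrap().append(local);
    }
}

fn main() {
    let pool = Arc::new(GlobalPool { freed: Mutex::new(Vec::new()) });
    let workers: Vec<_> = (0..4)
        .map(|w| {
            let pool = Arc::clone(&pool);
            thread::spawn(move || {
                // Each worker accumulates released blocks locally...
                let mut local: Vec<Block> = (0..1000).map(|i| w * 1000 + i).collect();
                // ...and touches the shared mutex only once.
                pool.flush(&mut local);
            })
        })
        .collect();
    workers.into_iter().for_each(|t| t.join().unwrap());
    assert_eq!(pool.freed.lock().unwrap().len(), 4000);
}
```

In the real `BlockPageResource` the worker-local buffers are lock-free `BlockQueue`s, and the merge is the `flush_all` call that this commit adds to `MarkSweepSpace::end_of_gc`.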

File tree

7 files changed: +73 -14 lines changed


src/policy/marksweepspace/native_ms/block.rs (+4)

```diff
@@ -4,6 +4,7 @@ use atomic::Ordering;
 
 use super::BlockList;
 use super::MarkSweepSpace;
+use crate::util::constants::LOG_BYTES_IN_PAGE;
 use crate::util::heap::chunk_map::*;
 use crate::util::linear_scan::Region;
 use crate::vm::ObjectModel;
@@ -48,6 +49,9 @@ impl Region for Block {
 }
 
 impl Block {
+    /// Log pages in block
+    pub const LOG_PAGES: usize = Self::LOG_BYTES - LOG_BYTES_IN_PAGE as usize;
+
     pub const METADATA_SPECS: [SideMetadataSpec; 7] = [
         Self::MARK_TABLE,
         Self::NEXT_BLOCK_TABLE,
```
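
For intuition about the new constant: `LOG_PAGES` is simply the block size expressed in pages. A quick sanity check with assumed sizes (64 KiB blocks, 4 KiB pages; the real values depend on the target and build configuration):

```rust
// Hypothetical values for illustration only.
const LOG_BYTES: usize = 16; // 64 KiB block
const LOG_BYTES_IN_PAGE: usize = 12; // 4 KiB page
const LOG_PAGES: usize = LOG_BYTES - LOG_BYTES_IN_PAGE;

fn main() {
    assert_eq!(LOG_PAGES, 4);
    assert_eq!(1 << LOG_PAGES, 16); // 16 pages per block
}
```

`BlockPageResource` needs this value because it hands out memory in fixed-size blocks rather than arbitrary runs of pages, as the constructor calls in the next file show.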

src/policy/marksweepspace/native_ms/global.rs (+22 -5)

```diff
@@ -7,7 +7,7 @@ use crate::{
     scheduler::{GCWorkScheduler, GCWorker},
     util::{
         copy::CopySemantics,
-        heap::{FreeListPageResource, PageResource},
+        heap::{BlockPageResource, PageResource},
         metadata::{self, side_metadata::SideMetadataSpec, MetadataSpec},
         ObjectReference,
     },
@@ -64,7 +64,7 @@
 /// | GC - End of GC | - | Merge the temp global lists | - |
 pub struct MarkSweepSpace<VM: VMBinding> {
     pub common: CommonSpace<VM>,
-    pr: FreeListPageResource<VM>,
+    pr: BlockPageResource<VM, Block>,
     /// Allocation status for all chunks in MS space
     chunk_map: ChunkMap,
     /// Work packet scheduler
@@ -80,6 +80,8 @@ pub struct MarkSweepSpace<VM: VMBinding> {
     abandoned_in_gc: Mutex<AbandonedBlockLists>,
 }
 
+unsafe impl<VM: VMBinding> Sync for MarkSweepSpace<VM> {}
+
 pub struct AbandonedBlockLists {
     pub available: BlockLists,
     pub unswept: BlockLists,
@@ -278,9 +280,19 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
         let common = CommonSpace::new(args.into_policy_args(false, false, local_specs));
         MarkSweepSpace {
             pr: if is_discontiguous {
-                FreeListPageResource::new_discontiguous(vm_map)
+                BlockPageResource::new_discontiguous(
+                    Block::LOG_PAGES,
+                    vm_map,
+                    scheduler.num_workers(),
+                )
             } else {
-                FreeListPageResource::new_contiguous(common.start, common.extent, vm_map)
+                BlockPageResource::new_contiguous(
+                    Block::LOG_PAGES,
+                    common.start,
+                    common.extent,
+                    vm_map,
+                    scheduler.num_workers(),
+                )
             },
             common,
             chunk_map: ChunkMap::new(),
@@ -347,14 +359,19 @@ impl<VM: VMBinding> MarkSweepSpace<VM> {
 
         #[cfg(debug_assertions)]
         from.assert_empty();
+
+        // BlockPageResource uses worker-local block queues to eliminate contention when releasing
+        // blocks, similar to how the MarkSweepSpace caches blocks in `abandoned_in_gc` before
+        // returning to the global pool. We flush the BlockPageResource, too.
+        self.pr.flush_all();
     }
 
     /// Release a block.
     pub fn release_block(&self, block: Block) {
         self.block_clear_metadata(block);
 
         block.deinit();
-        self.pr.release_pages(block.start());
+        self.pr.release_block(block);
     }
 
     pub fn block_clear_metadata(&self, block: Block) {
```

src/scheduler/scheduler.rs (+6 -4)

```diff
@@ -530,6 +530,12 @@
         // Tell GC trigger that GC ended - this happens before we resume mutators.
         mmtk.gc_trigger.policy.on_gc_end(mmtk);
 
+        // All other workers are parked, so it is safe to access the Plan instance mutably.
+        probe!(mmtk, plan_end_of_gc_begin);
+        let plan_mut: &mut dyn Plan<VM = VM> = unsafe { mmtk.get_plan_mut() };
+        plan_mut.end_of_gc(worker.tls);
+        probe!(mmtk, plan_end_of_gc_end);
+
         // Compute the elapsed time of the GC.
         let start_time = {
             let mut gc_start_time = worker.mmtk.state.gc_start_time.borrow_mut();
@@ -565,10 +571,6 @@
             );
         }
 
-        // All other workers are parked, so it is safe to access the Plan instance mutably.
-        let plan_mut: &mut dyn Plan<VM = VM> = unsafe { mmtk.get_plan_mut() };
-        plan_mut.end_of_gc(worker.tls);
-
         #[cfg(feature = "extreme_assertions")]
         if crate::util::slot_logger::should_check_duplicate_slots(mmtk.get_plan()) {
             // reset the logging info at the end of each GC
```

src/util/heap/blockpageresource.rs (+19 -5)

```diff
@@ -8,10 +8,12 @@ use crate::util::heap::pageresource::CommonPageResource;
 use crate::util::heap::space_descriptor::SpaceDescriptor;
 use crate::util::linear_scan::Region;
 use crate::util::opaque_pointer::*;
+use crate::util::rust_util::zeroed_alloc::new_zeroed_vec;
 use crate::vm::*;
 use atomic::Ordering;
 use spin::RwLock;
 use std::cell::UnsafeCell;
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicUsize;
 use std::sync::Mutex;
 
@@ -179,17 +181,29 @@ impl<VM: VMBinding, B: Region> BlockPageResource<VM, B> {
 
 /// A block list that supports fast lock-free push/pop operations
 struct BlockQueue<B: Region> {
+    /// The number of elements in the queue.
     cursor: AtomicUsize,
-    data: UnsafeCell<Vec<B>>,
+    /// The underlying data storage.
+    ///
+    /// - `UnsafeCell<T>`: It may be accessed by multiple threads.
+    /// - `Box<[T]>`: It holds an array allocated on the heap. It cannot be resized, but can be
+    ///   replaced with another array as a whole.
+    /// - `MaybeUninit<T>`: It may contain uninitialized elements.
+    ///
+    /// The implementation of `BlockQueue` must ensure there is no data race, and it never reads
+    /// uninitialized elements.
+    data: UnsafeCell<Box<[MaybeUninit<B>]>>,
 }
 
 impl<B: Region> BlockQueue<B> {
     /// Create an array
     fn new() -> Self {
-        let default_block = B::from_aligned_address(Address::ZERO);
+        let zeroed_vec = unsafe { new_zeroed_vec(Self::CAPACITY) };
+        let boxed_slice = zeroed_vec.into_boxed_slice();
+        let data = UnsafeCell::new(boxed_slice);
         Self {
             cursor: AtomicUsize::new(0),
-            data: UnsafeCell::new(vec![default_block; Self::CAPACITY]),
+            data,
         }
     }
 }
@@ -199,14 +213,14 @@ impl<B: Region> BlockQueue<B> {
 
     /// Get an entry
     fn get_entry(&self, i: usize) -> B {
-        unsafe { (*self.data.get())[i] }
+        unsafe { (*self.data.get())[i].assume_init() }
     }
 
     /// Set an entry.
     ///
     /// It's unsafe unless the array is accessed by only one thread (i.e. used as a thread-local array).
     unsafe fn set_entry(&self, i: usize, block: B) {
-        (*self.data.get())[i] = block
+        (*self.data.get())[i].write(block);
    }
 
     /// Non-atomically push an element.
```
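
The switch to `MaybeUninit` matters because the native mark-sweep `Block` is backed by `NonZeroUsize`, so a zero-filled `Vec<B>` cannot legally exist. The following self-contained sketch, not MMTk's actual `BlockQueue`, shows the same pattern of writing into and reading back from a partially initialized buffer:

```rust
use std::mem::MaybeUninit;
use std::num::NonZeroUsize;

/// A fixed-capacity buffer whose element type has no valid all-zero value.
struct FixedBuffer<T, const N: usize> {
    /// Number of initialized elements; slots at `len..N` are uninitialized.
    len: usize,
    data: [MaybeUninit<T>; N],
}

impl<T, const N: usize> FixedBuffer<T, N> {
    fn new() -> Self {
        // An array of `MaybeUninit` may itself start uninitialized; this is
        // the pattern documented for `MaybeUninit` in the standard library.
        let data: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() };
        Self { len: 0, data }
    }

    fn push(&mut self, value: T) {
        assert!(self.len < N, "buffer full");
        self.data[self.len].write(value);
        self.len += 1;
    }

    fn pop(&mut self) -> Option<T> {
        if self.len == 0 {
            return None;
        }
        self.len -= 1;
        // Safe: every slot below the old `len` was initialized by `push`.
        Some(unsafe { self.data[self.len].assume_init_read() })
    }
}

fn main() {
    // Like the native mark-sweep `Block`, `NonZeroUsize` cannot be zero, so a
    // zero-filled backing array would be undefined behaviour from the start.
    let mut buf: FixedBuffer<NonZeroUsize, 8> = FixedBuffer::new();
    buf.push(NonZeroUsize::new(0x1_0000).unwrap());
    assert_eq!(buf.pop().unwrap().get(), 0x1_0000);
    assert!(buf.pop().is_none());
}
```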

src/util/heap/freelistpageresource.rs (+8)

```diff
@@ -320,6 +320,14 @@
         self.common.release_discontiguous_chunks(chunk);
     }
 
+    /// Release pages previously allocated by `alloc_pages`.
+    ///
+    /// Warning: This method acquires the mutex `self.sync`. If multiple threads release pages
+    /// concurrently, the lock contention will become a performance bottleneck. This is especially
+    /// problematic for plans that sweep objects in bulk in the `Release` stage. Spaces other than
+    /// the large object space are recommended to use [`BlockPageResource`] whenever possible.
+    ///
+    /// [`BlockPageResource`]: crate::util::heap::blockpageresource::BlockPageResource
     pub fn release_pages(&self, first: Address) {
         debug_assert!(conversions::is_page_aligned(first));
         let mut sync = self.sync.lock().unwrap();
```

tools/tracing/README.md (+2)

```diff
@@ -38,6 +38,8 @@ Currently, the core provides the following tracepoints.
   argument is the length of the string.
 - `mmtk:alloc_slow_once_start()`: the allocation slow path starts.
 - `mmtk:alloc_slow_once_end()`: the allocation slow path ends.
+- `mmtk:plan_end_of_gc_begin()`: before executing `Plan::end_of_gc`.
+- `mmtk:plan_end_of_gc_end()`: after executing `Plan::end_of_gc`.
 
 ## Tracing tools
 
```

tools/tracing/timeline/capture.bt (+12)

```diff
@@ -79,4 +79,16 @@ usdt:$MMTK:mmtk:process_slots {
     }
 }
 
+usdt:$MMTK:mmtk:plan_end_of_gc_begin {
+    if (@enable_print) {
+        printf("plan_end_of_gc,B,%d,%lu\n", tid, nsecs);
+    }
+}
+
+usdt:$MMTK:mmtk:plan_end_of_gc_end {
+    if (@enable_print) {
+        printf("plan_end_of_gc,E,%d,%lu\n", tid, nsecs);
+    }
+}
+
 // vim: ft=bpftrace ts=4 sw=4 sts=4 et
```
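
The `B`/`E` fields in the printed records mark begin and end events, following the same convention as the other tracepoints in this script, so each `plan_end_of_gc_begin`/`plan_end_of_gc_end` pair should appear as a single `plan_end_of_gc` span on the GC worker's track in the visualized timeline.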
