Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robust memory allocation handling #606

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Push some debug logging and a few misc improvements
DJMcNab committed Aug 7, 2024
commit fe2d6b9a83182d920ef27ce175f79310dc06299c
55 changes: 49 additions & 6 deletions vello/src/lib.rs
Original file line number Diff line number Diff line change
@@ -484,43 +484,86 @@ impl Renderer {
let data = slice.get_mapped_range();
let data: BumpAllocators = bytemuck::pod_read_unaligned(&data);
if data.failed != 0 {
if data.failed == 0x20 {
eprintln!(
if data.failed & 0x20 != 0 {
log::debug!(
"Run failed but next run will be retried, reallocated in last run"
);
} else {
eprintln!(
"Previous run failed, need to reallocate: {:x?}",
data.failed
);
// log::info!(
// "Previous run failed, need to reallocate: {:x?}",
// data.failed
// );
// log::debug!("{:?}", data);
// TODO: Be smarter here, e.g. notice that we're over by a certain factor
// and bump several buffers?

let mut changed = false;
// TODO: Also reduce allocation sizes
// TODO: Free buffers which haven't been used in "a while"

// TODO: This ignore the draw tag length?
if data.binning > self.bump_sizes.bin_data.len() {
changed = true;
log::debug!(
"Resizing binning from {:?} to {:?}",
self.bump_sizes.bin_data,
data.binning
);
self.bump_sizes.bin_data = BufferSize::new(data.binning * 3 / 2);
}
if data.lines > self.bump_sizes.lines.len() {
changed = true;
log::debug!(
"Resizing lines from {:?} to {:?}",
self.bump_sizes.lines,
data.lines
);
self.bump_sizes.lines = BufferSize::new(data.lines * 5 / 4);
}
// if data.blend > self.bump_sizes.? // TODO
if data.ptcl > self.bump_sizes.ptcl.len() {
changed = true;
log::debug!(
"Resizing ptcl from {:?} to {:?}",
self.bump_sizes.ptcl,
data.ptcl
);
// TODO: At 5/4, this doesn't work very well
self.bump_sizes.ptcl = BufferSize::new(data.ptcl * 3 / 2);
}
if data.seg_counts > self.bump_sizes.seg_counts.len() {
changed = true;
log::debug!(
"Resizing seg_counts from {:?} to {:?}",
self.bump_sizes.seg_counts,
data.seg_counts
);
self.bump_sizes.seg_counts =
BufferSize::new(data.seg_counts * 5 / 4);
}
if data.tile > self.bump_sizes.tiles.len() {
changed = true;
log::debug!(
"Resizing tiles from {:?} to {:?}",
self.bump_sizes.tiles,
data.tile
);
self.bump_sizes.tiles = BufferSize::new(data.tile * 5 / 4);
}
if data.segments > self.bump_sizes.segments.len() {
changed = true;
log::debug!(
"Resizing segments from {:?} to {:?}",
self.bump_sizes.segments,
data.segments
);
self.bump_sizes.segments = BufferSize::new(data.segments * 5 / 4);
}
if !changed {
log::warn!("Detected need for reallocation, but didn't reallocate {:x?}. Data {data:?}", data.failed);
} else {
log::info!("Detected need for reallocation, and did reallocate {:x?}. Data {data:?}", data.failed);
}
}
}
}
10 changes: 9 additions & 1 deletion vello/src/render.rs
Original file line number Diff line number Diff line change
@@ -180,6 +180,13 @@ impl Render {
&params.base_color,
bump_sizes,
);
// log::debug!("Config: {{ lines_size: {:?}, binning_size: {:?}, tiles_size: {:?}, seg_counts_size: {:?}, segments_size: {:?}, ptcl_size: {:?} }}",
// cpu_config.gpu.lines_size,
// cpu_config.gpu.binning_size,
// cpu_config.gpu.tiles_size,
// cpu_config.gpu.seg_counts_size,
// cpu_config.gpu.segments_size,
// cpu_config.gpu.ptcl_size);
let buffer_sizes = &cpu_config.buffer_sizes;
let wg_counts = &cpu_config.workgroup_counts;

@@ -195,7 +202,8 @@ impl Render {
recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)),
);
let info_bin_data_buf = ResourceProxy::new_buf(
buffer_sizes.bump_buffers.bin_data.size_in_bytes() as u64,
buffer_sizes.bump_buffers.bin_data.size_in_bytes() as u64
+ (layout.bin_data_start as u64) * std::mem::size_of::<u32>() as u64,
"info_bin_data_buf",
);
let tile_buf = ResourceProxy::new_buf(
14 changes: 7 additions & 7 deletions vello_encoding/src/config.rs
Original file line number Diff line number Diff line change
@@ -194,7 +194,7 @@ impl RenderConfig {
target_height: height,
base_color: base_color.to_premul_u32(),
lines_size: buffer_sizes.bump_buffers.lines.len(),
binning_size: buffer_sizes.bump_buffers.bin_data.len() - layout.bin_data_start,
binning_size: buffer_sizes.bump_buffers.bin_data.len(),
tiles_size: buffer_sizes.bump_buffers.tiles.len(),
seg_counts_size: buffer_sizes.bump_buffers.seg_counts.len(),
segments_size: buffer_sizes.bump_buffers.segments.len(),
@@ -380,12 +380,12 @@ impl BumpBufferSizes {
// The following buffer sizes have been hand picked to accommodate the vello test scenes as
// well as paris-30k. These should instead get derived from the scene layout using
// reasonable heuristics.
let bin_data = BufferSize::new(1 << 18);
let tiles = BufferSize::new(1 << 21);
let lines = BufferSize::new(1 << 21);
let seg_counts = BufferSize::new(1 << 21);
let segments = BufferSize::new(1 << 21);
let ptcl = BufferSize::new(1 << 23);
let bin_data = BufferSize::new(1 << 12);
let tiles = BufferSize::new(1 << 15);
let lines = BufferSize::new(1 << 15);
let seg_counts = BufferSize::new(1 << 15);
let segments = BufferSize::new(1 << 15);
let ptcl = BufferSize::new(1 << 17);
Copy link
Collaborator

@armansito armansito Jun 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These sizes are smaller than before. I imagine this is to verify the resizing logic on the larger scenes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah - it's a combination of testing the smaller sizes, and also to try and reduce memory usage by default. See e.g. linebender/xilem#319

There definitely is room to change this, as tens of mebibytes of memory usage is probably not worth worrying about; the change was mostly made for testing, however

// 16 * 16 (1 << 8) is one blend spill, so this allows for 4096 spills.
let blend_spill = BufferSize::new(1 << 20);
BumpBufferSizes {
5 changes: 1 addition & 4 deletions vello_shaders/shader/coarse.wgsl
Original file line number Diff line number Diff line change
@@ -68,10 +68,7 @@ var<private> cmd_limit: u32;
// Make sure there is space for a command of given size, plus a jump if needed
fn alloc_cmd(size: u32) {
if cmd_offset + size >= cmd_limit {
// We might be able to save a little bit of computation here
// by setting the initial value of the bump allocator.
let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC;
var new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT);
var new_cmd = atomicAdd(&bump.ptcl, PTCL_INCREMENT);
if new_cmd + PTCL_INCREMENT > config.ptcl_size {
// This sets us up for technical UB, as lots of threads will be writing
// to the same locations. But I think it's fine, and predicating the
5 changes: 3 additions & 2 deletions vello_shaders/shader/prepare.wgsl
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@

#import config
#import bump
#import ptcl

@group(0) @binding(0)
var<storage, read_write> config: Config;
@@ -19,7 +20,7 @@ var<storage, read_write> bump: BumpAllocators;
fn main() {
var should_cancel = false;
let previous_failure = atomicLoad(&bump.failed);
if previous_failure == PREVIOUS_RUN {
if (previous_failure & PREVIOUS_RUN) != 0 {
// Don't early-exit from multiple frames in a row
// The CPU should be blocking on the frame which failed anyway, so this
// case should never be reached, but if the CPU side isn't doing that
@@ -62,7 +63,7 @@ fn main() {
}
}
atomicStore(&bump.binning, 0u);
atomicStore(&bump.ptcl, 0u);
atomicStore(&bump.ptcl, config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC);
atomicStore(&bump.tile, 0u);
atomicStore(&bump.seg_counts, 0u);
atomicStore(&bump.segments, 0u);