Skip to content

Commit

Permalink
Use write belt for mesh data gpu upload (#1416)
Browse files Browse the repository at this point in the history
* Mesh renderer now takes a constant ctx
* mesh data upload works now entirely via CpuWriteGpuReadBelt
* simplify frame global uniform buffer handling
* fix calculating wrong size for mesh vertex buffer
  • Loading branch information
Wumpf authored and emilk committed Mar 2, 2023
1 parent 5500de2 commit 84dc341
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 192 deletions.
9 changes: 1 addition & 8 deletions crates/re_renderer/src/allocator/uniform_buffer_fill.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,7 @@ pub fn create_and_fill_uniform_buffer_batch<T: bytemuck::Pod>(
num_buffers as _,
);
staging_buffer.extend(content);
staging_buffer.copy_to_buffer(
ctx.active_frame
.frame_global_command_encoder
.lock()
.get_or_create(&ctx.device),
&buffer,
0,
);
staging_buffer.copy_to_buffer(ctx.active_frame.encoder.lock().get(), &buffer, 0);

(0..num_buffers)
.into_iter()
Expand Down
108 changes: 48 additions & 60 deletions crates/re_renderer/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub struct RenderContext {
#[cfg(all(not(target_arch = "wasm32"), debug_assertions))] // native debug build
pub(crate) err_tracker: std::sync::Arc<crate::error_tracker::ErrorTracker>,

pub mesh_manager: MeshManager,
pub mesh_manager: RwLock<MeshManager>,
pub texture_manager_2d: TextureManager2D,
pub cpu_write_gpu_read_belt: Mutex<CpuWriteGpuReadBelt>,

Expand Down Expand Up @@ -159,19 +159,21 @@ impl RenderContext {
renderers: TypeMap::new(),
});

let mesh_manager = MeshManager::new(
device.clone(),
queue.clone(),
renderers.get_mut().get_or_create(
&shared_renderer_data,
&mut gpu_resources,
&device,
&mut resolver,
),
);
let mesh_manager = RwLock::new(MeshManager::new(renderers.get_mut().get_or_create(
&shared_renderer_data,
&mut gpu_resources,
&device,
&mut resolver,
)));
let texture_manager_2d =
TextureManager2D::new(device.clone(), queue.clone(), &mut gpu_resources.textures);

let active_frame = ActiveFrameContext {
encoder: Mutex::new(FrameGlobalCommandEncoder::new(&device)),
per_frame_data_helper: TypeMap::new(),
frame_index: 0,
};

RenderContext {
device,
queue,
Expand All @@ -193,11 +195,7 @@ impl RenderContext {

inflight_queue_submissions: Vec::new(),

active_frame: ActiveFrameContext {
frame_global_command_encoder: Mutex::new(FrameGlobalCommandEncoder(None)),
per_frame_data_helper: TypeMap::new(),
frame_index: 0
}
active_frame,
}
}

Expand Down Expand Up @@ -227,12 +225,19 @@ impl RenderContext {
pub fn begin_frame(&mut self) {
crate::profile_function!();

// If the currently active frame still has an encoder, we need to finish it and queue it.
// This should only ever happen for the first frame where we created an encoder for preparatory work. Every other frame we take the encoder at submit!
if self.active_frame.encoder.lock().0.is_some() {
assert!(self.active_frame.frame_index == 0, "There was still a command encoder from the previous frame at the beginning of the current. Did you forget to call RenderContext::before_submit?");
self.before_submit();
}

// Request used staging buffer back.
// TODO(andreas): If we'd control all submissions, we could move this directly after the submission which would be a bit better.
self.cpu_write_gpu_read_belt.lock().after_queue_submit();

self.active_frame = ActiveFrameContext {
frame_global_command_encoder: Mutex::new(FrameGlobalCommandEncoder(None)),
encoder: Mutex::new(FrameGlobalCommandEncoder::new(&self.device)),
frame_index: self.active_frame.frame_index + 1,
per_frame_data_helper: TypeMap::new(),
};
Expand All @@ -252,7 +257,7 @@ impl RenderContext {
re_log::debug!(?modified_paths, "got some filesystem events");
}

self.mesh_manager.begin_frame(frame_index);
self.mesh_manager.get_mut().begin_frame(frame_index);
self.texture_manager_2d.begin_frame(frame_index);

{
Expand Down Expand Up @@ -304,13 +309,7 @@ impl RenderContext {
// Unmap all staging buffers.
self.cpu_write_gpu_read_belt.lock().before_queue_submit();

if let Some(command_encoder) = self
.active_frame
.frame_global_command_encoder
.lock()
.0
.take()
{
if let Some(command_encoder) = self.active_frame.encoder.lock().0.take() {
let command_buffer = command_encoder.finish();

// TODO(andreas): For better performance, we should try to bundle this with the single submit call that is currently happening in eframe.
Expand All @@ -321,57 +320,46 @@ impl RenderContext {
}
}

impl Drop for RenderContext {
/// Wrapper around the per-frame "global" command encoder.
///
/// Holds `Some(encoder)` for the duration of a frame; the option is taken out
/// when the encoder is finished & submitted (see `RenderContext::before_submit`).
pub struct FrameGlobalCommandEncoder(Option<wgpu::CommandEncoder>);

impl FrameGlobalCommandEncoder {
    /// Creates a fresh frame-global command encoder on the given device.
    fn new(device: &wgpu::Device) -> Self {
        let descriptor = wgpu::CommandEncoderDescriptor {
            label: crate::DebugLabel::from("global \"before viewbuilder\" command encoder").get(),
        };
        let encoder = device.create_command_encoder(&descriptor);
        Self(Some(encoder))
    }

    /// Gets the global encoder for a frame. Only valid within a frame.
    pub fn get(&mut self) -> &mut wgpu::CommandEncoder {
        match self.0.as_mut() {
            Some(encoder) => encoder,
            // Same invariant the original `expect` enforced: the encoder is
            // only ever absent outside of an active frame.
            None => panic!("Frame global encoder can't be accessed outside of a frame!"),
        }
    }
}

impl Drop for FrameGlobalCommandEncoder {
// NOTE(review): the body below is diff-render residue — the REMOVED lines of the
// old `impl Drop for RenderContext` (the `self.active_frame...lock().0.take()`
// chain) are interleaved with the ADDED `FrameGlobalCommandEncoder::drop` body
// (`self.0.take()`), so the braces do not balance as plain Rust. Only the
// `self.0.take()` variant is the code that actually landed in this commit.
fn drop(&mut self) {
// Close global command encoder if there is any pending.
// Not doing so before shutdown causes errors.
if let Some(encoder) = self
.active_frame
.frame_global_command_encoder
.lock()
.0
.take()
{
// Not doing so before shutdown causes errors!
if let Some(encoder) = self.0.take() {
// Per the comment above: the pending encoder must be finished before drop,
// otherwise shutdown produces errors.
encoder.finish();
}
}
}

pub struct FrameGlobalCommandEncoder(Option<wgpu::CommandEncoder>);

impl FrameGlobalCommandEncoder {
/// Gets or creates a command encoder that runs before all view builder encoder.
pub fn get_or_create(&mut self, device: &wgpu::Device) -> &mut wgpu::CommandEncoder {
self.0.get_or_insert_with(|| {
device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: crate::DebugLabel::from("global \"before viewbuilder\" command encoder")
.get(),
})
})
}
}

// Per-frame state owned by `RenderContext` (recreated each `begin_frame`).
pub struct ActiveFrameContext {
/// Command encoder for all commands that should go in before view builder are submitted.
///
/// This should be used for any gpu copy operation outside of a renderer or view builder.
/// (i.e. typically in [`crate::renderer::DrawData`] creation!)
// NOTE(review): the next two lines are the removed (old name) and added (new name)
// sides of the same field in this diff — the real struct has only `encoder`.
pub frame_global_command_encoder: Mutex<FrameGlobalCommandEncoder>,
pub encoder: Mutex<FrameGlobalCommandEncoder>,

/// Utility type map that will be cleared every frame.
pub per_frame_data_helper: TypeMap,

/// Index of this frame. Is incremented for every render frame.
// Private: incremented in `begin_frame` when the next `ActiveFrameContext` is built.
frame_index: u64,
}

/// Gets allocation size for a uniform buffer padded in a way that multiple can be put in a single wgpu buffer.
///
/// TODO(andreas): Once we have higher level buffer allocators this should be handled there.
/// Gets allocation size for a uniform buffer padded in a way that multiple can be put in a single wgpu buffer.
///
/// Rounds `size_of::<Data>()` up to the device's
/// `min_uniform_buffer_offset_alignment` so consecutive instances can share one buffer.
///
/// TODO(andreas): Once we have higher level buffer allocators this should be handled there.
pub(crate) fn uniform_buffer_allocation_size<Data>(device: &wgpu::Device) -> u64 {
    let uniform_buffer_size = std::mem::size_of::<Data>() as u64;
    // Align in u64 throughout: the previous `as u32` cast of a `usize` size was a
    // silent lossy narrowing (`wgpu::util::align_to` is generic, so u64 works directly;
    // the alignment limit itself is a u32 and widens losslessly).
    wgpu::util::align_to(
        uniform_buffer_size,
        device.limits().min_uniform_buffer_offset_alignment as u64,
    )
}
7 changes: 1 addition & 6 deletions crates/re_renderer/src/importer/gltf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,7 @@ pub fn load_gltf_from_buffer(
meshes.insert(
mesh.index(),
(
ctx.mesh_manager.create(
&mut ctx.gpu_resources,
&ctx.texture_manager_2d,
&re_mesh,
lifetime,
)?,
ctx.mesh_manager.write().create(ctx, &re_mesh, lifetime)?,
Arc::new(re_mesh),
),
);
Expand Down
8 changes: 2 additions & 6 deletions crates/re_renderer/src/importer/obj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,8 @@ pub fn load_obj_from_buffer(
};
let gpu_mesh = ctx
.mesh_manager
.create(
&mut ctx.gpu_resources,
&ctx.texture_manager_2d,
&mesh,
lifetime,
)
.write()
.create(ctx, &mesh, lifetime)
.unwrap(); // TODO(andreas): Handle error
MeshInstance {
gpu_mesh,
Expand Down
Loading

0 comments on commit 84dc341

Please sign in to comment.