-
Notifications
You must be signed in to change notification settings - Fork 373
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[re_renderer] Uniform buffer utility using CpuWriteGpuReadBelt (#1400)
* utility function for uniform buffer from struct
* Uniform buffers are now compile time forced to be 256 bytes aligned
* CpuWriteGpuReadBelt no longer takes mutable buffer pool
* Renderers and frame_global_command_encoder are now behind locks

---------

Co-authored-by: Clement Rey <[email protected]>
- Loading branch information
Showing
19 changed files
with
322 additions
and
253 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
crates/re_renderer/src/allocator/uniform_buffer_fill.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
pub use super::cpu_write_gpu_read_belt::{CpuWriteGpuReadBelt, CpuWriteGpuReadBuffer}; | ||
|
||
use crate::{wgpu_resources::BindGroupEntry, DebugLabel, RenderContext}; | ||
|
||
/// Compile-time guard for uniform buffer types.
///
/// Never instantiated; it only exists so that [`Self::CHECK`] can be evaluated for a given `T`.
struct UniformBufferAlignmentCheck<T> {
    pub _marker: std::marker::PhantomData<T>,
}

impl<T> UniformBufferAlignmentCheck<T> {
    /// Minimum alignment, in bytes, demanded of every uniform buffer type.
    const REQUIRED_ALIGNMENT: usize = 256;

    /// Asserts that `T` is aligned to at least 256 bytes.
    ///
    /// wgpu requires uniform buffers to be aligned to up to 256 bytes.
    /// This is a property of device limits, see the
    /// [`WebGPU` specification](https://www.w3.org/TR/webgpu/#limits).
    /// Implementations are allowed to advertise a lower alignment requirement, however
    /// 256 bytes is fairly common even in modern hardware and is even hardcoded for DX12.
    ///
    /// Technically this only matters when sub-allocating a buffer, as the wgpu backend
    /// is internally forced to make sure that the start of any [`wgpu::Buffer`] with
    /// [`wgpu::BufferUsages::UNIFORM`] usage has this alignment.
    /// In practice we still enforce the alignment on every uniform buffer type
    /// rather than only where it is strictly required.
    ///
    /// Instead of enforcing this alignment on the type we could:
    /// * only align on the gpu buffer
    ///     -> causes more fine grained `copy_buffer_to_buffer` calls on the gpu encoder
    /// * only align on the [`CpuWriteGpuReadBuffer`] & gpu buffer
    ///     -> causes more complicated offset computation on [`CpuWriteGpuReadBuffer`] as well as
    ///        either holes at padding (-> undefined values & slow for write combined!)
    ///        or complicated nulling of padding
    ///
    /// About the [`bytemuck::Pod`] requirement (dragged in by [`CpuWriteGpuReadBuffer`]):
    /// [`bytemuck::Pod`] forces us to be explicit about padding as it doesn't allow invisible
    /// padding bytes!
    /// We could drop this and thus make it easier to define uniform buffer types.
    /// But this leads to more unsafe code, makes it harder to avoid holes in write combined
    /// memory access, and potentially leaves undefined values in the padding bytes on GPU.
    const CHECK: () = {
        let actual_alignment = std::mem::align_of::<T>();
        assert!(
            actual_alignment >= Self::REQUIRED_ALIGNMENT,
            "Uniform buffers need to be aligned to 256 bytes. Use `#[repr(C, align(256))]`"
        );
    };
}
/// Utility for fast & efficient creation of uniform buffers from a series of structs. | ||
/// | ||
/// For subsequent frames, this will automatically not allocate any resources (thanks to our buffer pooling mechanism). | ||
/// | ||
/// TODO(#1383): We could do this on a more complex stack allocator. | ||
pub fn create_and_fill_uniform_buffer_batch<T: bytemuck::Pod>( | ||
ctx: &RenderContext, | ||
label: DebugLabel, | ||
content: impl ExactSizeIterator<Item = T>, | ||
) -> Vec<BindGroupEntry> { | ||
#[allow(clippy::let_unit_value)] | ||
let _ = UniformBufferAlignmentCheck::<T>::CHECK; | ||
|
||
let num_buffers = content.len() as u64; | ||
let element_size = std::mem::size_of::<T>() as u64; | ||
|
||
assert!( | ||
element_size > 0, | ||
"Uniform buffer need to have a non-zero size" | ||
); | ||
|
||
let buffer = ctx.gpu_resources.buffers.alloc( | ||
&ctx.device, | ||
&crate::wgpu_resources::BufferDesc { | ||
label, | ||
size: num_buffers * element_size, | ||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, | ||
mapped_at_creation: false, | ||
}, | ||
); | ||
|
||
let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate::<T>( | ||
&ctx.device, | ||
&ctx.gpu_resources.buffers, | ||
num_buffers as _, | ||
); | ||
staging_buffer.extend(content); | ||
staging_buffer.copy_to_buffer( | ||
ctx.active_frame | ||
.frame_global_command_encoder | ||
.lock() | ||
.get_or_create(&ctx.device), | ||
&buffer, | ||
0, | ||
); | ||
|
||
(0..num_buffers) | ||
.into_iter() | ||
.map(|i| BindGroupEntry::Buffer { | ||
handle: buffer.handle, | ||
offset: i * element_size, | ||
size: Some(std::num::NonZeroU64::new(element_size).unwrap()), | ||
}) | ||
.collect() | ||
} | ||
|
||
/// See [`create_and_fill_uniform_buffer`]. | ||
pub fn create_and_fill_uniform_buffer<T: bytemuck::Pod>( | ||
ctx: &mut RenderContext, | ||
label: DebugLabel, | ||
content: T, | ||
) -> BindGroupEntry { | ||
create_and_fill_uniform_buffer_batch(ctx, label, std::iter::once(content)) | ||
.into_iter() | ||
.next() | ||
.unwrap() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
aec0e87
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rust Benchmark
| Benchmark | Result A | Result B | Ratio (A ÷ B) |
|---|---|---|---|
| datastore/insert/batch/rects/insert | 549182 ns/iter (± 2401) | 562683 ns/iter (± 5452) | 0.98 |
| datastore/latest_at/batch/rects/query | 1838 ns/iter (± 2) | 1872 ns/iter (± 11) | 0.98 |
| datastore/latest_at/missing_components/primary | 355 ns/iter (± 0) | 352 ns/iter (± 3) | 1.01 |
| datastore/latest_at/missing_components/secondaries | 423 ns/iter (± 0) | 413 ns/iter (± 5) | 1.02 |
| datastore/range/batch/rects/query | 153904 ns/iter (± 689) | 152009 ns/iter (± 1789) | 1.01 |
| mono_points_arrow/generate_message_bundles | 46476427 ns/iter (± 672419) | 43940291 ns/iter (± 1271642) | 1.06 |
| mono_points_arrow/generate_messages | 125079757 ns/iter (± 1016055) | 124306090 ns/iter (± 1361079) | 1.01 |
| mono_points_arrow/encode_log_msg | 157056535 ns/iter (± 1670815) | 148283826 ns/iter (± 1441515) | 1.06 |
| mono_points_arrow/encode_total | 327352970 ns/iter (± 2106891) | 322695077 ns/iter (± 3011993) | 1.01 |
| mono_points_arrow/decode_log_msg | 175387465 ns/iter (± 790222) | 172713411 ns/iter (± 1540497) | 1.02 |
| mono_points_arrow/decode_message_bundles | 65046338 ns/iter (± 1715195) | 62192589 ns/iter (± 782759) | 1.05 |
| mono_points_arrow/decode_total | 237795661 ns/iter (± 1755737) | 233713482 ns/iter (± 2179769) | 1.02 |
| batch_points_arrow/generate_message_bundles | 333348 ns/iter (± 524) | 328899 ns/iter (± 3928) | 1.01 |
| batch_points_arrow/generate_messages | 6239 ns/iter (± 26) | 6101 ns/iter (± 62) | 1.02 |
| batch_points_arrow/encode_log_msg | 363395 ns/iter (± 1398) | 370951 ns/iter (± 3365) | 0.98 |
| batch_points_arrow/encode_total | 719735 ns/iter (± 3109) | 710103 ns/iter (± 6365) | 1.01 |
| batch_points_arrow/decode_log_msg | 348090 ns/iter (± 887) | 350493 ns/iter (± 2018) | 0.99 |
| batch_points_arrow/decode_message_bundles | 2074 ns/iter (± 9) | 2023 ns/iter (± 16) | 1.03 |
| batch_points_arrow/decode_total | 355335 ns/iter (± 504) | 354190 ns/iter (± 1031) | 1.00 |
| arrow_mono_points/insert | 6144479949 ns/iter (± 17257908) | 6054023341 ns/iter (± 22243189) | 1.01 |
| arrow_mono_points/query | 1742768 ns/iter (± 25617) | 1703940 ns/iter (± 23239) | 1.02 |
| arrow_batch_points/insert | 2705557 ns/iter (± 8746) | 2660869 ns/iter (± 28520) | 1.02 |
| arrow_batch_points/query | 17434 ns/iter (± 32) | 17451 ns/iter (± 177) | 1.00 |
| tuid/Tuid::random | 34 ns/iter (± 0) | 34 ns/iter (± 0) | 1 |
This comment was automatically generated by workflow using github-action-benchmark.