Skip to content

Commit

Permalink
Add missing profiling scopes in re_renderer (#1567)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wumpf authored Mar 12, 2023
1 parent fd5d391 commit 90f583e
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 0 deletions.
5 changes: 5 additions & 0 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ impl CpuWriteGpuReadBelt {
buffer_pool: &GpuBufferPool,
num_elements: usize,
) -> CpuWriteGpuReadBuffer<T> {
crate::profile_function!();

// Potentially overestimate alignment with Self::MIN_ALIGNMENT, see Self::MIN_ALIGNMENT doc string.
let alignment = (std::mem::align_of::<T>() as wgpu::BufferAddress).max(Self::MIN_ALIGNMENT);
// Pad out the size of the used buffer to a multiple of Self::MIN_ALIGNMENT.
Expand Down Expand Up @@ -377,6 +379,8 @@ impl CpuWriteGpuReadBelt {
/// further writes) until after [`CpuWriteGpuReadBelt::after_queue_submit`] is called *and* the GPU is done
/// copying the data from them.
pub fn before_queue_submit(&mut self) {
crate::profile_function!();

// This would be a great usecase for persistent memory mapping, i.e. mapping without the need to unmap
// https://github.com/gfx-rs/wgpu/issues/1468
// However, WebGPU does not support this!
Expand All @@ -393,6 +397,7 @@ impl CpuWriteGpuReadBelt {
/// copy operations are submitted. Additional calls are harmless.
/// Not calling this as soon as possible may result in increased buffer memory usage.
pub fn after_queue_submit(&mut self) {
crate::profile_function!();
self.receive_chunks();

let sender = &self.sender;
Expand Down
3 changes: 3 additions & 0 deletions crates/re_renderer/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ impl RenderContext {
queue: Arc<wgpu::Queue>,
config: RenderContextConfig,
) -> Self {
crate::profile_function!();

let mut gpu_resources = WgpuResourcePools::default();
let global_bindings = GlobalBindings::new(&mut gpu_resources, &device);

Expand Down Expand Up @@ -310,6 +312,7 @@ impl RenderContext {
self.cpu_write_gpu_read_belt.lock().before_queue_submit();

if let Some(command_encoder) = self.active_frame.encoder.lock().0.take() {
crate::profile_scope!("finish & submit frame-global encoder");
let command_buffer = command_encoder.finish();

// TODO(andreas): For better performance, we should try to bundle this with the single submit call that is currently happening in eframe.
Expand Down
2 changes: 2 additions & 0 deletions crates/re_renderer/src/view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ impl ViewBuilder {
phase: DrawPhase,
pass: &mut wgpu::RenderPass<'a>,
) {
crate::profile_function!();

for queued_draw in &self.queued_draws {
if queued_draw.participated_phases.contains(&phase) {
let res = (queued_draw.draw_func)(ctx, phase, pass, queued_draw.draw_data.as_ref())
Expand Down
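2 changes: 2 additions & 0 deletions (file path not captured in this page extract; this hunk belongs to a different file than view_builder.rs above)
Original file line number Diff line number Diff line change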
Expand Up @@ -89,6 +89,7 @@ where
desc: &Desc,
creation_func: F,
) -> Arc<DynamicResource<Handle, Desc, Res>> {
crate::profile_function!();
let mut state = self.state.write();

// First check if we can reclaim a resource we have around from a previous frame.
Expand Down Expand Up @@ -143,6 +144,7 @@ where
}

pub fn begin_frame(&mut self, frame_index: u64, mut on_destroy_resource: impl FnMut(&Res)) {
crate::profile_function!();
self.current_frame_index = frame_index;
let state = self.state.get_mut();

Expand Down
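1 change: 1 addition & 0 deletions (file path not captured in this page extract; this hunk belongs to a different file than the hunks above)
Original file line number Diff line number Diff line change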
Expand Up @@ -161,6 +161,7 @@ impl GpuRenderPipelinePool {
shader_modules: &mut GpuShaderModulePool,
pipeline_layouts: &mut GpuPipelineLayoutPool,
) {
crate::profile_function!();
self.pool.current_frame_index = frame_index;

// Recompile render pipelines referencing shader modules that have been recompiled this frame.
Expand Down
3 changes: 3 additions & 0 deletions crates/re_renderer/src/wgpu_resources/static_resource_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ where
creation_func: F,
) -> Handle {
*self.lookup.entry(desc.clone()).or_insert_with(|| {
crate::profile_scope!("Creating new static resource", std::any::type_name::<Res>());
re_log::debug!(?desc, "Created new resource");
let resource = creation_func(desc);
self.resources.insert(StoredResource {
Expand All @@ -64,6 +65,8 @@ where
}

pub fn recreate_resources<F: FnMut(&Desc) -> Option<Res>>(&mut self, mut recreation_func: F) {
crate::profile_function!();

for (desc, handle) in &self.lookup {
if let Some(new_resource) = recreation_func(desc) {
let resource = self.resources.get_mut(*handle).unwrap();
Expand Down

1 comment on commit 90f583e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust Benchmark

| Benchmark suite | Current: 90f583e | Previous: fd5d391 | Ratio |
|---|---|---|---|
| datastore/insert/batch/rects/insert | 552534 ns/iter (± 1423) | 565171 ns/iter (± 6341) | 0.98 |
| datastore/latest_at/batch/rects/query | 1829 ns/iter (± 13) | 1827 ns/iter (± 13) | 1.00 |
| datastore/latest_at/missing_components/primary | 285 ns/iter (± 0) | 285 ns/iter (± 3) | 1 |
| datastore/latest_at/missing_components/secondaries | 431 ns/iter (± 1) | 422 ns/iter (± 7) | 1.02 |
| datastore/range/batch/rects/query | 153436 ns/iter (± 214) | 148603 ns/iter (± 1587) | 1.03 |
| mono_points_arrow/generate_message_bundles | 49693844 ns/iter (± 1067596) | 44137625 ns/iter (± 1130516) | 1.13 |
| mono_points_arrow/generate_messages | 135740691 ns/iter (± 1216667) | 123837354 ns/iter (± 1240130) | 1.10 |
| mono_points_arrow/encode_log_msg | 164144726 ns/iter (± 935636) | 152057320 ns/iter (± 991182) | 1.08 |
| mono_points_arrow/encode_total | 353332279 ns/iter (± 1567920) | 321410881 ns/iter (± 2401484) | 1.10 |
| mono_points_arrow/decode_log_msg | 187175579 ns/iter (± 877284) | 173139255 ns/iter (± 1274897) | 1.08 |
| mono_points_arrow/decode_message_bundles | 72762833 ns/iter (± 1060684) | 63698857 ns/iter (± 978645) | 1.14 |
| mono_points_arrow/decode_total | 256330976 ns/iter (± 1971975) | 233948901 ns/iter (± 1720292) | 1.10 |
| batch_points_arrow/generate_message_bundles | 331425 ns/iter (± 1759) | 328390 ns/iter (± 3067) | 1.01 |
| batch_points_arrow/generate_messages | 6271 ns/iter (± 109) | 6126 ns/iter (± 86) | 1.02 |
| batch_points_arrow/encode_log_msg | 365537 ns/iter (± 1261) | 366279 ns/iter (± 3019) | 1.00 |
| batch_points_arrow/encode_total | 714046 ns/iter (± 2532) | 721958 ns/iter (± 6434) | 0.99 |
| batch_points_arrow/decode_log_msg | 349382 ns/iter (± 1704) | 344186 ns/iter (± 2688) | 1.02 |
| batch_points_arrow/decode_message_bundles | 2111 ns/iter (± 5) | 2065 ns/iter (± 27) | 1.02 |
| batch_points_arrow/decode_total | 355057 ns/iter (± 1096) | 350246 ns/iter (± 1992) | 1.01 |
| arrow_mono_points/insert | 6840618970 ns/iter (± 24686392) | 6038990508 ns/iter (± 16454856) | 1.13 |
| arrow_mono_points/query | 1758997 ns/iter (± 9322) | 1731451 ns/iter (± 19047) | 1.02 |
| arrow_batch_points/insert | 2605055 ns/iter (± 11034) | 2628225 ns/iter (± 23548) | 0.99 |
| arrow_batch_points/query | 16927 ns/iter (± 71) | 16658 ns/iter (± 204) | 1.02 |
| arrow_batch_vecs/insert | 42747 ns/iter (± 106) | 42204 ns/iter (± 388) | 1.01 |
| arrow_batch_vecs/query | 506043 ns/iter (± 398) | 502909 ns/iter (± 4868) | 1.01 |
| tuid/Tuid::random | 34 ns/iter (± 0) | 34 ns/iter (± 0) | 1 |

This comment was automatically generated by a workflow using github-action-benchmark.

Please sign in to comment.