Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make CpuWriteGpuReadBelt texture copies easier/less error prone #1689

Merged
merged 3 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{num::NonZeroU32, ops::DerefMut, sync::mpsc};

use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool};
use crate::wgpu_resources::{texture_row_data_info, BufferDesc, GpuBuffer, GpuBufferPool};

/// A sub-allocated staging buffer that can be written to.
///
Expand Down Expand Up @@ -85,26 +85,46 @@ where
self.unwritten_element_range.start
}

/// Copies the entire buffer to a texture and drops it.
pub fn copy_to_texture(
/// Copies all so far written data to a rectangle on a single 2d texture layer.
///
/// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data.
/// (taking into account required padding as specified by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`])
///
/// Implementation note:
/// Does 2D-only entirely for convenience as it greatly simplifies the input parameters.
pub fn copy_to_texture2d(
self,
encoder: &mut wgpu::CommandEncoder,
destination: wgpu::ImageCopyTexture<'_>,
bytes_per_row: Option<NonZeroU32>,
rows_per_image: Option<NonZeroU32>,
copy_size: wgpu::Extent3d,
copy_extent: glam::UVec2,
) {
let bytes_per_row =
texture_row_data_info(destination.texture.format(), copy_extent.x).bytes_per_row_padded;

// Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
// We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
// (and if you did you probably have regrets anyways!)
let required_buffer_size = bytes_per_row * copy_extent.y;
debug_assert_eq!(
required_buffer_size as usize,
self.num_written() * std::mem::size_of::<T>()
);

encoder.copy_buffer_to_texture(
wgpu::ImageCopyBuffer {
buffer: &self.chunk_buffer,
layout: wgpu::ImageDataLayout {
offset: self.byte_offset_in_chunk_buffer,
bytes_per_row,
rows_per_image,
bytes_per_row: NonZeroU32::new(bytes_per_row),
rows_per_image: None,
},
},
destination,
copy_size,
wgpu::Extent3d {
width: copy_extent.x,
height: copy_extent.y,
depth_or_array_layers: 1,
},
);
}

Expand Down
7 changes: 2 additions & 5 deletions crates/re_renderer/src/renderer/depth_cloud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
//! behaves pretty much exactly like our point cloud renderer (see [`point_cloud.rs`]).

use smallvec::smallvec;
use std::num::NonZeroU32;

use crate::{
allocator::create_and_fill_uniform_buffer_batch,
Expand Down Expand Up @@ -330,17 +329,15 @@ fn create_and_upload_texture<T: bytemuck::Pod>(
}
}

depth_texture_staging.copy_to_texture(
depth_texture_staging.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &depth_texture.inner.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
Some(NonZeroU32::new(bytes_per_row_padded).expect("invalid bytes per row")),
None,
depth_texture_size,
depth_cloud.depth_dimensions,
);

depth_texture
Expand Down
71 changes: 29 additions & 42 deletions crates/re_renderer/src/renderer/point_cloud.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
//! that srgb->linear conversion happens on texture load.
//!

use std::{
num::{NonZeroU32, NonZeroU64},
ops::Range,
};
use std::{num::NonZeroU64, ops::Range};

use crate::{
allocator::create_and_fill_uniform_buffer_batch, renderer::OutlineMaskProcessor, DebugLabel,
Expand All @@ -25,7 +22,6 @@ use crate::{
use bitflags::bitflags;
use bytemuck::Zeroable;
use enumset::{enum_set, EnumSet};
use itertools::Itertools;
use smallvec::smallvec;

use crate::{
Expand Down Expand Up @@ -175,7 +171,7 @@ impl PointCloudDrawData {
/// If no batches are passed, all points are assumed to be in a single batch with identity transform.
pub fn new<T>(
ctx: &mut RenderContext,
builder: PointCloudBuilder<T>,
mut builder: PointCloudBuilder<T>,
) -> Result<Self, PointCloudDrawDataError> {
crate::profile_function!();

Expand Down Expand Up @@ -266,63 +262,54 @@ impl PointCloudDrawData {
},
);

// TODO(andreas): We want a staging-belt(-like) mechanism to upload data instead of the queue.
// These staging buffers would be provided by the belt.
// To make the data upload simpler (and have it be done in one go), we always update full rows of each of our textures
// To make the data upload simpler (and have it be done in one copy operation), we always update full rows of each of our textures
let num_points_written =
wgpu::util::align_to(vertices.len() as u32, DATA_TEXTURE_SIZE) as usize;
let num_points_zeroed = num_points_written - vertices.len();
let position_and_size_staging = {
crate::profile_scope!("collect_pos_size");
vertices
.iter()
.map(|point| gpu_data::PositionData {
pos: point.position,
radius: point.radius,
})
.chain(std::iter::repeat(gpu_data::PositionData::zeroed()).take(num_points_zeroed))
.collect_vec()
};

// Upload data from staging buffers to gpu.
let size = wgpu::Extent3d {
width: DATA_TEXTURE_SIZE,
height: num_points_written as u32 / DATA_TEXTURE_SIZE,
depth_or_array_layers: 1,
};
let num_elements_padding = num_points_written - vertices.len();
let texture_copy_extent = glam::uvec2(
DATA_TEXTURE_SIZE,
num_points_written as u32 / DATA_TEXTURE_SIZE,
);

{
crate::profile_scope!("write_pos_size_texture");
ctx.queue.write_texture(

let mut staging_buffer = ctx.cpu_write_gpu_read_belt.lock().allocate(
&ctx.device,
&ctx.gpu_resources.buffers,
num_points_written,
);
staging_buffer.extend(vertices.iter().map(|point| gpu_data::PositionData {
pos: point.position,
radius: point.radius,
}));
staging_buffer.extend(
std::iter::repeat(gpu_data::PositionData::zeroed()).take(num_elements_padding),
);
staging_buffer.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &position_data_texture.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&position_and_size_staging),
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: NonZeroU32::new(
DATA_TEXTURE_SIZE * std::mem::size_of::<gpu_data::PositionData>() as u32,
),
rows_per_image: None,
},
size,
texture_copy_extent,
);
}

builder.color_buffer.copy_to_texture(
builder
.color_buffer
.extend(std::iter::repeat(ecolor::Color32::TRANSPARENT).take(num_elements_padding));
builder.color_buffer.copy_to_texture2d(
ctx.active_frame.before_view_builder_encoder.lock().get(),
wgpu::ImageCopyTexture {
texture: &color_texture.texture,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
NonZeroU32::new(DATA_TEXTURE_SIZE * std::mem::size_of::<[u8; 4]>() as u32),
None,
size,
texture_copy_extent,
);

let draw_data_uniform_buffer_bindings = create_and_fill_uniform_buffer_batch(
Expand Down