Improve dealing with raw buffers for texture read/write #1744

Merged · 2 commits · Apr 3, 2023
10 changes: 4 additions & 6 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
@@ -1,6 +1,6 @@
 use std::{num::NonZeroU32, sync::mpsc};
 
-use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo};
+use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo};
 
 /// A sub-allocated staging buffer that can be written to.
 ///
@@ -100,15 +100,13 @@ where
         destination: wgpu::ImageCopyTexture<'_>,
         copy_extent: glam::UVec2,
     ) {
-        let bytes_per_row = TextureRowDataInfo::new(destination.texture.format(), copy_extent.x)
-            .bytes_per_row_padded;
+        let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent);
 
         // Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
         // We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
         // (and if you did you probably have regrets anyways!)
-        let required_buffer_size = bytes_per_row * copy_extent.y;
         debug_assert_eq!(
-            required_buffer_size as usize,
+            buffer_info.buffer_size_padded as usize,
             self.num_written() * std::mem::size_of::<T>()
         );
 
@@ -117,7 +115,7 @@ where
             buffer: &self.chunk_buffer,
             layout: wgpu::ImageDataLayout {
                 offset: self.byte_offset_in_chunk_buffer,
-                bytes_per_row: NonZeroU32::new(bytes_per_row),
+                bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
                 rows_per_image: None,
             },
         },
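For context on why the padded sizes matter at all: wgpu requires the `bytes_per_row` of any buffer↔texture copy to be a multiple of `wgpu::COPY_BYTES_PER_ROW_ALIGNMENT` (256 bytes). Below is a minimal standalone sketch of the arithmetic that `Texture2DBufferInfo` now centralizes — the constant value is mirrored from wgpu, everything else is illustrative:

```rust
/// Mirrors wgpu::COPY_BYTES_PER_ROW_ALIGNMENT (256 at the time of writing).
const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

/// Rounds `value` up to the next multiple of `alignment` (like `wgpu::util::align_to`).
fn align_to(value: u32, alignment: u32) -> u32 {
    ((value + alignment - 1) / alignment) * alignment
}

fn main() {
    // Example: an RGBA8 texture (4 bytes per texel), 100 texels wide, 50 rows.
    let bytes_per_row_unpadded = 100 * 4; // 400 bytes of actual data per row.
    let bytes_per_row_padded = align_to(bytes_per_row_unpadded, COPY_BYTES_PER_ROW_ALIGNMENT);
    let buffer_size_padded = bytes_per_row_padded * 50; // What must actually be staged.

    assert_eq!(bytes_per_row_padded, 512);
    assert_eq!(buffer_size_padded, 25_600);
}
```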
10 changes: 4 additions & 6 deletions crates/re_renderer/src/allocator/gpu_readback_belt.rs
@@ -1,6 +1,6 @@
 use std::{num::NonZeroU32, ops::Range, sync::mpsc};
 
-use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, TextureRowDataInfo};
+use crate::wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, Texture2DBufferInfo};
 
 /// Identifier used to identify a buffer upon retrieval of the data.
 ///
@@ -61,13 +61,11 @@ impl GpuReadbackBuffer {
             source.texture.format().describe().block_size as u64,
         );
 
-        let bytes_per_row = TextureRowDataInfo::new(source.texture.format(), copy_extents.x)
-            .bytes_per_row_padded;
-        let num_bytes = bytes_per_row * copy_extents.y;
+        let buffer_info = Texture2DBufferInfo::new(source.texture.format(), *copy_extents);
 
         // Validate that we stay within the slice (wgpu can't fully know our intention here, so we have to check).
         debug_assert!(
-            (num_bytes as u64) <= self.range_in_chunk.end - start_offset,
+            buffer_info.buffer_size_padded <= self.range_in_chunk.end - start_offset,
             "Texture data is too large to fit into the readback buffer!"
         );
 
@@ -77,7 +75,7 @@ impl GpuReadbackBuffer {
             buffer: &self.chunk_buffer,
             layout: wgpu::ImageDataLayout {
                 offset: start_offset,
-                bytes_per_row: NonZeroU32::new(bytes_per_row),
+                bytes_per_row: NonZeroU32::new(buffer_info.bytes_per_row_padded),
                 rows_per_image: None,
             },
         },
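The changed `debug_assert!` checks the padded size against what is left of the sub-allocated chunk. A rough sketch of that invariant, with simplified stand-in types (not the actual belt implementation):

```rust
/// Simplified stand-in for a readback buffer's slice of its backing chunk.
struct ReadbackSlice {
    start_offset: u64,
    range_end: u64,
}

impl ReadbackSlice {
    /// A texture copy of `buffer_size_padded` bytes may only be recorded
    /// if it fits into what is left of the sub-allocated range.
    fn fits(&self, buffer_size_padded: u64) -> bool {
        buffer_size_padded <= self.range_end - self.start_offset
    }
}

fn main() {
    let slice = ReadbackSlice { start_offset: 1024, range_end: 32_768 };
    assert!(slice.fits(25_600)); // The 100x50 RGBA8 example from above fits.
    assert!(!slice.fits(40_000)); // A larger copy would trip the debug_assert.
}
```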
44 changes: 18 additions & 26 deletions crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -13,7 +13,7 @@ use crate::{
     allocator::create_and_fill_uniform_buffer,
     global_bindings::FrameUniformBuffer,
     view_builder::ViewBuilder,
-    wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc, TextureRowDataInfo},
+    wgpu_resources::{GpuBindGroup, GpuTexture, Texture2DBufferInfo, TextureDesc},
     DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, IntRect, RenderContext,
 };
 
@@ -122,12 +122,23 @@ impl PickingLayerProcessor {
         readback_identifier: GpuReadbackIdentifier,
         readback_user_data: T,
     ) -> Self {
-        let row_info = TextureRowDataInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.width());
-        let buffer_size = row_info.bytes_per_row_padded * picking_rect.height();
+        let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
+        //let row_info_depth =
+        //    Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);

[Author comment on lines +126 to +127: this is upcoming work leaking in here a little bit. But I tested this PR as is on the web :)]

+        // Offset of the depth buffer in the readback buffer needs to be aligned to the size of a depth pixel.
+        // This is "trivially true" if the size of the id format is a multiple of the size of the depth format.
+        debug_assert!(
+            Self::PICKING_LAYER_FORMAT.describe().block_size
+                % Self::PICKING_LAYER_DEPTH_FORMAT.describe().block_size
+                == 0
+        );
+        let buffer_size = row_info_id.buffer_size_padded; // + row_info_depth.buffer_size_padded;
 
         let readback_buffer = ctx.gpu_readback_belt.lock().allocate(
             &ctx.device,
             &ctx.gpu_resources.buffers,
-            buffer_size as u64,
+            buffer_size,
             readback_identifier,
             Box::new(ReadbackBeltMetadata {
                 picking_rect,
@@ -279,31 +290,12 @@ impl PickingLayerProcessor {
         ctx.gpu_readback_belt
             .lock()
             .readback_data::<ReadbackBeltMetadata<T>>(identifier, |data, metadata| {
-                // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
-                // so much, that we can't interpret it just as `PickingLayerId`.
-                // Therefore, we have to do a copy of the data regardless.
-                let row_info = TextureRowDataInfo::new(
+                let buffer_info_id = Texture2DBufferInfo::new(
                     Self::PICKING_LAYER_FORMAT,
-                    metadata.picking_rect.extent.x,
+                    metadata.picking_rect.extent,
                 );
-
-                // Copies need to use [u8] because of aforementioned alignment issues.
-                let mut picking_data = vec![
-                    PickingLayerId::default();
-                    (metadata.picking_rect.extent.x * metadata.picking_rect.extent.y)
-                        as usize
-                ];
-                let picking_data_as_u8 = bytemuck::cast_slice_mut(&mut picking_data);
-                for row in 0..metadata.picking_rect.extent.y {
-                    let offset_padded = (row_info.bytes_per_row_padded * row) as usize;
-                    let offset_unpadded = (row_info.bytes_per_row_unpadded * row) as usize;
-                    picking_data_as_u8[offset_unpadded
-                        ..(offset_unpadded + row_info.bytes_per_row_unpadded as usize)]
-                        .copy_from_slice(
-                            &data[offset_padded
-                                ..(offset_padded + row_info.bytes_per_row_unpadded as usize)],
-                        );
-                }
+                let picking_data = buffer_info_id.remove_padding_and_convert(data);
 
                 result = Some(PickingResult {
                     picking_data,
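The new `debug_assert!` is what will later allow depth data to be appended directly after the id data in the same readback buffer: if the id pixel size is a multiple of the depth pixel size, any offset that ends the id region is automatically depth-aligned. A worked example with assumed formats (the real `PICKING_LAYER_*` formats are defined elsewhere in this crate):

```rust
fn main() {
    // Assumed for illustration: ids as Rgba32Uint (16 bytes per texel),
    // depth as R32Float (4 bytes per texel).
    let id_block_size: u32 = 16;
    let depth_block_size: u32 = 4;

    // The assertion from the diff: depth pixels pack evenly into id pixels.
    assert_eq!(id_block_size % depth_block_size, 0);

    // Hence the depth section can start right where the id section ends:
    // the padded id buffer size is a multiple of 16, and therefore also of 4.
    let id_buffer_size_padded: u32 = 25_600;
    assert_eq!(id_buffer_size_padded % depth_block_size, 0);
}
```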
13 changes: 6 additions & 7 deletions crates/re_renderer/src/draw_phases/screenshot.rs
@@ -11,7 +11,7 @@
 //! Or alternatively try to render the images in several tiles 🤔. In any case this would greatly improve quality!
 
 use crate::{
-    wgpu_resources::{GpuTexture, TextureDesc, TextureRowDataInfo},
+    wgpu_resources::{GpuTexture, Texture2DBufferInfo, TextureDesc},
     DebugLabel, GpuReadbackBuffer, GpuReadbackIdentifier, RenderContext,
 };
 
@@ -37,12 +37,11 @@ impl ScreenshotProcessor {
         readback_identifier: GpuReadbackIdentifier,
         readback_user_data: T,
     ) -> Self {
-        let row_info = TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution.x);
-        let buffer_size = row_info.bytes_per_row_padded * resolution.y;
+        let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution);
         let screenshot_readback_buffer = ctx.gpu_readback_belt.lock().allocate(
             &ctx.device,
             &ctx.gpu_resources.buffers,
-            buffer_size as u64,
+            buffer_info.buffer_size_padded,
             readback_identifier,
             Box::new(ReadbackBeltMetadata {
                 extent: resolution,
@@ -130,9 +129,9 @@ impl ScreenshotProcessor {
             .lock()
             .readback_data::<ReadbackBeltMetadata<T>>(identifier, |data: &[u8], metadata| {
                 screenshot_was_available = Some(());
-                let texture_row_info =
-                    TextureRowDataInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent.x);
-                let texture_data = texture_row_info.remove_padding(data);
+                let buffer_info =
+                    Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent);
+                let texture_data = buffer_info.remove_padding(data);
                 on_screenshot(&texture_data, metadata.extent, metadata.user_data);
             });
         screenshot_was_available
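One nice property of `remove_padding` returning a `Cow`: when the screenshot width happens to produce rows that are already 256-byte aligned, no copy is made at all. A quick sanity check of when that happens (RGBA8 assumed for illustration):

```rust
fn main() {
    let align = 256u32;
    let round_up = |v: u32| ((v + align - 1) / align) * align;

    // 1920-wide RGBA8 screenshot: 7680 bytes per row is already 256-aligned,
    // so remove_padding can hand back the readback slice as Cow::Borrowed.
    assert_eq!(round_up(1920 * 4), 1920 * 4);

    // 1000-wide RGBA8 screenshot: 4000 bytes per row get padded to 4096,
    // so remove_padding must copy row by row into a tightly packed buffer.
    assert_eq!(round_up(1000 * 4), 4096);
}
```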
21 changes: 10 additions & 11 deletions crates/re_renderer/src/renderer/depth_cloud.rs
@@ -21,8 +21,8 @@ use crate::{
     view_builder::ViewBuilder,
     wgpu_resources::{
         BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle,
-        GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc, TextureDesc,
-        TextureRowDataInfo,
+        GpuRenderPipelineHandle, GpuTexture, PipelineLayoutDesc, RenderPipelineDesc,
+        Texture2DBufferInfo, TextureDesc,
     },
     ColorMap, OutlineMaskPreference, PickingLayerProcessor,
 };
@@ -336,34 +336,33 @@ fn create_and_upload_texture<T: bytemuck::Pod>(
         .textures
         .alloc(&ctx.device, &depth_texture_desc);
 
-    let TextureRowDataInfo {
-        bytes_per_row_unpadded: bytes_per_row_unaligned,
-        bytes_per_row_padded,
-    } = TextureRowDataInfo::new(depth_texture_desc.format, depth_texture_desc.size.width);
-
     // Not supporting compressed formats here.
     debug_assert!(depth_texture_desc.format.describe().block_dimensions == (1, 1));
 
+    let buffer_info =
+        Texture2DBufferInfo::new(depth_texture_desc.format, depth_cloud.depth_dimensions);
+
     // TODO(andreas): CpuGpuWriteBelt should make it easier to do this.
-    let bytes_padding_per_row = (bytes_per_row_padded - bytes_per_row_unaligned) as usize;
+    let bytes_padding_per_row =
+        (buffer_info.bytes_per_row_padded - buffer_info.bytes_per_row_unpadded) as usize;
     // Sanity check the padding size. If this happens something is seriously wrong, as it would imply
     // that we can't express the required alignment with the block size.
     debug_assert!(
         bytes_padding_per_row % std::mem::size_of::<T>() == 0,
         "Padding is not a multiple of pixel size. Can't correctly pad the texture data"
     );
-    let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::<T>();
 
     let mut depth_texture_staging = ctx.cpu_write_gpu_read_belt.lock().allocate::<T>(
         &ctx.device,
         &ctx.gpu_resources.buffers,
-        data.len() + num_pixel_padding_per_row * depth_texture_desc.size.height as usize,
+        buffer_info.buffer_size_padded as usize / std::mem::size_of::<T>(),
    );
 
     // Fill with a single copy if possible, otherwise do multiple, filling in padding.
-    if num_pixel_padding_per_row == 0 {
+    if bytes_padding_per_row == 0 {
         depth_texture_staging.extend_from_slice(data);
     } else {
+        let num_pixel_padding_per_row = bytes_padding_per_row / std::mem::size_of::<T>();
         for row in data.chunks(depth_texture_desc.size.width as usize) {
             depth_texture_staging.extend_from_slice(row);
             depth_texture_staging
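The `if`/`else` above either uploads the whole depth image with one copy or appends padding after every row. A self-contained sketch of that fill strategy, using a plain `Vec` in place of the staging-belt buffer:

```rust
fn fill_with_row_padding<T: Copy + Default>(
    data: &[T],             // Tightly packed source, width * height elements.
    width: usize,           // Texels per row.
    padding_per_row: usize, // Extra elements needed to reach the padded row size.
) -> Vec<T> {
    let height = data.len() / width;
    let mut staging = Vec::with_capacity((width + padding_per_row) * height);
    if padding_per_row == 0 {
        // Rows are already aligned: a single copy suffices.
        staging.extend_from_slice(data);
    } else {
        // Otherwise copy row by row, appending padding after each one.
        for row in data.chunks(width) {
            staging.extend_from_slice(row);
            staging.extend(std::iter::repeat(T::default()).take(padding_per_row));
        }
    }
    staging
}

fn main() {
    // 3-texel-wide rows padded to 4 elements each.
    let padded = fill_with_row_padding(&[1u16, 2, 3, 4, 5, 6], 3, 1);
    assert_eq!(padded, vec![1, 2, 3, 0, 4, 5, 6, 0]);
}
```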
91 changes: 75 additions & 16 deletions crates/re_renderer/src/wgpu_resources/mod.rs
@@ -117,44 +117,67 @@ impl WgpuResourcePools {
     }
 }
 
-/// Utility for dealing with rows of raw texture data.
-#[derive(Clone, Copy)]
-pub struct TextureRowDataInfo {
+/// Utility for dealing with buffers containing raw 2D texture data.
+#[derive(Clone)]
+pub struct Texture2DBufferInfo {
     /// How many bytes per row contain actual data.
     pub bytes_per_row_unpadded: u32,
 
     /// How many bytes per row are required to be allocated in total.
     ///
     /// Padding bytes are always at the end of a row.
     pub bytes_per_row_padded: u32,
+
+    /// Size required for an unpadded buffer.
+    pub buffer_size_unpadded: wgpu::BufferAddress,
+
+    /// Size required for a padded buffer as it is read/written from/to the GPU.
+    pub buffer_size_padded: wgpu::BufferAddress,
 }
 
-impl TextureRowDataInfo {
-    pub fn new(format: wgpu::TextureFormat, width: u32) -> Self {
+impl Texture2DBufferInfo {
+    #[inline]
+    pub fn new(format: wgpu::TextureFormat, extent: glam::UVec2) -> Self {
         let format_info = format.describe();
-        let width_blocks = width / format_info.block_dimensions.0 as u32;
-        let bytes_per_row_unaligned = width_blocks * format_info.block_size as u32;
+
+        let width_blocks = extent.x / format_info.block_dimensions.0 as u32;
+        let height_blocks = extent.y / format_info.block_dimensions.1 as u32;
+
+        let bytes_per_row_unpadded = width_blocks * format_info.block_size as u32;
+        let bytes_per_row_padded =
+            wgpu::util::align_to(bytes_per_row_unpadded, wgpu::COPY_BYTES_PER_ROW_ALIGNMENT);
 
         Self {
-            bytes_per_row_unpadded: bytes_per_row_unaligned,
-            bytes_per_row_padded: wgpu::util::align_to(
-                bytes_per_row_unaligned,
-                wgpu::COPY_BYTES_PER_ROW_ALIGNMENT,
-            ),
+            bytes_per_row_unpadded,
+            bytes_per_row_padded,
+            buffer_size_unpadded: (bytes_per_row_unpadded * height_blocks) as wgpu::BufferAddress,
+            buffer_size_padded: (bytes_per_row_padded * height_blocks) as wgpu::BufferAddress,
         }
     }
 
+    #[inline]
+    pub fn num_rows(&self) -> u32 {
+        self.buffer_size_padded as u32 / self.bytes_per_row_padded
+    }
+
     /// Removes the padding from a buffer containing gpu texture data.
     ///
+    /// The buffer must have the expected size for a padded buffer holding the texture data.
+    ///
+    /// Note that if you're passing in gpu data, there are no alignment guarantees on the returned slice;
+    /// do NOT convert it using [`bytemuck`]. Use [`Texture2DBufferInfo::remove_padding_and_convert`] instead.
     pub fn remove_padding<'a>(&self, buffer: &'a [u8]) -> Cow<'a, [u8]> {
         crate::profile_function!();
 
+        assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded);
+
         if self.bytes_per_row_padded == self.bytes_per_row_unpadded {
             return Cow::Borrowed(buffer);
         }
 
-        let height = (buffer.len() as u32) / self.bytes_per_row_padded;
-        let mut unpadded_buffer =
-            Vec::with_capacity((self.bytes_per_row_unpadded * height) as usize);
+        let mut unpadded_buffer = Vec::with_capacity(self.buffer_size_unpadded as _);
 
-        for row in 0..height {
+        for row in 0..self.num_rows() {
             let offset = (self.bytes_per_row_padded * row) as usize;
             unpadded_buffer.extend_from_slice(
                 &buffer[offset..(offset + self.bytes_per_row_unpadded as usize)],
@@ -163,4 +186,40 @@ impl TextureRowDataInfo {
 
         unpadded_buffer.into()
     }
+
+    /// Removes the padding from a buffer containing gpu texture data and converts it to a given type.
+    ///
+    /// The buffer must have the expected size for a padded buffer holding the texture data.
+    ///
+    /// The unpadded row size is expected to be a multiple of the size of the target type.
+    /// (Which means that, while uncommon, it technically doesn't need to be as big as a block in the pixel -
+    /// this can be useful for e.g. packing wide bitfields.)
+    pub fn remove_padding_and_convert<T: bytemuck::Pod>(&self, buffer: &[u8]) -> Vec<T> {
+        crate::profile_function!();
+
+        assert!(buffer.len() as wgpu::BufferAddress == self.buffer_size_padded);
+        assert!(self.bytes_per_row_unpadded % std::mem::size_of::<T>() as u32 == 0);
+
+        // Due to https://github.com/gfx-rs/wgpu/issues/3508 the data might be completely unaligned,
+        // so much so that we can't even interpret it as e.g. a u32 slice.
+        // Therefore, we have to do a copy of the data regardless of whether it's padded or not.
+
+        let mut unpadded_buffer: Vec<T> = vec![
+            T::zeroed();
+            (self.num_rows() * self.bytes_per_row_unpadded / std::mem::size_of::<T>() as u32)
+                as usize
+        ]; // Consider using unsafe set_len() instead of vec![] to avoid zeroing the memory.
+        let unpadded_buffer_u8 = bytemuck::cast_slice_mut(&mut unpadded_buffer);
+
+        for row in 0..self.num_rows() {
+            let offset_padded = (self.bytes_per_row_padded * row) as usize;
+            let offset_unpadded = (self.bytes_per_row_unpadded * row) as usize;
+            unpadded_buffer_u8
+                [offset_unpadded..(offset_unpadded + self.bytes_per_row_unpadded as usize)]
+                .copy_from_slice(
+                    &buffer[offset_padded..(offset_padded + self.bytes_per_row_unpadded as usize)],
+                );
+        }
+
+        unpadded_buffer
+    }
 }
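Finally, a usage sketch tying the new helpers together — a hypothetical round-trip for a 100×50 `Rgba8Unorm` readback. The numbers follow directly from `new()` above; the test itself is not part of this PR and assumes the re_renderer crate context:

```rust
use crate::wgpu_resources::Texture2DBufferInfo;

fn roundtrip_example() {
    let info = Texture2DBufferInfo::new(wgpu::TextureFormat::Rgba8Unorm, glam::uvec2(100, 50));

    // 100 texels * 4 bytes = 400 data bytes per row, padded up to 512.
    assert_eq!(info.bytes_per_row_unpadded, 400);
    assert_eq!(info.bytes_per_row_padded, 512);
    assert_eq!(info.buffer_size_unpadded, 400 * 50);
    assert_eq!(info.buffer_size_padded, 512 * 50);
    assert_eq!(info.num_rows(), 50);

    // A padded readback as it would arrive from the GPU...
    let padded = vec![0u8; info.buffer_size_padded as usize];

    // ...can be tightly repacked (a copy here, since 400 != 512)...
    let unpadded = info.remove_padding(&padded);
    assert_eq!(unpadded.len(), info.buffer_size_unpadded as usize);

    // ...or repacked and reinterpreted in one step, e.g. as RGBA texels.
    let texels: Vec<[u8; 4]> = info.remove_padding_and_convert(&padded);
    assert_eq!(texels.len(), 100 * 50);
}
```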