Skip to content

Commit

Permalink
Lift point cloud size limitations (#5192)
Browse files Browse the repository at this point in the history
### What

* First half of #3076

Removes the old limitation of 4 million points caused by the fixed "data texture"
size.

We now allocate dynamically for the needed number of points. This means
that we use less memory when there are fewer points, and support as many
points in a single `DrawData` as the maximum texture size allows. This is at
least 16.5 million points on mobile WebGL (4096 is a common max texture size
on Android) and about 265 million points on a typical desktop machine &
WebGPU (common max texture size is 16k).
With this change we share a single `DrawData` per visualizer execution,
meaning the limit applies separately to the total number of 2D points and
the total number of 3D points in a single space view.


![image](https://github.com/rerun-io/rerun/assets/1220815/d0335a0c-d112-4067-a2ff-1f4f870eeed9)
Scene with 35 million points. Renders on my desktop at a bit under 300ms
per frame, since we still re-upload the data every frame.

Note that this change is also the first step towards secondary caching,
i.e. **not** re-uploading (and preparing) all the data for the GPU every
frame: we can now use as many independent point cloud draw data as we
want at a relatively small allocation & bind group setting overhead
without having to fear excessive memory use. (I haven't measured, but
the overhead for new draw data is likely not entirely insignificant,
which is why I still keep the number down in this PR, putting the
results of an entire visualizer into a single one.)

Line renderer limitations will be addressed in a follow-up PR.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using newly built examples:
[app.rerun.io](https://app.rerun.io/pr/5192/index.html)
* Using examples from latest `main` build:
[app.rerun.io](https://app.rerun.io/pr/5192/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[app.rerun.io](https://app.rerun.io/pr/5192/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/5192)
- [Docs
preview](https://rerun.io/preview/d390ba812bf94dec252899b499ec68cfe2708268/docs)
<!--DOCS-PREVIEW-->
- [Examples
preview](https://rerun.io/preview/d390ba812bf94dec252899b499ec68cfe2708268/examples)
<!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)
  • Loading branch information
Wumpf authored Feb 14, 2024
1 parent de69361 commit 21c5bd6
Show file tree
Hide file tree
Showing 19 changed files with 400 additions and 202 deletions.
18 changes: 12 additions & 6 deletions crates/re_renderer/shader/point_cloud.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ var<uniform> batch: BatchUniformBuffer;
const FLAG_ENABLE_SHADING: u32 = 1u;
const FLAG_DRAW_AS_CIRCLES: u32 = 2u;

const TEXTURE_SIZE: u32 = 2048u;

struct VertexOut {
@builtin(position)
position: vec4f,
Expand Down Expand Up @@ -77,16 +75,24 @@ struct PointData {

// Read and unpack data at a given location
fn read_data(idx: u32) -> PointData {
let coord = vec2u(idx % TEXTURE_SIZE, idx / TEXTURE_SIZE);
let position_data = textureLoad(position_data_texture, coord, 0);
let color = textureLoad(color_texture, coord, 0);
let position_data_texture_size = textureDimensions(position_data_texture);
let position_data = textureLoad(position_data_texture,
vec2u(idx % position_data_texture_size.x, idx / position_data_texture_size.x), 0);

let color_texture_size = textureDimensions(color_texture);
let color = textureLoad(color_texture,
vec2u(idx % color_texture_size.x, idx / color_texture_size.x), 0);

let picking_instance_id_texture_size = textureDimensions(picking_instance_id_texture);
let picking_instance_id = textureLoad(picking_instance_id_texture,
vec2u(idx % picking_instance_id_texture_size.x, idx / picking_instance_id_texture_size.x), 0).xy;

var data: PointData;
let pos_4d = batch.world_from_obj * vec4f(position_data.xyz, 1.0);
data.pos = pos_4d.xyz / pos_4d.w;
data.unresolved_radius = position_data.w;
data.color = color;
data.picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg;
data.picking_instance_id = picking_instance_id;
return data;
}

Expand Down
59 changes: 46 additions & 13 deletions crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,22 @@ use std::sync::mpsc;

use crate::{
texture_info::Texture2DBufferInfo,
wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool},
wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, GpuTexture},
};

#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum CpuWriteGpuReadError {
#[error("Attempting to allocate an empty buffer.")]
ZeroSizeBufferAllocation,

#[error("Buffer is full, can't append more data! Buffer has capacity for {buffer_element_capacity} elements.")]
BufferFull { buffer_element_capacity: usize },
#[error("Buffer is full, can't append more data!
Buffer contains {buffer_element_size} elements and has a capacity for {buffer_element_capacity} elements.
Tried to add {num_elements_attempted_to_add} elements.")]
BufferFull {
buffer_element_capacity: usize,
buffer_element_size: usize,
num_elements_attempted_to_add: usize,
},

#[error("Target buffer has a size of {target_buffer_size}, can't write {copy_size} bytes with an offset of {destination_offset}!")]
TargetBufferTooSmall {
Expand All @@ -20,9 +26,9 @@ pub enum CpuWriteGpuReadError {
destination_offset: u64,
},

#[error("Target texture doesn't fit the size of the written data to this buffer! Texture size: {target_texture_size} bytes, written data size: {written_data_size} bytes")]
#[error("Target texture doesn't fit the size of the written data to this buffer! Texture copy size: {copy_size:?} bytes, written data size: {written_data_size} bytes")]
TargetTextureBufferSizeMismatch {
target_texture_size: u64,
copy_size: wgpu::Extent3d,
written_data_size: usize,
},
}
Expand Down Expand Up @@ -95,6 +101,8 @@ where
(
Err(CpuWriteGpuReadError::BufferFull {
buffer_element_capacity: self.capacity(),
buffer_element_size: self.num_written(),
num_elements_attempted_to_add: elements.len(),
}),
&elements[..self.remaining_capacity()],
)
Expand Down Expand Up @@ -133,6 +141,8 @@ where
if self.unwritten_element_range.start >= self.unwritten_element_range.end {
return Err(CpuWriteGpuReadError::BufferFull {
buffer_element_capacity: self.capacity(),
buffer_element_size: self.num_written(),
num_elements_attempted_to_add: 1,
});
}

Expand All @@ -154,6 +164,8 @@ where
(
Err(CpuWriteGpuReadError::BufferFull {
buffer_element_capacity: self.capacity(),
buffer_element_size: self.num_written(),
num_elements_attempted_to_add: num_elements,
}),
self.remaining_capacity(),
)
Expand Down Expand Up @@ -183,6 +195,8 @@ where
if self.remaining_capacity() == 0 {
return Err(CpuWriteGpuReadError::BufferFull {
buffer_element_capacity: self.capacity(),
buffer_element_size: self.num_written(),
num_elements_attempted_to_add: 1,
});
}

Expand Down Expand Up @@ -210,6 +224,29 @@ where
self.unwritten_element_range.end
}

/// Copies everything written so far into mip level 0 of `destination`, starting at the
/// texture origin and covering the full extent reported by `destination.texture.size()`.
///
/// The buffer contents are expected to be laid out as rows packed as tightly as possible,
/// including the row padding required by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`].
///
/// This is a convenience wrapper around [`Self::copy_to_texture2d`] and returns whatever
/// that call returns; in particular the copy is rejected when the amount of written data
/// does not match the (padded) size required for the copy.
pub fn copy_to_texture2d_entire_first_layer(
    self,
    encoder: &mut wgpu::CommandEncoder,
    destination: &GpuTexture,
) -> Result<(), CpuWriteGpuReadError> {
    // Target the top mip level at the origin, all aspects.
    let copy_target = wgpu::ImageCopyTexture {
        texture: &destination.texture,
        mip_level: 0,
        origin: wgpu::Origin3d::ZERO,
        aspect: wgpu::TextureAspect::All,
    };
    // The copy spans the whole texture as reported by wgpu.
    let full_size = destination.texture.size();

    self.copy_to_texture2d(encoder, copy_target, full_size)
}

/// Copies all so far written data to a rectangle on a single 2d texture layer.
///
/// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data.
Expand All @@ -221,17 +258,17 @@ where
self,
encoder: &mut wgpu::CommandEncoder,
destination: wgpu::ImageCopyTexture<'_>,
copy_extent: glam::UVec2,
copy_size: wgpu::Extent3d,
) -> Result<(), CpuWriteGpuReadError> {
let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent);
let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_size);

// Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
// We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
// (and if you did you probably have regrets anyways!)
if buffer_info.buffer_size_padded as usize != self.num_written() * std::mem::size_of::<T>()
{
return Err(CpuWriteGpuReadError::TargetTextureBufferSizeMismatch {
target_texture_size: buffer_info.buffer_size_padded,
copy_size,
written_data_size: self.num_written() * std::mem::size_of::<T>(),
});
}
Expand All @@ -246,11 +283,7 @@ where
},
},
destination,
wgpu::Extent3d {
width: copy_extent.x,
height: copy_extent.y,
depth_or_array_layers: 1,
},
copy_size,
);

Ok(())
Expand Down
10 changes: 3 additions & 7 deletions crates/re_renderer/src/allocator/gpu_readback_belt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl GpuReadbackBuffer {
&mut self,
encoder: &mut wgpu::CommandEncoder,
source: wgpu::ImageCopyTexture<'_>,
copy_extents: glam::UVec2,
copy_extents: wgpu::Extent3d,
) -> Result<(), GpuReadbackError> {
self.read_multiple_texture2d(encoder, &[(source, copy_extents)])
}
Expand All @@ -60,7 +60,7 @@ impl GpuReadbackBuffer {
pub fn read_multiple_texture2d(
&mut self,
encoder: &mut wgpu::CommandEncoder,
sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, glam::UVec2)],
sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, wgpu::Extent3d)],
) -> Result<(), GpuReadbackError> {
for (source, copy_extents) in sources_and_extents {
let start_offset = wgpu::util::align_to(
Expand Down Expand Up @@ -92,11 +92,7 @@ impl GpuReadbackBuffer {
rows_per_image: None,
},
},
wgpu::Extent3d {
width: copy_extents.x,
height: copy_extents.y,
depth_or_array_layers: 1,
},
*copy_extents,
);

self.range_in_chunk =
Expand Down
14 changes: 6 additions & 8 deletions crates/re_renderer/src/draw_phases/picking_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,15 @@ impl PickingLayerProcessor {
frame_uniform_buffer,
);

let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
let row_info_id =
Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.wgpu_extent());
let row_info_depth = Texture2DBufferInfo::new(
if direct_depth_readback {
Self::PICKING_LAYER_DEPTH_FORMAT
} else {
DepthReadbackWorkaround::READBACK_FORMAT
},
picking_rect.extent,
picking_rect.wgpu_extent(),
);

// Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
Expand Down Expand Up @@ -343,10 +344,7 @@ impl PickingLayerProcessor {
encoder: &mut wgpu::CommandEncoder,
render_pipelines: &GpuRenderPipelinePoolAccessor<'_>,
) -> Result<(), PickingLayerError> {
let extent = glam::uvec2(
self.picking_target.texture.width(),
self.picking_target.texture.height(),
);
let extent = self.picking_target.texture.size();

let readable_depth_texture =
if let Some(depth_copy_workaround) = self.depth_readback_workaround.as_ref() {
Expand Down Expand Up @@ -420,15 +418,15 @@ impl PickingLayerProcessor {

let buffer_info_id = Texture2DBufferInfo::new(
Self::PICKING_LAYER_FORMAT,
metadata.picking_rect.extent,
metadata.picking_rect.wgpu_extent(),
);
let buffer_info_depth = Texture2DBufferInfo::new(
if metadata.depth_readback_workaround_in_use {
DepthReadbackWorkaround::READBACK_FORMAT
} else {
Self::PICKING_LAYER_DEPTH_FORMAT
},
metadata.picking_rect.extent,
metadata.picking_rect.wgpu_extent(),
);

let picking_id_data = buffer_info_id
Expand Down
28 changes: 15 additions & 13 deletions crates/re_renderer/src/draw_phases/screenshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{

/// Type used as user data on the gpu readback belt.
struct ReadbackBeltMetadata<T: 'static + Send + Sync> {
extent: glam::UVec2,
extent: wgpu::Extent3d,
user_data: T,
}

Expand All @@ -41,14 +41,19 @@ impl ScreenshotProcessor {
readback_identifier: GpuReadbackIdentifier,
readback_user_data: T,
) -> Self {
let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution);
let size = wgpu::Extent3d {
width: resolution.x,
height: resolution.y,
depth_or_array_layers: 1,
};
let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, size);
let screenshot_readback_buffer = Mutex::new(ctx.gpu_readback_belt.lock().allocate(
&ctx.device,
&ctx.gpu_resources.buffers,
buffer_info.buffer_size_padded,
readback_identifier,
Box::new(ReadbackBeltMetadata {
extent: resolution,
extent: size,
user_data: readback_user_data,
}),
));
Expand All @@ -57,11 +62,7 @@ impl ScreenshotProcessor {
&ctx.device,
&TextureDesc {
label: format!("{view_name} - ScreenshotProcessor").into(),
size: wgpu::Extent3d {
width: resolution.x,
height: resolution.y,
depth_or_array_layers: 1,
},
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
Expand Down Expand Up @@ -113,10 +114,7 @@ impl ScreenshotProcessor {
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
glam::uvec2(
self.screenshot_texture.texture.width(),
self.screenshot_texture.texture.height(),
),
self.screenshot_texture.texture.size(),
)
}

Expand All @@ -141,7 +139,11 @@ impl ScreenshotProcessor {
let buffer_info =
Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent);
let texture_data = buffer_info.remove_padding(data);
on_screenshot(&texture_data, metadata.extent, metadata.user_data);
on_screenshot(
&texture_data,
glam::uvec2(metadata.extent.width, metadata.extent.height),
metadata.user_data,
);
});
screenshot_was_available
}
Expand Down
Loading

0 comments on commit 21c5bd6

Please sign in to comment.