Lift point cloud size limitations (#5192)

### What * First half of #3076 Removes the old limitation of 4 million points caused by the fixed "data texture" size. We now allocate dynamically for the needed number of points. This means that we use less memory when there are fewer points, and that a single `DrawData` supports as many points as there are texels in a maximum-size 2D texture (one texel per point, i.e. the maximum texture dimension squared). This is at least 16.5 million points on mobile WebGL (4096 is a common max texture size on Android) and about 265 million points on a typical desktop machine & WebGPU (common max texture size is 16k). With this change we share a single `DrawData` per visualizer execution, meaning the limit applies separately to the total number of 2D points and to the total number of 3D points in a single space view. ![image](https://github.com/rerun-io/rerun/assets/1220815/d0335a0c-d112-4067-a2ff-1f4f870eeed9) Scene with 35 million points. Renders on my desktop at a bit under 300ms per frame, since we still re-upload data every frame. Note that this change is also the first step towards secondary caching, i.e. **not** re-uploading (and preparing) all the data for the GPU every frame: we can now use as many independent point cloud draw data as we want at a relatively small allocation & bind group setting overhead, without having to fear excessive memory use. (I haven't measured, but the overhead of each new draw data is probably not entirely insignificant, which is why I still keep the number down in this PR, putting the results of an entire visualizer into a single one.) Line renderer limitations will be addressed in a follow-up PR.
### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using newly built examples: [app.rerun.io](https://app.rerun.io/pr/5192/index.html) * Using examples from latest `main` build: [app.rerun.io](https://app.rerun.io/pr/5192/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [app.rerun.io](https://app.rerun.io/pr/5192/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! - [PR Build Summary](https://build.rerun.io/pr/5192) - [Docs preview](https://rerun.io/preview/d390ba812bf94dec252899b499ec68cfe2708268/docs)  - [Examples preview](https://rerun.io/preview/d390ba812bf94dec252899b499ec68cfe2708268/examples)  - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)
rerun-io · Feb 14, 2024 · 21c5bd6 · 21c5bd6
1 parent de69361
commit 21c5bd6
Show file tree

Hide file tree

Showing 19 changed files with 400 additions and 202 deletions.
diff --git a/crates/re_renderer/shader/point_cloud.wgsl b/crates/re_renderer/shader/point_cloud.wgsl
@@ -40,8 +40,6 @@ var<uniform> batch: BatchUniformBuffer;
 const FLAG_ENABLE_SHADING: u32 = 1u;
 const FLAG_DRAW_AS_CIRCLES: u32 = 2u;
 
-const TEXTURE_SIZE: u32 = 2048u;
-
 struct VertexOut {
     @builtin(position)
     position: vec4f,
@@ -77,16 +75,24 @@ struct PointData {
 
 // Read and unpack data at a given location
 fn read_data(idx: u32) -> PointData {
-    let coord = vec2u(idx % TEXTURE_SIZE, idx / TEXTURE_SIZE);
-    let position_data = textureLoad(position_data_texture, coord, 0);
-    let color = textureLoad(color_texture, coord, 0);
+    let position_data_texture_size = textureDimensions(position_data_texture);
+    let position_data = textureLoad(position_data_texture,
+         vec2u(idx % position_data_texture_size.x, idx / position_data_texture_size.x), 0);
+
+    let color_texture_size = textureDimensions(color_texture);
+    let color = textureLoad(color_texture,
+         vec2u(idx % color_texture_size.x, idx / color_texture_size.x), 0);
+
+    let picking_instance_id_texture_size = textureDimensions(picking_instance_id_texture);
+    let picking_instance_id = textureLoad(picking_instance_id_texture,
+         vec2u(idx % picking_instance_id_texture_size.x, idx / picking_instance_id_texture_size.x), 0).xy;
 
     var data: PointData;
     let pos_4d = batch.world_from_obj * vec4f(position_data.xyz, 1.0);
     data.pos = pos_4d.xyz / pos_4d.w;
     data.unresolved_radius = position_data.w;
     data.color = color;
-    data.picking_instance_id = textureLoad(picking_instance_id_texture, coord, 0).rg;
+    data.picking_instance_id = picking_instance_id;
     return data;
 }
 

diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs
@@ -2,16 +2,22 @@ use std::sync::mpsc;
 
 use crate::{
     texture_info::Texture2DBufferInfo,
-    wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool},
+    wgpu_resources::{BufferDesc, GpuBuffer, GpuBufferPool, GpuTexture},
 };
 
 #[derive(thiserror::Error, Debug, PartialEq, Eq)]
 pub enum CpuWriteGpuReadError {
     #[error("Attempting to allocate an empty buffer.")]
     ZeroSizeBufferAllocation,
 
-    #[error("Buffer is full, can't append more data! Buffer has capacity for {buffer_element_capacity} elements.")]
-    BufferFull { buffer_element_capacity: usize },
+    #[error("Buffer is full, can't append more data!
+ Buffer contains {buffer_element_size} elements and has a capacity for {buffer_element_capacity} elements.
+ Tried to add {num_elements_attempted_to_add} elements.")]
+    BufferFull {
+        buffer_element_capacity: usize,
+        buffer_element_size: usize,
+        num_elements_attempted_to_add: usize,
+    },
 
     #[error("Target buffer has a size of {target_buffer_size}, can't write {copy_size} bytes with an offset of {destination_offset}!")]
     TargetBufferTooSmall {
@@ -20,9 +26,9 @@ pub enum CpuWriteGpuReadError {
         destination_offset: u64,
     },
 
-    #[error("Target texture doesn't fit the size of the written data to this buffer! Texture size: {target_texture_size} bytes, written data size: {written_data_size} bytes")]
+    #[error("Target texture doesn't fit the size of the written data to this buffer! Texture copy size: {copy_size:?} bytes, written data size: {written_data_size} bytes")]
     TargetTextureBufferSizeMismatch {
-        target_texture_size: u64,
+        copy_size: wgpu::Extent3d,
         written_data_size: usize,
     },
 }
@@ -95,6 +101,8 @@ where
             (
                 Err(CpuWriteGpuReadError::BufferFull {
                     buffer_element_capacity: self.capacity(),
+                    buffer_element_size: self.num_written(),
+                    num_elements_attempted_to_add: elements.len(),
                 }),
                 &elements[..self.remaining_capacity()],
             )
@@ -133,6 +141,8 @@ where
                 if self.unwritten_element_range.start >= self.unwritten_element_range.end {
                     return Err(CpuWriteGpuReadError::BufferFull {
                         buffer_element_capacity: self.capacity(),
+                        buffer_element_size: self.num_written(),
+                        num_elements_attempted_to_add: 1,
                     });
                 }
 
@@ -154,6 +164,8 @@ where
             (
                 Err(CpuWriteGpuReadError::BufferFull {
                     buffer_element_capacity: self.capacity(),
+                    buffer_element_size: self.num_written(),
+                    num_elements_attempted_to_add: num_elements,
                 }),
                 self.remaining_capacity(),
             )
@@ -183,6 +195,8 @@ where
         if self.remaining_capacity() == 0 {
             return Err(CpuWriteGpuReadError::BufferFull {
                 buffer_element_capacity: self.capacity(),
+                buffer_element_size: self.num_written(),
+                num_elements_attempted_to_add: 1,
             });
         }
 
@@ -210,6 +224,29 @@ where
         self.unwritten_element_range.end
     }
 
+    /// Copies all so far written data to the first layer of a 2d texture.
+    ///
+    /// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data.
+    /// (taking into account required padding as specified by [`wgpu::COPY_BYTES_PER_ROW_ALIGNMENT`])
+    ///
+    /// Fails if the buffer size is not sufficient to fill the entire texture.
+    pub fn copy_to_texture2d_entire_first_layer(
+        self,
+        encoder: &mut wgpu::CommandEncoder,
+        destination: &GpuTexture,
+    ) -> Result<(), CpuWriteGpuReadError> {
+        self.copy_to_texture2d(
+            encoder,
+            wgpu::ImageCopyTexture {
+                texture: &destination.texture,
+                mip_level: 0,
+                origin: wgpu::Origin3d::ZERO,
+                aspect: wgpu::TextureAspect::All,
+            },
+            destination.texture.size(),
+        )
+    }
+
     /// Copies all so far written data to a rectangle on a single 2d texture layer.
     ///
     /// Assumes that the buffer consists of as-tightly-packed-as-possible rows of data.
@@ -221,17 +258,17 @@ where
         self,
         encoder: &mut wgpu::CommandEncoder,
         destination: wgpu::ImageCopyTexture<'_>,
-        copy_extent: glam::UVec2,
+        copy_size: wgpu::Extent3d,
     ) -> Result<(), CpuWriteGpuReadError> {
-        let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_extent);
+        let buffer_info = Texture2DBufferInfo::new(destination.texture.format(), copy_size);
 
         // Validate that we stay within the written part of the slice (wgpu can't fully know our intention here, so we have to check).
         // We go one step further and require the size to be exactly equal - it's too unlikely that you wrote more than is needed!
         // (and if you did you probably have regrets anyways!)
         if buffer_info.buffer_size_padded as usize != self.num_written() * std::mem::size_of::<T>()
         {
             return Err(CpuWriteGpuReadError::TargetTextureBufferSizeMismatch {
-                target_texture_size: buffer_info.buffer_size_padded,
+                copy_size,
                 written_data_size: self.num_written() * std::mem::size_of::<T>(),
             });
         }
@@ -246,11 +283,7 @@ where
                 },
             },
             destination,
-            wgpu::Extent3d {
-                width: copy_extent.x,
-                height: copy_extent.y,
-                depth_or_array_layers: 1,
-            },
+            copy_size,
         );
 
         Ok(())

diff --git a/crates/re_renderer/src/allocator/gpu_readback_belt.rs b/crates/re_renderer/src/allocator/gpu_readback_belt.rs
@@ -42,7 +42,7 @@ impl GpuReadbackBuffer {
         &mut self,
         encoder: &mut wgpu::CommandEncoder,
         source: wgpu::ImageCopyTexture<'_>,
-        copy_extents: glam::UVec2,
+        copy_extents: wgpu::Extent3d,
     ) -> Result<(), GpuReadbackError> {
         self.read_multiple_texture2d(encoder, &[(source, copy_extents)])
     }
@@ -60,7 +60,7 @@ impl GpuReadbackBuffer {
     pub fn read_multiple_texture2d(
         &mut self,
         encoder: &mut wgpu::CommandEncoder,
-        sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, glam::UVec2)],
+        sources_and_extents: &[(wgpu::ImageCopyTexture<'_>, wgpu::Extent3d)],
     ) -> Result<(), GpuReadbackError> {
         for (source, copy_extents) in sources_and_extents {
             let start_offset = wgpu::util::align_to(
@@ -92,11 +92,7 @@ impl GpuReadbackBuffer {
                         rows_per_image: None,
                     },
                 },
-                wgpu::Extent3d {
-                    width: copy_extents.x,
-                    height: copy_extents.y,
-                    depth_or_array_layers: 1,
-                },
+                *copy_extents,
             );
 
             self.range_in_chunk =

diff --git a/crates/re_renderer/src/draw_phases/picking_layer.rs b/crates/re_renderer/src/draw_phases/picking_layer.rs
@@ -261,14 +261,15 @@ impl PickingLayerProcessor {
             frame_uniform_buffer,
         );
 
-        let row_info_id = Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.extent);
+        let row_info_id =
+            Texture2DBufferInfo::new(Self::PICKING_LAYER_FORMAT, picking_rect.wgpu_extent());
         let row_info_depth = Texture2DBufferInfo::new(
             if direct_depth_readback {
                 Self::PICKING_LAYER_DEPTH_FORMAT
             } else {
                 DepthReadbackWorkaround::READBACK_FORMAT
             },
-            picking_rect.extent,
+            picking_rect.wgpu_extent(),
         );
 
         // Offset of the depth buffer in the readback buffer needs to be aligned to size of a depth pixel.
@@ -343,10 +344,7 @@ impl PickingLayerProcessor {
         encoder: &mut wgpu::CommandEncoder,
         render_pipelines: &GpuRenderPipelinePoolAccessor<'_>,
     ) -> Result<(), PickingLayerError> {
-        let extent = glam::uvec2(
-            self.picking_target.texture.width(),
-            self.picking_target.texture.height(),
-        );
+        let extent = self.picking_target.texture.size();
 
         let readable_depth_texture =
             if let Some(depth_copy_workaround) = self.depth_readback_workaround.as_ref() {
@@ -420,15 +418,15 @@ impl PickingLayerProcessor {
 
                 let buffer_info_id = Texture2DBufferInfo::new(
                     Self::PICKING_LAYER_FORMAT,
-                    metadata.picking_rect.extent,
+                    metadata.picking_rect.wgpu_extent(),
                 );
                 let buffer_info_depth = Texture2DBufferInfo::new(
                     if metadata.depth_readback_workaround_in_use {
                         DepthReadbackWorkaround::READBACK_FORMAT
                     } else {
                         Self::PICKING_LAYER_DEPTH_FORMAT
                     },
-                    metadata.picking_rect.extent,
+                    metadata.picking_rect.wgpu_extent(),
                 );
 
                 let picking_id_data = buffer_info_id

diff --git a/crates/re_renderer/src/draw_phases/screenshot.rs b/crates/re_renderer/src/draw_phases/screenshot.rs
@@ -21,7 +21,7 @@ use crate::{
 
 /// Type used as user data on the gpu readback belt.
 struct ReadbackBeltMetadata<T: 'static + Send + Sync> {
-    extent: glam::UVec2,
+    extent: wgpu::Extent3d,
     user_data: T,
 }
 
@@ -41,14 +41,19 @@ impl ScreenshotProcessor {
         readback_identifier: GpuReadbackIdentifier,
         readback_user_data: T,
     ) -> Self {
-        let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, resolution);
+        let size = wgpu::Extent3d {
+            width: resolution.x,
+            height: resolution.y,
+            depth_or_array_layers: 1,
+        };
+        let buffer_info = Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, size);
         let screenshot_readback_buffer = Mutex::new(ctx.gpu_readback_belt.lock().allocate(
             &ctx.device,
             &ctx.gpu_resources.buffers,
             buffer_info.buffer_size_padded,
             readback_identifier,
             Box::new(ReadbackBeltMetadata {
-                extent: resolution,
+                extent: size,
                 user_data: readback_user_data,
             }),
         ));
@@ -57,11 +62,7 @@ impl ScreenshotProcessor {
             &ctx.device,
             &TextureDesc {
                 label: format!("{view_name} - ScreenshotProcessor").into(),
-                size: wgpu::Extent3d {
-                    width: resolution.x,
-                    height: resolution.y,
-                    depth_or_array_layers: 1,
-                },
+                size,
                 mip_level_count: 1,
                 sample_count: 1,
                 dimension: wgpu::TextureDimension::D2,
@@ -113,10 +114,7 @@ impl ScreenshotProcessor {
                 origin: wgpu::Origin3d::ZERO,
                 aspect: wgpu::TextureAspect::All,
             },
-            glam::uvec2(
-                self.screenshot_texture.texture.width(),
-                self.screenshot_texture.texture.height(),
-            ),
+            self.screenshot_texture.texture.size(),
         )
     }
 
@@ -141,7 +139,11 @@ impl ScreenshotProcessor {
                 let buffer_info =
                     Texture2DBufferInfo::new(Self::SCREENSHOT_COLOR_FORMAT, metadata.extent);
                 let texture_data = buffer_info.remove_padding(data);
-                on_screenshot(&texture_data, metadata.extent, metadata.user_data);
+                on_screenshot(
+                    &texture_data,
+                    glam::uvec2(metadata.extent.width, metadata.extent.height),
+                    metadata.user_data,
+                );
             });
         screenshot_was_available
     }