From 6f75c54d640940ae0a04aa5d074000f17ba57ef2 Mon Sep 17 00:00:00 2001
From: Daniel Keitel
Date: Wed, 6 Dec 2023 22:12:41 +0100
Subject: [PATCH] [wgpu-hal] Inline RayQuery Support (#3507)

Co-authored-by: JMS55 <47158642+JMS55@users.noreply.github.com>
Co-authored-by: Ashley Ruglys
Co-authored-by: Connor Fitzgerald
---
 CHANGELOG.md                                  |   14 +
 Cargo.lock                                    |    1 +
 wgpu-core/src/binding_model.rs                |    1 +
 wgpu-core/src/device/resource.rs              |    2 +
 wgpu-hal/Cargo.toml                           |    1 +
 wgpu-hal/examples/halmark/main.rs             |    2 +
 wgpu-hal/examples/ray-traced-triangle/main.rs | 1113 +++++++++++++++++
 .../examples/ray-traced-triangle/shader.wgsl  |   37 +
 wgpu-hal/src/dx12/command.rs                  |   20 +
 wgpu-hal/src/dx12/conv.rs                     |    1 +
 wgpu-hal/src/dx12/device.rs                   |   36 +
 wgpu-hal/src/dx12/mod.rs                      |    5 +
 wgpu-hal/src/empty.rs                         |   36 +
 wgpu-hal/src/gles/command.rs                  |   18 +
 wgpu-hal/src/gles/device.rs                   |   21 +
 wgpu-hal/src/gles/mod.rs                      |    1 +
 wgpu-hal/src/lib.rs                           |  178 ++-
 wgpu-hal/src/metal/command.rs                 |   18 +
 wgpu-hal/src/metal/device.rs                  |   30 +
 wgpu-hal/src/metal/mod.rs                     |    5 +
 wgpu-hal/src/vulkan/adapter.rs                |  110 +-
 wgpu-hal/src/vulkan/command.rs                |  237 ++++
 wgpu-hal/src/vulkan/conv.rs                   |  112 ++
 wgpu-hal/src/vulkan/device.rs                 |  283 ++++-
 wgpu-hal/src/vulkan/mod.rs                    |   14 +
 wgpu-types/src/lib.rs                         |   64 +-
 wgpu/src/backend/web.rs                       |    1 +
 27 files changed, 2351 insertions(+), 10 deletions(-)
 create mode 100644 wgpu-hal/examples/ray-traced-triangle/main.rs
 create mode 100644 wgpu-hal/examples/ray-traced-triangle/shader.wgsl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index de9cc2b314..0e03e6c83e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -716,6 +716,20 @@ By @cwfitzgerald in [#3671](https://github.com/gfx-rs/wgpu/pull/3671).
 - Change type of `bytes_per_row` and `rows_per_image` (members of `ImageDataLayout`) from `Option<NonZeroU32>` to `Option<u32>`. By @teoxoy in [#3529](https://github.com/gfx-rs/wgpu/pull/3529)
 - On Web, `Instance::create_surface_from_canvas()` and `create_surface_from_offscreen_canvas()` now take the canvas by value. By @daxpedda in [#3690](https://github.com/gfx-rs/wgpu/pull/3690)
 
+### Added/New Features
+
+#### General
+- Added feature flags for ray tracing (currently only hal): `RAY_QUERY` and `RAY_TRACING`. By @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507)
+
+#### Vulkan
+
+- Implemented the basic ray-tracing API for acceleration structures and ray queries. By @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507)
+
+#### Hal
+
+- Added the basic ray-tracing API for acceleration structures and ray queries. By @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507)
+
+
 ### Changes
 
 #### General

diff --git a/Cargo.lock b/Cargo.lock
index 114cd20729..cf9387f636 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4081,6 +4081,7 @@ dependencies = [
  "core-graphics-types",
  "d3d12",
  "env_logger",
+ "glam",
  "glow",
  "glutin",
  "glutin_wgl_sys 0.5.0",

diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs
index 826e4c9bc8..f8cdba11ce 100644
--- a/wgpu-core/src/binding_model.rs
+++ b/wgpu-core/src/binding_model.rs
@@ -341,6 +341,7 @@ impl BindingTypeMaxCountValidator {
             wgt::BindingType::StorageTexture { ..
} => { self.storage_textures.add(binding.visibility, count); } + wgt::BindingType::AccelerationStructure => todo!(), } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index e068d67f6b..6b8db72a77 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -1646,6 +1646,7 @@ impl Device { }, ) } + Bt::AccelerationStructure => todo!(), }; // Validate the count parameter @@ -2140,6 +2141,7 @@ impl Device { buffers: &hal_buffers, samplers: &hal_samplers, textures: &hal_textures, + acceleration_structures: &[], }; let raw = unsafe { self.raw diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 4bae3dc082..5ca8445e25 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -169,6 +169,7 @@ features = ["wgsl-in"] [dev-dependencies] cfg-if = "1" env_logger = "0.10" +glam = "0.24.2" # for ray-traced-triangle example winit = { version = "0.29.4", features = [ "android-native-activity", ] } # for "halmark" example diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 60b2c144f2..ea1e7648e9 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -442,6 +442,7 @@ impl Example { buffers: &[global_buffer_binding], samplers: &[&sampler], textures: &[texture_binding], + acceleration_structures: &[], entries: &[ hal::BindGroupEntry { binding: 0, @@ -475,6 +476,7 @@ impl Example { buffers: &[local_buffer_binding], samplers: &[], textures: &[], + acceleration_structures: &[], entries: &[hal::BindGroupEntry { binding: 0, resource_index: 0, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs new file mode 100644 index 0000000000..7202b35bee --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -0,0 +1,1113 @@ +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; +use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; + +use glam::{Affine3A, Mat4, Vec3}; +use std::{ + borrow::{Borrow, Cow}, + iter, mem, ptr, + time::Instant, +}; +use winit::window::WindowButtons; + +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_FRAMES: u32 = 3; + +/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) +/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) +#[derive(Clone)] +#[repr(C)] +struct AccelerationStructureInstance { + transform: [f32; 12], + custom_index_and_mask: u32, + shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, +} + +impl std::fmt::Debug for AccelerationStructureInstance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instance") + .field("transform", &self.transform) + .field("custom_index()", &self.custom_index()) + .field("mask()", &self.mask()) + .field( + "shader_binding_table_record_offset()", + &self.shader_binding_table_record_offset(), + ) + .field("flags()", &self.flags()) + .field( + "acceleration_structure_reference", + &self.acceleration_structure_reference, + ) + .finish() + } +} + +#[allow(dead_code)] +impl AccelerationStructureInstance { + const LOW_24_MASK: u32 = 0x00ff_ffff; + const MAX_U24: u32 = (1u32 << 24u32) - 1u32; + + #[inline] + fn affine_to_rows(mat: &Affine3A) -> [f32; 12] { + let row_0 = mat.matrix3.row(0); + let row_1 
= mat.matrix3.row(1); + let row_2 = mat.matrix3.row(2); + let translation = mat.translation; + [ + row_0.x, + row_0.y, + row_0.z, + translation.x, + row_1.x, + row_1.y, + row_1.z, + translation.y, + row_2.x, + row_2.y, + row_2.z, + translation.z, + ] + } + + #[inline] + fn rows_to_affine(rows: &[f32; 12]) -> Affine3A { + Affine3A::from_cols_array(&[ + rows[0], rows[3], rows[6], rows[9], rows[1], rows[4], rows[7], rows[10], rows[2], + rows[5], rows[8], rows[11], + ]) + } + + pub fn transform_as_affine(&self) -> Affine3A { + Self::rows_to_affine(&self.transform) + } + pub fn set_transform(&mut self, transform: &Affine3A) { + self.transform = Self::affine_to_rows(transform); + } + + pub fn custom_index(&self) -> u32 { + self.custom_index_and_mask & Self::LOW_24_MASK + } + + pub fn mask(&self) -> u8 { + (self.custom_index_and_mask >> 24) as u8 + } + + pub fn shader_binding_table_record_offset(&self) -> u32 { + self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK + } + + pub fn flags(&self) -> u8 { + (self.shader_binding_table_record_offset_and_flags >> 24) as u8 + } + + pub fn set_custom_index(&mut self, custom_index: u32) { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + self.custom_index_and_mask = + (custom_index & Self::LOW_24_MASK) | (self.custom_index_and_mask & !Self::LOW_24_MASK) + } + + pub fn set_mask(&mut self, mask: u8) { + self.custom_index_and_mask = + (self.custom_index_and_mask & Self::LOW_24_MASK) | (u32::from(mask) << 24) + } + + pub fn set_shader_binding_table_record_offset( + &mut self, + shader_binding_table_record_offset: u32, + ) { + debug_assert!(shader_binding_table_record_offset <= Self::MAX_U24, "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24); + self.shader_binding_table_record_offset_and_flags = (shader_binding_table_record_offset + & Self::LOW_24_MASK) + | (self.shader_binding_table_record_offset_and_flags & !Self::LOW_24_MASK) + } + + pub fn set_flags(&mut self, flags: u8) { + self.shader_binding_table_record_offset_and_flags = + (self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK) + | (u32::from(flags) << 24) + } + + pub fn new( + transform: &Affine3A, + custom_index: u32, + mask: u8, + shader_binding_table_record_offset: u32, + flags: u8, + acceleration_structure_reference: u64, + ) -> Self { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + debug_assert!( + shader_binding_table_record_offset <= Self::MAX_U24, + "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24 + ); + AccelerationStructureInstance { + transform: Self::affine_to_rows(transform), + custom_index_and_mask: (custom_index & Self::MAX_U24) | (u32::from(mask) << 24), + shader_binding_table_record_offset_and_flags: (shader_binding_table_record_offset + & Self::MAX_U24) + | (u32::from(flags) << 24), + acceleration_structure_reference, + } + } +} + +struct ExecutionContext { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec, + used_cmd_bufs: Vec, + frames_recorded: usize, +} + +impl ExecutionContext { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) 
{ + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + + contexts: Vec>, + context_index: usize, + extent: [u32; 2], + start: Instant, + pipeline: A::ComputePipeline, + bind_group: A::BindGroup, + bgl: A::BindGroupLayout, + shader_module: A::ShaderModule, + texture_view: A::TextureView, + uniform_buffer: A::Buffer, + pipeline_layout: A::PipelineLayout, + vertices_buffer: A::Buffer, + indices_buffer: A::Buffer, + texture: A::Texture, + instances: [AccelerationStructureInstance; 3], + instances_buffer: A::Buffer, + blas: A::AccelerationStructure, + tlas: A::AccelerationStructure, + scratch_buffer: A::Buffer, + time: f32, +} + +impl Example { + fn init(window: &winit::window::Window) -> Result> { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: wgt::InstanceFlags::default(), + dx12_shader_compiler: wgt::Dx12Compiler::Dxc { + dxil_path: None, + dxc_path: None, + }, + gles_minor_version: wgt::Gles3MinorVersion::default(), + }; + let instance = unsafe { A::Instance::init(&instance_desc)? }; + let surface = { + let raw_window_handle = window.window_handle()?.as_raw(); + let raw_display_handle = window.display_handle()?.as_raw(); + + unsafe { + instance + .create_surface(raw_display_handle, raw_window_handle) + .unwrap() + } + }; + + let (adapter, features) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + panic!("No adapters found"); + } + let exposed = adapters.swap_remove(0); + dbg!(exposed.features); + (exposed.adapter, exposed.features) + }; + let surface_caps = unsafe { adapter.surface_capabilities(&surface) } + .expect("Surface doesn't support presentation"); + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, queue } = + unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + + let window_size: (u32, u32) = window.inner_size().into(); + dbg!(&surface_caps.formats); + let surface_format = if surface_caps + .formats + .contains(&wgt::TextureFormat::Rgba8Snorm) + { + wgt::TextureFormat::Rgba8Unorm + } else { + *surface_caps.formats.first().unwrap() + }; + let surface_config = hal::SurfaceConfiguration { + swap_chain_size: DESIRED_FRAMES + .max(*surface_caps.swap_chain_sizes.start()) + .min(*surface_caps.swap_chain_sizes.end()), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + format: surface_format, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + view_formats: vec![surface_format], + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + #[allow(dead_code)] + struct Uniforms { + view_inverse: glam::Mat4, + proj_inverse: glam::Mat4, + } + + let bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::StorageTexture { + access: 
wgt::StorageTextureAccess::WriteOnly, + format: wgt::TextureFormat::Rgba8Unorm, + view_dimension: wgt::TextureViewDimension::D2, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::AccelerationStructure, + count: None, + }, + ], + }; + + let bgl = unsafe { device.create_bind_group_layout(&bgl_desc).unwrap() }; + + let naga_shader = { + let shader_file = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("examples") + .join("ray-traced-triangle") + .join("shader.wgsl"); + let source = std::fs::read_to_string(shader_file).unwrap(); + let module = naga::front::wgsl::Frontend::new().parse(&source).unwrap(); + let info = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::RAY_QUERY, + ) + .validate(&module) + .unwrap(); + hal::NagaShader { + module: Cow::Owned(module), + info, + debug_source: None, + } + }; + let shader_desc = hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }; + let shader_module = unsafe { + device + .create_shader_module(&shader_desc, hal::ShaderInput::Naga(naga_shader)) + .unwrap() + }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline = unsafe { + device.create_compute_pipeline(&hal::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: &pipeline_layout, + stage: hal::ProgrammableStage { + module: &shader_module, + entry_point: "main", + }, + }) + } + .unwrap(); + + let vertices: [f32; 9] = [1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 0.0]; + + let vertices_size_in_bytes = vertices.len() * 4; + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size_in_bytes = indices.len() * 4; + + let vertices_buffer = unsafe { + let vertices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("vertices buffer"), + size: vertices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&vertices_buffer, 0..vertices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + vertices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + vertices_size_in_bytes, + ); + device.unmap_buffer(&vertices_buffer).unwrap(); + assert!(mapping.is_coherent); + + vertices_buffer + }; + + let indices_buffer = unsafe { + let indices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("indices buffer"), + size: indices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&indices_buffer, 0..indices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size_in_bytes, + ); + device.unmap_buffer(&indices_buffer).unwrap(); + assert!(mapping.is_coherent); + + indices_buffer + }; + + let blas_triangles = vec![hal::AccelerationStructureTriangles { + vertex_buffer: Some(&vertices_buffer), + first_vertex: 0, + vertex_format: wgt::VertexFormat::Float32x3, + vertex_count: 
vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureTriangleIndices { + buffer: Some(&indices_buffer), + format: wgt::IndexFormat::Uint32, + offset: 0, + count: indices.len() as u32, + }), + transform: None, + flags: hal::AccelerationStructureGeometryFlags::OPAQUE, + }]; + let blas_entries = hal::AccelerationStructureEntries::Triangles(blas_triangles); + + let mut tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: None, + count: 3, + offset: 0, + }); + + let blas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &blas_entries, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + }, + ) + }; + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + let tlas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &tlas_entries, + flags: tlas_flags, + }, + ) + }; + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("tlas"), + size: tlas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + + let uniforms = { + let view = Mat4::look_at_rh(Vec3::new(0.0, 0.0, 2.5), Vec3::ZERO, Vec3::Y); + let proj = Mat4::perspective_rh(59.0_f32.to_radians(), 1.0, 0.001, 1000.0); + + Uniforms { + view_inverse: view.inverse(), + proj_inverse: proj.inverse(), + } + }; + + let uniforms_size = std::mem::size_of::(); + + let uniform_buffer = unsafe { + let uniform_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("uniform buffer"), + size: uniforms_size as u64, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&uniform_buffer, 0..uniforms_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + &uniforms as *const Uniforms as *const u8, + mapping.ptr.as_ptr(), + uniforms_size, + ); + device.unmap_buffer(&uniform_buffer).unwrap(); + assert!(mapping.is_coherent); + uniform_buffer + }; + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8Unorm, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + memory_flags: hal::MemoryFlags::empty(), + view_formats: vec![wgt::TextureFormat::Rgba8Unorm], + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + range: wgt::ImageSubresourceRange::default(), + plane: None, + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let bind_group = { + let buffer_binding = hal::BufferBinding { + buffer: &uniform_buffer, + offset: 
0,
+                size: None,
+            };
+            let texture_binding = hal::TextureBinding {
+                view: &texture_view,
+                usage: hal::TextureUses::STORAGE_READ_WRITE,
+            };
+            let group_desc = hal::BindGroupDescriptor {
+                label: Some("bind group"),
+                layout: &bgl,
+                buffers: &[buffer_binding],
+                samplers: &[],
+                textures: &[texture_binding],
+                acceleration_structures: &[&tlas],
+                entries: &[
+                    hal::BindGroupEntry {
+                        binding: 0,
+                        resource_index: 0,
+                        count: 1,
+                    },
+                    hal::BindGroupEntry {
+                        binding: 1,
+                        resource_index: 0,
+                        count: 1,
+                    },
+                    hal::BindGroupEntry {
+                        binding: 2,
+                        resource_index: 0,
+                        count: 1,
+                    },
+                ],
+            };
+            unsafe { device.create_bind_group(&group_desc).unwrap() }
+        };
+
+        let scratch_buffer = unsafe {
+            device
+                .create_buffer(&hal::BufferDescriptor {
+                    label: Some("scratch buffer"),
+                    size: blas_sizes
+                        .build_scratch_size
+                        .max(tlas_sizes.build_scratch_size),
+                    usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
+                    memory_flags: hal::MemoryFlags::empty(),
+                })
+                .unwrap()
+        };
+
+        let instances = [
+            AccelerationStructureInstance::new(
+                &Affine3A::from_translation(Vec3 {
+                    x: 0.0,
+                    y: 0.0,
+                    z: 0.0,
+                }),
+                0,
+                0xff,
+                0,
+                0,
+                unsafe { device.get_acceleration_structure_device_address(&blas) },
+            ),
+            AccelerationStructureInstance::new(
+                &Affine3A::from_translation(Vec3 {
+                    x: -1.0,
+                    y: -1.0,
+                    z: -2.0,
+                }),
+                0,
+                0xff,
+                0,
+                0,
+                unsafe { device.get_acceleration_structure_device_address(&blas) },
+            ),
+            AccelerationStructureInstance::new(
+                &Affine3A::from_translation(Vec3 {
+                    x: 1.0,
+                    y: -1.0,
+                    z: -2.0,
+                }),
+                0,
+                0xff,
+                0,
+                0,
+                unsafe { device.get_acceleration_structure_device_address(&blas) },
+            ),
+        ];
+
+        let instances_buffer_size =
+            instances.len() * std::mem::size_of::<AccelerationStructureInstance>();
+
+        let instances_buffer = unsafe {
+            let instances_buffer = device
+                .create_buffer(&hal::BufferDescriptor {
+                    label: Some("instances_buffer"),
+                    size: instances_buffer_size as u64,
+                    usage: hal::BufferUses::MAP_WRITE
+                        | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
+                    memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT,
+                })
+                .unwrap();
+
+            let mapping = device
+                .map_buffer(&instances_buffer, 0..instances_buffer_size as u64)
+                .unwrap();
+            ptr::copy_nonoverlapping(
+                instances.as_ptr() as *const u8,
+                mapping.ptr.as_ptr(),
+                instances_buffer_size,
+            );
+            device.unmap_buffer(&instances_buffer).unwrap();
+            assert!(mapping.is_coherent);
+
+            instances_buffer
+        };
+
+        if let hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries {
+            i.buffer = Some(&instances_buffer);
+            assert!(
+                instances.len() <= i.count as usize,
+                "Tlas allocation too small"
+            );
+        }
+
+        let cmd_encoder_desc = hal::CommandEncoderDescriptor {
+            label: None,
+            queue: &queue,
+        };
+        let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() };
+
+        unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() };
+
+        unsafe {
+            cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier {
+                usage: hal::AccelerationStructureUses::empty()
+                    ..hal::AccelerationStructureUses::BUILD_OUTPUT,
+            });
+
+            cmd_encoder.build_acceleration_structures(
+                1,
+                [hal::BuildAccelerationStructureDescriptor {
+                    mode: hal::AccelerationStructureBuildMode::Build,
+                    flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE,
+                    destination_acceleration_structure: &blas,
+                    scratch_buffer: &scratch_buffer,
+                    entries: &blas_entries,
+                    source_acceleration_structure: None,
+                    scratch_buffer_offset: 0,
+                }],
+            );
+
+            let scratch_buffer_barrier = hal::BufferBarrier
{ + buffer: &scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(scratch_buffer_barrier)); + + cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::BUILD_OUTPUT + ..hal::AccelerationStructureUses::BUILD_INPUT, + }); + + cmd_encoder.build_acceleration_structures( + 1, + [hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + entries: &tlas_entries, + source_acceleration_structure: None, + scratch_buffer_offset: 0, + }], + ); + + cmd_encoder.place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::BUILD_OUTPUT + ..hal::AccelerationStructureUses::SHADER_INPUT, + }); + + let texture_barrier = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::STORAGE_READ_WRITE, + }; + + cmd_encoder.transition_textures(iter::once(texture_barrier)); + } + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Self { + instance, + adapter, + surface, + surface_format: surface_config.format, + device, + queue, + pipeline, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, window_size.1], + start: Instant::now(), + pipeline_layout, + bind_group, + texture, + instances, + instances_buffer, + blas, + tlas, + scratch_buffer, + time: 0.0, + indices_buffer, + vertices_buffer, + uniform_buffer, + texture_view, + bgl, + shader_module, + }) + } + + fn update(&mut self, _event: winit::event::WindowEvent) {} + + fn render(&mut self) { + let ctx = &mut self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + + let instances_buffer_size = + self.instances.len() * std::mem::size_of::(); + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + self.time += 1.0 / 60.0; + + self.instances[0].set_transform(&Affine3A::from_rotation_y(self.time)); + + unsafe { + let mapping = self + .device + .map_buffer(&self.instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + self.instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + self.device.unmap_buffer(&self.instances_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + + let instances = hal::AccelerationStructureInstances { + buffer: Some(&self.instances_buffer), + count: self.instances.len() as u32, + offset: 
0, + }; + + ctx.encoder + .place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::SHADER_INPUT + ..hal::AccelerationStructureUses::BUILD_INPUT, + }); + + ctx.encoder.build_acceleration_structures( + 1, + [hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Update, + flags: tlas_flags, + destination_acceleration_structure: &self.tlas, + scratch_buffer: &self.scratch_buffer, + entries: &hal::AccelerationStructureEntries::Instances(instances), + source_acceleration_structure: Some(&self.tlas), + scratch_buffer_offset: 0, + }], + ); + + ctx.encoder + .place_acceleration_structure_barrier(hal::AccelerationStructureBarrier { + usage: hal::AccelerationStructureUses::BUILD_OUTPUT + ..hal::AccelerationStructureUses::SHADER_INPUT, + }); + + let scratch_buffer_barrier = hal::BufferBarrier { + buffer: &self.scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + ctx.encoder + .transition_buffers(iter::once(scratch_buffer_barrier)); + + ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor { + label: None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COPY_DST, + range: wgt::ImageSubresourceRange::default(), + plane: None, + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + unsafe { + ctx.encoder.begin_compute_pass(&hal::ComputePassDescriptor { + label: None, + timestamp_writes: None, + }); + ctx.encoder.set_compute_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.bind_group, &[]); + ctx.encoder.dispatch([512 / 8, 512 / 8, 1]); + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::PRESENT, + }; + let target_barrier2 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::STORAGE_READ_WRITE..hal::TextureUses::COPY_SRC, + }; + let target_barrier3 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_SRC..hal::TextureUses::STORAGE_READ_WRITE, + }; + unsafe { + ctx.encoder.end_compute_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier2)); + ctx.encoder.copy_texture_to_texture( + &self.texture, + hal::TextureUses::COPY_SRC, + surface_tex.borrow(), + std::iter::once(hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + dst_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 512, + height: 512, + depth: 1, + }, + }), + ); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + ctx.encoder.transition_textures(iter::once(target_barrier3)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence { + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue.submit(&[&cmd_buf], fence_param).unwrap(); 
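+            // The command buffer and the transient surface view must outlive this
+            // submission: they are parked in `used_cmd_bufs` / `used_views` below and
+            // only recycled in `wait_and_clear` once this context's fence has signaled.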
+ self.queue.present(&self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::info!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } + + fn exit(mut self) { + unsafe { + { + let ctx = &mut self.contexts[self.context_index]; + self.queue + .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .unwrap(); + } + + for mut ctx in self.contexts { + ctx.wait_and_clear(&self.device); + self.device.destroy_command_encoder(ctx.encoder); + self.device.destroy_fence(ctx.fence); + } + + self.device.destroy_bind_group(self.bind_group); + self.device.destroy_buffer(self.scratch_buffer); + self.device.destroy_buffer(self.instances_buffer); + self.device.destroy_buffer(self.indices_buffer); + self.device.destroy_buffer(self.vertices_buffer); + self.device.destroy_buffer(self.uniform_buffer); + self.device.destroy_acceleration_structure(self.tlas); + self.device.destroy_acceleration_structure(self.blas); + self.device.destroy_texture_view(self.texture_view); + self.device.destroy_texture(self.texture); + self.device.destroy_compute_pipeline(self.pipeline); + self.device.destroy_pipeline_layout(self.pipeline_layout); + self.device.destroy_bind_group_layout(self.bgl); + self.device.destroy_shader_module(self.shader_module); + + self.surface.unconfigure(&self.device); + self.device.exit(self.queue); + self.instance.destroy_surface(self.surface); + drop(self.adapter); + } + } +} + +cfg_if::cfg_if! { + // Apple + Metal + if #[cfg(all(any(target_os = "macos", target_os = "ios"), feature = "metal"))] { + type Api = hal::api::Metal; + } + // Wasm + Vulkan + else if #[cfg(all(not(target_arch = "wasm32"), feature = "vulkan"))] { + type Api = hal::api::Vulkan; + } + // Windows + DX12 + else if #[cfg(all(windows, feature = "dx12"))] { + type Api = hal::api::Dx12; + } + // Anything + GLES + else if #[cfg(feature = "gles")] { + type Api = hal::api::Gles; + } + // Fallback + else { + type Api = hal::api::Empty; + } +} + +fn main() { + env_logger::init(); + + let event_loop = winit::event_loop::EventLoop::new().unwrap(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-ray-traced-triangle") + .with_inner_size(winit::dpi::PhysicalSize { + width: 512, + height: 512, + }) + .with_resizable(false) + .with_enabled_buttons(WindowButtons::CLOSE) + .build(&event_loop) + .unwrap(); + + let example_result = Example::::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + event_loop + .run(move |event, target| { + let _ = &window; // force ownership by the closure + target.set_control_flow(winit::event_loop::ControlFlow::Poll); + match event { + winit::event::Event::WindowEvent { event, .. 
} => match event { + winit::event::WindowEvent::CloseRequested => { + target.exit(); + } + winit::event::WindowEvent::KeyboardInput { event, .. } + if event.physical_key + == winit::keyboard::PhysicalKey::Code( + winit::keyboard::KeyCode::Escape, + ) => + { + target.exit(); + } + winit::event::WindowEvent::RedrawRequested => { + let ex = example.as_mut().unwrap(); + ex.render(); + } + _ => { + example.as_mut().unwrap().update(event); + } + }, + winit::event::Event::LoopExiting => { + example.take().unwrap().exit(); + } + winit::event::Event::AboutToWait => { + window.request_redraw(); + } + _ => {} + } + }) + .unwrap(); +} diff --git a/wgpu-hal/examples/ray-traced-triangle/shader.wgsl b/wgpu-hal/examples/ray-traced-triangle/shader.wgsl new file mode 100644 index 0000000000..8d9e475e3e --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/shader.wgsl @@ -0,0 +1,37 @@ +struct Uniforms { + view_inv: mat4x4, + proj_inv: mat4x4, +}; +@group(0) @binding(0) +var uniforms: Uniforms; + +@group(0) @binding(1) +var output: texture_storage_2d; + +@group(0) @binding(2) +var acc_struct: acceleration_structure; + +@compute @workgroup_size(8, 8) +fn main(@builtin(global_invocation_id) global_id: vec3) { + let target_size = textureDimensions(output); + + let pixel_center = vec2(global_id.xy) + vec2(0.5); + let in_uv = pixel_center / vec2(target_size.xy); + let d = in_uv * 2.0 - 1.0; + + let origin = (uniforms.view_inv * vec4(0.0, 0.0, 0.0, 1.0)).xyz; + let temp = uniforms.proj_inv * vec4(d.x, d.y, 1.0, 1.0); + let direction = (uniforms.view_inv * vec4(normalize(temp.xyz), 0.0)).xyz; + + var rq: ray_query; + rayQueryInitialize(&rq, acc_struct, RayDesc(0u, 0xFFu, 0.1, 200.0, origin, direction)); + rayQueryProceed(&rq); + + var color = vec4(0.0, 0.0, 0.0, 1.0); + let intersection = rayQueryGetCommittedIntersection(&rq); + if intersection.kind != RAY_QUERY_INTERSECTION_NONE { + color = vec4(intersection.barycentrics, 1.0 - intersection.barycentrics.x - intersection.barycentrics.y, 1.0); + } + + textureStore(output, global_id.xy, color); +} \ No newline at end of file diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 5bbd0d4ba5..81f0fbccc9 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1192,4 +1192,24 @@ impl crate::CommandEncoder for super::CommandEncoder { ) }; } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator>, + { + // Implement using `BuildRaytracingAccelerationStructure`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure + todo!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + todo!() + } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index f484d1a9e2..2b6c1d959e 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -112,6 +112,7 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> d3d12::DescriptorRangeType { .. } | Bt::StorageTexture { .. } => d3d12::DescriptorRangeType::UAV, + Bt::AccelerationStructure => todo!(), } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 90af93661e..9f6133592a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -673,6 +673,7 @@ impl crate::Device for super::Device { num_texture_views += count } wgt::BindingType::Sampler { .. 
} => num_samplers += count, + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1194,6 +1195,7 @@ impl crate::Device for super::Device { cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); } } + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1573,4 +1575,38 @@ impl crate::Device for super::Device { .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) } } + + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>, + ) -> crate::AccelerationStructureBuildSizes { + // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo + todo!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + // Implement using `GetGPUVirtualAddress`: + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12resource-getgpuvirtualaddress + todo!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + // Create a D3D12 resource as per-usual. + todo!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + // Destroy a D3D12 resource as per-usual. + todo!() + } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index c50b0af165..4ae224f5fe 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -82,6 +82,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. 
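A detail worth keeping in mind for the D3D12 stubs above: they will eventually consume the same 64-byte instance records the example writes, since `D3D12_RAYTRACING_INSTANCE_DESC` and `VkAccelerationStructureInstanceKHR` share a single layout (the example's `AccelerationStructureInstance` cites both specs). A standalone sketch of that layout and its 24/8-bit packing; `InstanceDesc` here is a hypothetical mirror for illustration, not a type from this patch:

```rust
/// Hypothetical mirror of the instance record both backends expect;
/// `repr(C)` makes the 64-byte layout explicit and checkable.
#[repr(C)]
#[allow(dead_code)]
struct InstanceDesc {
    transform: [f32; 12],                  // 3x4 row-major object-to-world
    custom_index_and_mask: u32,            // low 24 bits: index, high 8: mask
    sbt_record_offset_and_flags: u32,      // low 24 bits: offset, high 8: flags
    acceleration_structure_reference: u64, // BLAS device address / handle
}

fn main() {
    assert_eq!(std::mem::size_of::<InstanceDesc>(), 64);
    // The example's packing helpers reduce to this arithmetic:
    let packed = (0x1234u32 & 0x00ff_ffff) | (0xffu32 << 24);
    assert_eq!(packed & 0x00ff_ffff, 0x1234); // custom index round-trips
    assert_eq!((packed >> 24) as u8, 0xff); // mask round-trips
}
```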
@@ -600,6 +602,9 @@ pub struct ComputePipeline { unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct AccelerationStructure {} + impl SwapChain { unsafe fn release_resources(self) -> d3d12::ComPtr { self.raw diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 487d317870..9fd42bd6f5 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -29,6 +29,7 @@ impl crate::Api for Api { type Sampler = Resource; type QuerySet = Resource; type Fence = Resource; + type AccelerationStructure = Resource; type BindGroupLayout = Resource; type BindGroup = Resource; @@ -236,6 +237,25 @@ impl crate::Device for Context { false } unsafe fn stop_capture(&self) {} + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn get_acceleration_structure_build_sizes<'a>( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, Api>, + ) -> crate::AccelerationStructureBuildSizes { + Default::default() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &Resource, + ) -> wgt::BufferAddress { + Default::default() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} } impl crate::CommandEncoder for Encoder { @@ -410,4 +430,20 @@ impl crate::CommandEncoder for Encoder { unsafe fn dispatch(&mut self, count: [u32; 3]) {} unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + descriptors: T, + ) where + Api: 'a, + T: IntoIterator>, + { + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 28dbf1688d..fee08f201b 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1166,4 +1166,22 @@ impl crate::CommandEncoder for super::CommandEncoder { indirect_offset: offset, }); } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator>, + { + unimplemented!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 35c6f910de..a48fe9dc75 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1125,6 +1125,7 @@ impl crate::Device for super::Device { ty: wgt::BufferBindingType::Storage { .. }, .. 
} => &mut num_storage_buffers,
+                wgt::BindingType::AccelerationStructure => unimplemented!(),
             };
             binding_to_slot[entry.binding as usize] = *counter;
@@ -1211,6 +1212,7 @@ impl crate::Device for super::Device {
                         format: format_desc.internal,
                     })
                 }
+                wgt::BindingType::AccelerationStructure => unimplemented!(),
             };
             contents.push(binding);
         }
@@ -1458,6 +1460,25 @@ impl crate::Device for super::Device {
                 .end_frame_capture(ptr::null_mut(), ptr::null_mut())
         }
     }
+    unsafe fn create_acceleration_structure(
+        &self,
+        _desc: &crate::AccelerationStructureDescriptor,
+    ) -> Result<(), crate::DeviceError> {
+        unimplemented!()
+    }
+    unsafe fn get_acceleration_structure_build_sizes<'a>(
+        &self,
+        _desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>,
+    ) -> crate::AccelerationStructureBuildSizes {
+        unimplemented!()
+    }
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        _acceleration_structure: &(),
+    ) -> wgt::BufferAddress {
+        unimplemented!()
+    }
+    unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {}
 }
 
 #[cfg(all(
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index ad3bbaf5ae..7021c3e12d 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -153,6 +153,7 @@ impl crate::Api for Api {
     type Sampler = Sampler;
     type QuerySet = QuerySet;
     type Fence = Fence;
+    type AccelerationStructure = ();
 
     type BindGroupLayout = BindGroupLayout;
     type BindGroup = BindGroup;
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index b2c4e240ef..1da760f50c 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -212,6 +212,8 @@ pub trait Api: Clone + fmt::Debug + Sized {
     type ShaderModule: fmt::Debug + WasmNotSendSync;
     type RenderPipeline: fmt::Debug + WasmNotSendSync;
     type ComputePipeline: fmt::Debug + WasmNotSendSync;
+
+    type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static;
 }
 
 pub trait Instance<A: Api>: Sized + WasmNotSendSync {
@@ -385,6 +387,23 @@ pub trait Device<A: Api>: WasmNotSendSync {
     unsafe fn start_capture(&self) -> bool;
     unsafe fn stop_capture(&self);
+
+    unsafe fn create_acceleration_structure(
+        &self,
+        desc: &AccelerationStructureDescriptor,
+    ) -> Result<A::AccelerationStructure, DeviceError>;
+    unsafe fn get_acceleration_structure_build_sizes(
+        &self,
+        desc: &GetAccelerationStructureBuildSizesDescriptor<A>,
+    ) -> AccelerationStructureBuildSizes;
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        acceleration_structure: &A::AccelerationStructure,
+    ) -> wgt::BufferAddress;
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        acceleration_structure: A::AccelerationStructure,
+    );
 }
 
 pub trait Queue<A: Api>: WasmNotSendSync {
@@ -613,6 +632,26 @@ pub trait CommandEncoder<A: Api>: WasmNotSendSync + fmt::Debug {
     unsafe fn dispatch(&mut self, count: [u32; 3]);
     unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress);
+
+    /// To get the required sizes for the buffer allocations, use
+    /// `get_acceleration_structure_build_sizes` per descriptor.
+    /// All buffers must be synchronized externally.
+    /// Buffer regions that are written to may only be passed once per function call,
+    /// with the exception of updates in the same descriptor.
+    /// Consequences of this limitation:
+    /// - scratch buffers need to be unique
+    /// - a tlas can't be built in the same call as a blas it contains
+    unsafe fn build_acceleration_structures<'a, T>(
+        &mut self,
+        descriptor_count: u32,
+        descriptors: T,
+    ) where
+        A: 'a,
+        T: IntoIterator<Item = BuildAccelerationStructureDescriptor<'a, A>>;
+
+    unsafe fn place_acceleration_structure_barrier(
+        &mut self,
+        barrier: AccelerationStructureBarrier,
+    );
 }
 
 bitflags!(
@@ -773,12 +812,15 @@ bitflags::bitflags! {
         const INDIRECT = 1 << 9;
         /// A buffer used to store query results.
         const QUERY_RESOLVE = 1 << 10;
+        const ACCELERATION_STRUCTURE_SCRATCH = 1 << 11;
+        const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12;
+        const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 13;
         /// The combination of states that a buffer may be in _at the same time_.
         const INCLUSIVE = Self::MAP_READ.bits() | Self::COPY_SRC.bits() |
             Self::INDEX.bits() | Self::VERTEX.bits() | Self::UNIFORM.bits() |
-            Self::STORAGE_READ.bits() | Self::INDIRECT.bits();
+            Self::STORAGE_READ.bits() | Self::INDIRECT.bits() | Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits() | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits();
         /// The combination of states that a buffer must exclusively be in.
-        const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits();
+        const EXCLUSIVE = Self::MAP_WRITE.bits() | Self::COPY_DST.bits() | Self::STORAGE_READ_WRITE.bits() | Self::ACCELERATION_STRUCTURE_SCRATCH.bits();
         /// The combination of all usages that are guaranteed to be ordered by the hardware.
         /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there
         /// are no barriers needed for synchronization.
@@ -1086,6 +1128,7 @@ pub struct BindGroupDescriptor<'a, A: Api> {
     pub samplers: &'a [&'a A::Sampler],
     pub textures: &'a [TextureBinding<'a, A>],
     pub entries: &'a [BindGroupEntry],
+    pub acceleration_structures: &'a [&'a A::AccelerationStructure],
 }
 
 #[derive(Clone, Debug)]
@@ -1417,3 +1460,134 @@ fn test_default_limits() {
     let limits = wgt::Limits::default();
     assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32);
 }
+
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureDescriptor<'a> {
+    pub label: Label<'a>,
+    pub size: wgt::BufferAddress,
+    pub format: AccelerationStructureFormat,
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum AccelerationStructureFormat {
+    TopLevel,
+    BottomLevel,
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum AccelerationStructureBuildMode {
+    Build,
+    Update,
+}
+
+/// Information about the required sizes for a corresponding entries struct (+ flags)
+#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
+pub struct AccelerationStructureBuildSizes {
+    pub acceleration_structure_size: wgt::BufferAddress,
+    pub update_scratch_size: wgt::BufferAddress,
+    pub build_scratch_size: wgt::BufferAddress,
+}
+
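Taken together, the `Device` methods, `BufferUses` flags, and size types above define the whole hal-level build flow: query sizes, allocate, record the build, then barrier before use. A hedged sketch of that sequence as a generic helper; `build_blas` and its parameters are illustrative, not part of this patch, and the caller is assumed to have sized `scratch` from `build_scratch_size` (refits would look at `update_scratch_size` instead):

```rust
// Illustrative only: a generic helper over any backend `A`, wiring together
// the Device and CommandEncoder methods added in this patch.
use wgpu_hal as hal;
use hal::{CommandEncoder as _, Device as _};

unsafe fn build_blas<'a, A: hal::Api>(
    device: &A::Device,
    encoder: &mut A::CommandEncoder,
    entries: &'a hal::AccelerationStructureEntries<'a, A>,
    scratch: &'a A::Buffer,
) -> Result<A::AccelerationStructure, hal::DeviceError> {
    let flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE;
    // 1. Ask the backend how large the structure and its scratch must be.
    let sizes = device.get_acceleration_structure_build_sizes(
        &hal::GetAccelerationStructureBuildSizesDescriptor { entries, flags },
    );
    // 2. Allocate the structure itself.
    let blas =
        device.create_acceleration_structure(&hal::AccelerationStructureDescriptor {
            label: Some("blas"),
            size: sizes.acceleration_structure_size,
            format: hal::AccelerationStructureFormat::BottomLevel,
        })?;
    // 3. Record the build. The caller still owes the BUILD_OUTPUT ->
    //    BUILD_INPUT / SHADER_INPUT barrier before the BLAS is consumed.
    encoder.build_acceleration_structures(
        1,
        [hal::BuildAccelerationStructureDescriptor {
            entries,
            mode: hal::AccelerationStructureBuildMode::Build,
            flags,
            source_acceleration_structure: None,
            destination_acceleration_structure: &blas,
            scratch_buffer: scratch,
            scratch_buffer_offset: 0,
        }],
    );
    Ok(blas)
}
```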
+/// Updates use `source_acceleration_structure` if present, otherwise the update is performed in place.
+/// For updates, only the data is allowed to change (not the metadata or sizes).
+#[derive(Clone, Debug)]
+pub struct BuildAccelerationStructureDescriptor<'a, A: Api> {
+    pub entries: &'a AccelerationStructureEntries<'a, A>,
+    pub mode: AccelerationStructureBuildMode,
+    pub flags: AccelerationStructureBuildFlags,
+    pub source_acceleration_structure: Option<&'a A::AccelerationStructure>,
+    pub destination_acceleration_structure: &'a A::AccelerationStructure,
+    pub scratch_buffer: &'a A::Buffer,
+    pub scratch_buffer_offset: wgt::BufferAddress,
+}
+
+/// - All buffers, buffer addresses and offsets will be ignored.
+/// - The build mode will be ignored.
+/// - Reducing the number of instances, triangle groups, or AABB groups (or the number of
+///   triangles/AABBs in the corresponding groups) may result in reduced size requirements.
+/// - Any other change may result in a bigger or smaller size requirement.
+#[derive(Clone, Debug)]
+pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> {
+    pub entries: &'a AccelerationStructureEntries<'a, A>,
+    pub flags: AccelerationStructureBuildFlags,
+}
+
+/// Entries for a single descriptor
+/// * `Instances` - Multiple instances for a top level acceleration structure
+/// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure
+/// * `AABBs` - List of lists of axis-aligned bounding boxes for a bottom level acceleration structure
+#[derive(Debug)]
+pub enum AccelerationStructureEntries<'a, A: Api> {
+    Instances(AccelerationStructureInstances<'a, A>),
+    Triangles(Vec<AccelerationStructureTriangles<'a, A>>),
+    AABBs(Vec<AccelerationStructureAABBs<'a, A>>),
+}
+
+/// * `first_vertex` - offset in the vertex buffer (as number of vertices)
+/// * `indices` - optional index buffer with attributes
+/// * `transform` - optional transform
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureTriangles<'a, A: Api> {
+    pub vertex_buffer: Option<&'a A::Buffer>,
+    pub vertex_format: wgt::VertexFormat,
+    pub first_vertex: u32,
+    pub vertex_count: u32,
+    pub vertex_stride: wgt::BufferAddress,
+    pub indices: Option<AccelerationStructureTriangleIndices<'a, A>>,
+    pub transform: Option<AccelerationStructureTriangleTransform<'a, A>>,
+    pub flags: AccelerationStructureGeometryFlags,
+}
+
+/// * `offset` - offset in bytes
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureAABBs<'a, A: Api> {
+    pub buffer: Option<&'a A::Buffer>,
+    pub offset: u32,
+    pub count: u32,
+    pub stride: wgt::BufferAddress,
+    pub flags: AccelerationStructureGeometryFlags,
+}
+
+/// * `offset` - offset in bytes
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureInstances<'a, A: Api> {
+    pub buffer: Option<&'a A::Buffer>,
+    pub offset: u32,
+    pub count: u32,
+}
+
+/// * `offset` - offset in bytes
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureTriangleIndices<'a, A: Api> {
+    pub format: wgt::IndexFormat,
+    pub buffer: Option<&'a A::Buffer>,
+    pub offset: u32,
+    pub count: u32,
+}
+
+/// * `offset` - offset in bytes
+#[derive(Clone, Debug)]
+pub struct AccelerationStructureTriangleTransform<'a, A: Api> {
+    pub buffer: &'a A::Buffer,
+    pub offset: u32,
+}
+
+pub type AccelerationStructureBuildFlags = wgt::AccelerationStructureFlags;
+pub type AccelerationStructureGeometryFlags = wgt::AccelerationStructureGeometryFlags;
+
+bitflags::bitflags!
{ + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] + pub struct AccelerationStructureUses: u8 { + // For blas used as input for tlas + const BUILD_INPUT = 1 << 0; + // Target for acceleration structure build + const BUILD_OUTPUT = 1 << 1; + // Tlas used in a shader + const SHADER_INPUT = 1 << 2; + } +} + +#[derive(Debug, Clone)] +pub struct AccelerationStructureBarrier { + pub usage: Range, +} diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index b06f46e8a9..6f1a0d9c2f 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -1216,6 +1216,24 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.compute.as_ref().unwrap(); encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); } + + unsafe fn build_acceleration_structures<'a, T>( + &mut self, + _descriptor_count: u32, + _descriptors: T, + ) where + super::Api: 'a, + T: IntoIterator>, + { + unimplemented!() + } + + unsafe fn place_acceleration_structure_barrier( + &mut self, + _barriers: crate::AccelerationStructureBarrier, + ) { + unimplemented!() + } } impl Drop for super::CommandEncoder { diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 475332b76d..6a387dd57b 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -637,6 +637,7 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::ReadWrite => true, }; } + wgt::BindingType::AccelerationStructure => unimplemented!(), } let br = naga::ResourceBinding { @@ -768,6 +769,7 @@ impl crate::Device for super::Device { ); counter.textures += size; } + wgt::BindingType::AccelerationStructure => unimplemented!(), } } } @@ -1218,4 +1220,32 @@ impl crate::Device for super::Device { } shared_capture_manager.stop_capture(); } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + unimplemented!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + unimplemented!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 8890092d31..39589115e7 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -66,6 +66,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } pub struct Instance { @@ -844,3 +846,6 @@ pub struct CommandBuffer { unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct AccelerationStructure; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 01bb6f88d4..a9eaf8b609 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -35,6 +35,9 @@ pub struct PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDevice16BitStorageFeatures, )>, + acceleration_structure: Option, + buffer_device_address: Option, + ray_query: Option, zero_initialize_workgroup_memory: Option, } @@ -75,6 +78,15 @@ impl 
PhysicalDeviceFeatures { if let Some(ref mut feature) = self.zero_initialize_workgroup_memory { info = info.push_next(feature); } + if let Some(ref mut feature) = self.acceleration_structure { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.buffer_device_address { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_query { + info = info.push_next(feature); + } info } @@ -283,6 +295,37 @@ impl PhysicalDeviceFeatures { } else { None }, + acceleration_structure: if enabled_extensions + .contains(&vk::KhrAccelerationStructureFn::name()) + { + Some( + vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(true) + .build(), + ) + } else { + None + }, + buffer_device_address: if enabled_extensions + .contains(&vk::KhrBufferDeviceAddressFn::name()) + { + Some( + vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder() + .buffer_device_address(true) + .build(), + ) + } else { + None + }, + ray_query: if enabled_extensions.contains(&vk::KhrRayQueryFn::name()) { + Some( + vk::PhysicalDeviceRayQueryFeaturesKHR::builder() + .ray_query(true) + .build(), + ) + } else { + None + }, zero_initialize_workgroup_memory: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name()) { @@ -520,6 +563,18 @@ impl PhysicalDeviceFeatures { features.set(F::DEPTH32FLOAT_STENCIL8, texture_d32_s8); + features.set( + F::RAY_TRACING_ACCELERATION_STRUCTURE, + caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) + && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()), + ); + + features.set( + F::RAY_QUERY, + caps.supports_extension(vk::KhrRayQueryFn::name()), + ); + let rg11b10ufloat_renderable = supports_format( instance, phd, @@ -570,12 +625,13 @@ impl PhysicalDeviceFeatures { } /// Information gathered about a physical device capabilities. -#[derive(Default)] +#[derive(Default, Debug)] pub struct PhysicalDeviceCapabilities { supported_extensions: Vec, properties: vk::PhysicalDeviceProperties, maintenance_3: Option, descriptor_indexing: Option, + acceleration_structure: Option, driver: Option, /// The device API version. 
     ///
@@ -706,6 +762,18 @@ impl PhysicalDeviceCapabilities {
             extensions.push(vk::KhrDrawIndirectCountFn::name());
         }
 
+        // Require `VK_KHR_deferred_host_operations`, `VK_KHR_acceleration_structure` and
+        // `VK_KHR_buffer_device_address` if the `RAY_TRACING_ACCELERATION_STRUCTURE` feature
+        // was requested
+        if requested_features.contains(wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE) {
+            extensions.push(vk::KhrDeferredHostOperationsFn::name());
+            extensions.push(vk::KhrAccelerationStructureFn::name());
+            extensions.push(vk::KhrBufferDeviceAddressFn::name());
+        }
+
+        // Require `VK_KHR_ray_query` if the associated feature was requested
+        if requested_features.contains(wgt::Features::RAY_QUERY) {
+            extensions.push(vk::KhrRayQueryFn::name());
+        }
+
         // Require `VK_EXT_conservative_rasterization` if the associated feature was requested
         if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) {
             extensions.push(vk::ExtConservativeRasterizationFn::name());
@@ -821,6 +889,9 @@ impl super::InstanceShared {
             >= vk::API_VERSION_1_2
             || capabilities.supports_extension(vk::KhrDriverPropertiesFn::name());
 
+        let supports_acceleration_structure =
+            capabilities.supports_extension(vk::KhrAccelerationStructureFn::name());
+
         let mut builder = vk::PhysicalDeviceProperties2KHR::builder();
         if supports_maintenance3 {
             capabilities.maintenance_3 =
@@ -835,6 +906,13 @@ impl super::InstanceShared {
             builder = builder.push_next(next);
         }
 
+        if supports_acceleration_structure {
+            let next = capabilities
+                .acceleration_structure
+                .insert(vk::PhysicalDeviceAccelerationStructurePropertiesKHR::default());
+            builder = builder.push_next(next);
+        }
+
         if supports_driver_properties {
             let next = capabilities
                 .driver
@@ -917,6 +995,12 @@ impl super::InstanceShared {
             builder = builder.push_next(&mut next.0);
             builder = builder.push_next(&mut next.1);
         }
+        if capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()) {
+            let next = features
+                .acceleration_structure
+                .insert(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default());
+            builder = builder.push_next(next);
+        }
 
         // `VK_KHR_zero_initialize_workgroup_memory` is promoted to 1.3
         if capabilities.device_api_version >= vk::API_VERSION_1_3
@@ -1252,6 +1336,22 @@ impl super::Adapter {
         } else {
             None
         };
+        let ray_tracing_fns = if enabled_extensions.contains(&khr::AccelerationStructure::name())
+            && enabled_extensions.contains(&khr::BufferDeviceAddress::name())
+        {
+            Some(super::RayTracingDeviceExtensionFunctions {
+                acceleration_structure: khr::AccelerationStructure::new(
+                    &self.instance.raw,
+                    &raw_device,
+                ),
+                buffer_device_address: khr::BufferDeviceAddress::new(
+                    &self.instance.raw,
+                    &raw_device,
+                ),
+            })
+        } else {
+            None
+        };
 
         let naga_options = {
             use naga::back::spv;
@@ -1300,6 +1400,10 @@ impl super::Adapter {
                 capabilities.push(spv::Capability::StorageImageWriteWithoutFormat);
             }
 
+            if features.contains(wgt::Features::RAY_QUERY) {
+                capabilities.push(spv::Capability::RayQueryKHR);
+            }
+
             let mut flags = spv::WriterFlags::empty();
             flags.set(
                 spv::WriterFlags::DEBUG,
@@ -1367,6 +1471,7 @@ impl super::Adapter {
             extension_fns: super::DeviceExtensionFunctions {
                 draw_indirect_count: indirect_count_fn,
                 timeline_semaphore: timeline_semaphore_fn,
+                ray_tracing: ray_tracing_fns,
             },
             vendor_id: self.phd_capabilities.properties.vendor_id,
             timestamp_period: self.phd_capabilities.properties.limits.timestamp_period,
@@ -1421,7 +1526,8 @@ impl super::Adapter {
                     size: memory_heap.size,
                 })
                 .collect(),
-            buffer_device_address: false,
+            buffer_device_address: enabled_extensions
+                .contains(&khr::BufferDeviceAddress::name()),
         };
         gpu_alloc::GpuAllocator::new(config, properties)
     };
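With the adapter changes in place, a hal consumer opts in at device-open time and the extensions above are enabled automatically. A hedged sketch, assuming `adapter` is a `hal::ExposedAdapter` for the Vulkan backend obtained through the usual instance enumeration:

```rust
// Sketch only: request both new features when opening the device.
let wanted = wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE | wgt::Features::RAY_QUERY;
assert!(
    adapter.features.contains(wanted),
    "adapter does not report ray-tracing support"
);
let hal::OpenDevice { device, queue } = unsafe {
    adapter
        .adapter
        .open(wanted, &wgt::Limits::default())
        .expect("failed to open device with ray-tracing features")
};
```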
diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs
index c31da9e2c8..239133bb54 100644
--- a/wgpu-hal/src/vulkan/command.rs
+++ b/wgpu-hal/src/vulkan/command.rs
@@ -414,6 +414,243 @@ impl crate::CommandEncoder for super::CommandEncoder {
         };
     }
 
+    unsafe fn build_acceleration_structures<'a, T>(&mut self, descriptor_count: u32, descriptors: T)
+    where
+        super::Api: 'a,
+        T: IntoIterator<Item = crate::BuildAccelerationStructureDescriptor<'a, super::Api>>,
+    {
+        const CAPACITY_OUTER: usize = 8;
+        const CAPACITY_INNER: usize = 1;
+        let descriptor_count = descriptor_count as usize;
+
+        let ray_tracing_functions = self
+            .device
+            .extension_fns
+            .ray_tracing
+            .as_ref()
+            .expect("Feature `RAY_TRACING_ACCELERATION_STRUCTURE` not enabled");
+
+        let get_device_address = |buffer: Option<&super::Buffer>| unsafe {
+            match buffer {
+                Some(buffer) => ray_tracing_functions
+                    .buffer_device_address
+                    .get_buffer_device_address(
+                        &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw),
+                    ),
+                None => panic!("Buffers are required to build acceleration structures"),
+            }
+        };
+
+        // storage for all the data required for cmd_build_acceleration_structures
+        let mut ranges_storage = smallvec::SmallVec::<
+            [smallvec::SmallVec<[vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER]>;
+                CAPACITY_OUTER],
+        >::with_capacity(descriptor_count);
+        let mut geometries_storage = smallvec::SmallVec::<
+            [smallvec::SmallVec<[vk::AccelerationStructureGeometryKHR; CAPACITY_INNER]>;
+                CAPACITY_OUTER],
+        >::with_capacity(descriptor_count);
+
+        // pointers to all the data required for cmd_build_acceleration_structures
+        let mut geometry_infos = smallvec::SmallVec::<
+            [vk::AccelerationStructureBuildGeometryInfoKHR; CAPACITY_OUTER],
+        >::with_capacity(descriptor_count);
+        let mut ranges_ptrs = smallvec::SmallVec::<
+            [&[vk::AccelerationStructureBuildRangeInfoKHR]; CAPACITY_OUTER],
+        >::with_capacity(descriptor_count);
+
+        for desc in descriptors {
+            let (geometries, ranges) = match *desc.entries {
+                crate::AccelerationStructureEntries::Instances(ref instances) => {
+                    let instance_data =
+                        vk::AccelerationStructureGeometryInstancesDataKHR::builder().data(
+                            vk::DeviceOrHostAddressConstKHR {
+                                device_address: get_device_address(instances.buffer),
+                            },
+                        );
+
+                    let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                        .geometry_type(vk::GeometryTypeKHR::INSTANCES)
+                        .geometry(vk::AccelerationStructureGeometryDataKHR {
+                            instances: *instance_data,
+                        });
+
+                    let range = vk::AccelerationStructureBuildRangeInfoKHR::builder()
+                        .primitive_count(instances.count)
+                        .primitive_offset(instances.offset);
+
+                    (smallvec::smallvec![*geometry], smallvec::smallvec![*range])
+                }
+                crate::AccelerationStructureEntries::Triangles(ref in_geometries) => {
+                    let mut ranges = smallvec::SmallVec::<
+                        [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER],
+                    >::with_capacity(in_geometries.len());
+                    let mut geometries = smallvec::SmallVec::<
+                        [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER],
+                    >::with_capacity(in_geometries.len());
+                    for triangles in in_geometries {
+                        let mut triangle_data =
+                            vk::AccelerationStructureGeometryTrianglesDataKHR::builder()
+                                .vertex_data(vk::DeviceOrHostAddressConstKHR {
+                                    device_address: get_device_address(triangles.vertex_buffer),
+                                })
+                                .vertex_format(conv::map_vertex_format(triangles.vertex_format))
+                                .max_vertex(triangles.vertex_count)
+                                .vertex_stride(triangles.vertex_stride);
+
+                        let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder();
+
+                        if let Some(ref indices) = triangles.indices {
+                            triangle_data = triangle_data
+                                .index_data(vk::DeviceOrHostAddressConstKHR {
+                                    device_address: get_device_address(indices.buffer),
+                                })
+                                .index_type(conv::map_index_format(indices.format));
+
+                            range = range
+                                .primitive_count(indices.count / 3)
+                                .primitive_offset(indices.offset)
+                                .first_vertex(triangles.first_vertex);
+                        } else {
+                            range = range
+                                .primitive_count(triangles.vertex_count)
+                                .first_vertex(triangles.first_vertex);
+                        }
+
+                        if let Some(ref transform) = triangles.transform {
+                            let transform_device_address = unsafe {
+                                ray_tracing_functions
+                                    .buffer_device_address
+                                    .get_buffer_device_address(
+                                        &vk::BufferDeviceAddressInfo::builder()
+                                            .buffer(transform.buffer.raw),
+                                    )
+                            };
+                            triangle_data =
+                                triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR {
+                                    device_address: transform_device_address,
+                                });
+
+                            range = range.transform_offset(transform.offset);
+                        }
+
+                        let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                            .geometry_type(vk::GeometryTypeKHR::TRIANGLES)
+                            .geometry(vk::AccelerationStructureGeometryDataKHR {
+                                triangles: *triangle_data,
+                            })
+                            .flags(conv::map_acceleration_structure_geometry_flags(
+                                triangles.flags,
+                            ));
+
+                        geometries.push(*geometry);
+                        ranges.push(*range);
+                    }
+                    (geometries, ranges)
+                }
+                crate::AccelerationStructureEntries::AABBs(ref in_geometries) => {
+                    let mut ranges = smallvec::SmallVec::<
+                        [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER],
+                    >::with_capacity(in_geometries.len());
+                    let mut geometries = smallvec::SmallVec::<
+                        [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER],
+                    >::with_capacity(in_geometries.len());
+                    for aabb in in_geometries {
+                        let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder()
+                            .data(vk::DeviceOrHostAddressConstKHR {
+                                device_address: get_device_address(aabb.buffer),
+                            })
+                            .stride(aabb.stride);
+
+                        let range = vk::AccelerationStructureBuildRangeInfoKHR::builder()
+                            .primitive_count(aabb.count)
+                            .primitive_offset(aabb.offset);
+
+                        let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                            .geometry_type(vk::GeometryTypeKHR::AABBS)
+                            .geometry(vk::AccelerationStructureGeometryDataKHR {
+                                aabbs: *aabbs_data,
+                            })
+                            .flags(conv::map_acceleration_structure_geometry_flags(aabb.flags));
+
+                        geometries.push(*geometry);
+                        ranges.push(*range);
+                    }
+                    (geometries, ranges)
+                }
+            };
+
+            ranges_storage.push(ranges);
+            geometries_storage.push(geometries);
+
+            let scratch_device_address = unsafe {
+                ray_tracing_functions
+                    .buffer_device_address
+                    .get_buffer_device_address(
+                        &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw),
+                    )
+            };
+            let ty = match *desc.entries {
+                crate::AccelerationStructureEntries::Instances(_) => {
+                    vk::AccelerationStructureTypeKHR::TOP_LEVEL
+                }
+                _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL,
+            };
+            let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder()
+                .ty(ty)
+                .mode(conv::map_acceleration_structure_build_mode(desc.mode))
+                .flags(conv::map_acceleration_structure_flags(desc.flags))
+                .dst_acceleration_structure(desc.destination_acceleration_structure.raw)
+                .scratch_data(vk::DeviceOrHostAddressKHR {
+                    device_address: scratch_device_address + desc.scratch_buffer_offset,
+                });
+
+            if desc.mode == crate::AccelerationStructureBuildMode::Update {
+                geometry_info.src_acceleration_structure = desc
+                    .source_acceleration_structure
+                    .unwrap_or(desc.destination_acceleration_structure)
+                    .raw;
+            }
+
+            geometry_infos.push(*geometry_info);
+        }
+
+        for (i, geometry_info) in geometry_infos.iter_mut().enumerate() {
+            geometry_info.geometry_count = geometries_storage[i].len() as u32;
+            geometry_info.p_geometries = geometries_storage[i].as_ptr();
+            ranges_ptrs.push(&ranges_storage[i]);
+        }
+
+        unsafe {
+            ray_tracing_functions
+                .acceleration_structure
+                .cmd_build_acceleration_structures(self.active, &geometry_infos, &ranges_ptrs);
+        }
+    }
+
+    unsafe fn place_acceleration_structure_barrier(
+        &mut self,
+        barrier: crate::AccelerationStructureBarrier,
+    ) {
+        let (src_stage, src_access) =
+            conv::map_acceleration_structure_usage_to_barrier(barrier.usage.start);
+        let (dst_stage, dst_access) =
+            conv::map_acceleration_structure_usage_to_barrier(barrier.usage.end);
+
+        unsafe {
+            self.device.raw.cmd_pipeline_barrier(
+                self.active,
+                src_stage | vk::PipelineStageFlags::TOP_OF_PIPE,
+                dst_stage | vk::PipelineStageFlags::BOTTOM_OF_PIPE,
+                vk::DependencyFlags::empty(),
+                &[vk::MemoryBarrier::builder()
+                    .src_access_mask(src_access)
+                    .dst_access_mask(dst_access)
+                    .build()],
+                &[],
+                &[],
+            )
+        };
+    }
 
     // render
 
     unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {
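End to end, the encoder method above takes one descriptor per acceleration structure being built. A hedged sketch of a single-geometry BLAS build (the buffers, the destination structure `blas`, and the scratch buffer are assumed to exist; field values are illustrative):

```rust
// Sketch only: build one triangle BLAS through the Vulkan backend.
let triangles = hal::AccelerationStructureTriangles::<hal::api::Vulkan> {
    vertex_buffer: Some(&vertex_buffer),
    first_vertex: 0,
    vertex_format: wgt::VertexFormat::Float32x3,
    vertex_count: 3,
    vertex_stride: 3 * 4, // tightly packed vec3<f32>
    indices: None,
    transform: None,
    flags: hal::AccelerationStructureGeometryFlags::OPAQUE,
};
let entries = hal::AccelerationStructureEntries::Triangles(vec![triangles]);
unsafe {
    encoder.build_acceleration_structures(
        1,
        std::iter::once(hal::BuildAccelerationStructureDescriptor {
            entries: &entries,
            mode: hal::AccelerationStructureBuildMode::Build,
            flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE,
            source_acceleration_structure: None,
            destination_acceleration_structure: &blas,
            scratch_buffer: &scratch_buffer,
            scratch_buffer_offset: 0,
        }),
    );
}
```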
diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs
index 70dbb5714d..826da91416 100644
--- a/wgpu-hal/src/vulkan/conv.rs
+++ b/wgpu-hal/src/vulkan/conv.rs
@@ -517,6 +517,16 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags {
     if usage.contains(crate::BufferUses::INDIRECT) {
         flags |= vk::BufferUsageFlags::INDIRECT_BUFFER;
     }
+    if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) {
+        flags |= vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS;
+    }
+    if usage.intersects(
+        crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT
+            | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
+    ) {
+        flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR
+            | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS;
+    }
     flags
 }
 
@@ -569,6 +579,15 @@ pub fn map_buffer_usage_to_barrier(
         stages |= vk::PipelineStageFlags::DRAW_INDIRECT;
         access |= vk::AccessFlags::INDIRECT_COMMAND_READ;
     }
+    if usage.intersects(
+        crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT
+            | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT
+            | crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
+    ) {
+        stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR;
+        access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR
+            | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR;
+    }
 
     (stages, access)
 }
@@ -720,6 +739,7 @@ pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType {
         wgt::BindingType::Sampler { .. } => vk::DescriptorType::SAMPLER,
         wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE,
         wgt::BindingType::StorageTexture { .. } => vk::DescriptorType::STORAGE_IMAGE,
+        wgt::BindingType::AccelerationStructure => vk::DescriptorType::ACCELERATION_STRUCTURE_KHR,
     }
 }
 
@@ -851,3 +871,95 @@ pub fn map_pipeline_statistics(
     }
     flags
 }
+
+pub fn map_acceleration_structure_format(
+    format: crate::AccelerationStructureFormat,
+) -> vk::AccelerationStructureTypeKHR {
+    match format {
+        crate::AccelerationStructureFormat::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL,
+        crate::AccelerationStructureFormat::BottomLevel => {
+            vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL
+        }
+    }
+}
+
+pub fn map_acceleration_structure_build_mode(
+    mode: crate::AccelerationStructureBuildMode,
+) -> vk::BuildAccelerationStructureModeKHR {
+    match mode {
+        crate::AccelerationStructureBuildMode::Build => {
+            vk::BuildAccelerationStructureModeKHR::BUILD
+        }
+        crate::AccelerationStructureBuildMode::Update => {
+            vk::BuildAccelerationStructureModeKHR::UPDATE
+        }
+    }
+}
+
+pub fn map_acceleration_structure_flags(
+    flags: crate::AccelerationStructureBuildFlags,
+) -> vk::BuildAccelerationStructureFlagsKHR {
+    let mut vk_flags = vk::BuildAccelerationStructureFlagsKHR::empty();
+
+    if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_TRACE) {
+        vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE;
+    }
+
+    if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_BUILD) {
+        vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_BUILD;
+    }
+
+    if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_UPDATE) {
+        vk_flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_UPDATE;
+    }
+
+    if flags.contains(crate::AccelerationStructureBuildFlags::LOW_MEMORY) {
+        vk_flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY;
+    }
+
+    if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_COMPACTION) {
+        vk_flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_COMPACTION;
+    }
+
+    vk_flags
+}
+
+pub fn map_acceleration_structure_geometry_flags(
+    flags: crate::AccelerationStructureGeometryFlags,
+) -> vk::GeometryFlagsKHR {
+    let mut vk_flags = vk::GeometryFlagsKHR::empty();
+
+    if flags.contains(crate::AccelerationStructureGeometryFlags::OPAQUE) {
+        vk_flags |= vk::GeometryFlagsKHR::OPAQUE;
+    }
+
+    if flags.contains(crate::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION) {
+        vk_flags |= vk::GeometryFlagsKHR::NO_DUPLICATE_ANY_HIT_INVOCATION;
+    }
+
+    vk_flags
+}
+
+pub fn map_acceleration_structure_usage_to_barrier(
+    usage: crate::AccelerationStructureUses,
+) -> (vk::PipelineStageFlags, vk::AccessFlags) {
+    let mut stages = vk::PipelineStageFlags::empty();
+    let mut access = vk::AccessFlags::empty();
+
+    if usage.contains(crate::AccelerationStructureUses::BUILD_INPUT) {
+        stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR;
+        access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR;
+    }
+    if usage.contains(crate::AccelerationStructureUses::BUILD_OUTPUT) {
+        stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR;
+        access |= vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR;
+    }
+    if usage.contains(crate::AccelerationStructureUses::SHADER_INPUT) {
+        stages |= vk::PipelineStageFlags::VERTEX_SHADER
+            | vk::PipelineStageFlags::FRAGMENT_SHADER
+            | vk::PipelineStageFlags::COMPUTE_SHADER;
+        access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR;
+    }
+
+    (stages, access)
+}
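The new `BufferUses` bits above drive both buffer creation flags and barrier scopes; a scratch buffer, for instance, only needs the dedicated scratch usage and `map_buffer_usage` adds `STORAGE_BUFFER` and `SHADER_DEVICE_ADDRESS` for it. A hedged sketch, assuming `device` and a `sizes` value from the build-sizes query:

```rust
// Sketch only: allocate a build scratch buffer with the new usage flag.
let scratch_buffer = unsafe {
    device
        .create_buffer(&hal::BufferDescriptor {
            label: Some("acceleration structure scratch"),
            size: sizes.build_scratch_size,
            usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
            memory_flags: hal::MemoryFlags::empty(),
        })
        .expect("scratch allocation failed")
};
```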
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 28ff5b1265..0caf1aa1c2 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -863,12 +863,21 @@ impl crate::Device for super::Device {
             desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT),
         );
 
+        let alignment_mask = if desc.usage.intersects(
+            crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT
+                | crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
+        ) {
+            16
+        } else {
+            req.alignment
+        } - 1;
+
         let block = unsafe {
             self.mem_allocator.lock().alloc(
                 &*self.shared,
                 gpu_alloc::Request {
                     size: req.size,
-                    align_mask: req.alignment - 1,
+                    align_mask: alignment_mask,
                     usage: alloc_usage,
                     memory_types: req.memory_type_bits & self.valid_ash_memory_types,
                 },
@@ -1256,6 +1265,9 @@ impl crate::Device for super::Device {
                 wgt::BindingType::StorageTexture { .. } => {
                     desc_count.storage_image += count;
                 }
+                wgt::BindingType::AccelerationStructure => {
+                    desc_count.acceleration_structure += count;
+                }
             }
         }
 
@@ -1430,6 +1442,10 @@ impl crate::Device for super::Device {
         let mut buffer_infos = Vec::with_capacity(desc.buffers.len());
         let mut sampler_infos = Vec::with_capacity(desc.samplers.len());
         let mut image_infos = Vec::with_capacity(desc.textures.len());
+        let mut acceleration_structure_infos =
+            Vec::with_capacity(desc.acceleration_structures.len());
+        let mut raw_acceleration_structures =
+            Vec::with_capacity(desc.acceleration_structures.len());
         for entry in desc.entries {
             let (ty, size) = desc.layout.types[entry.binding as usize];
             if size == 0 {
@@ -1439,6 +1455,9 @@ impl crate::Device for super::Device {
                 .dst_set(*set.raw())
                 .dst_binding(entry.binding)
                 .descriptor_type(ty);
+
+            let mut extra_descriptor_count = 0;
+
             write = match ty {
                 vk::DescriptorType::SAMPLER => {
                     let index = sampler_infos.len();
@@ -1489,9 +1508,44 @@ impl crate::Device for super::Device {
                     ));
                     write.buffer_info(&buffer_infos[index..])
                 }
+                vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => {
+                    let index = acceleration_structure_infos.len();
+                    let start = entry.resource_index;
+                    let end = start + entry.count;
+
+                    let raw_start = raw_acceleration_structures.len();
+
+                    raw_acceleration_structures.extend(
+                        desc.acceleration_structures[start as usize..end as usize]
+                            .iter()
+                            .map(|acceleration_structure| acceleration_structure.raw),
+                    );
+
+                    let acceleration_structure_info =
+                        vk::WriteDescriptorSetAccelerationStructureKHR::builder()
+                            .acceleration_structures(&raw_acceleration_structures[raw_start..]);
+
+                    // todo: Dereference the struct to get around lifetime issues. Safe as long
+                    // as we never resize `raw_acceleration_structures`.
+                    let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR =
+                        *acceleration_structure_info;
+
+                    assert!(
+                        index < desc.acceleration_structures.len(),
+                        "Encountered more acceleration structures than expected"
+                    );
+                    acceleration_structure_infos.push(acceleration_structure_info);
+
+                    extra_descriptor_count += 1;
+
+                    write.push_next(&mut acceleration_structure_infos[index])
+                }
                 _ => unreachable!(),
             };
-            writes.push(write.build());
+
+            let mut write = write.build();
+            write.descriptor_count += extra_descriptor_count;
+
+            writes.push(write);
         }
 
         unsafe { self.shared.raw.update_descriptor_sets(&writes, &[]) };
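The descriptor-write path above is what makes a TLAS visible to shaders; from the caller's side it mirrors the other resource arrays on `BindGroupDescriptor`. A hedged sketch, assuming a compatible `bind_group_layout` and an existing `tlas`:

```rust
// Sketch only: bind a TLAS at @binding(0) for use with ray queries.
let bind_group = unsafe {
    device
        .create_bind_group(&hal::BindGroupDescriptor {
            label: Some("ray-query bind group"),
            layout: &bind_group_layout,
            buffers: &[],
            samplers: &[],
            textures: &[],
            acceleration_structures: &[&tlas],
            entries: &[hal::BindGroupEntry {
                binding: 0,
                resource_index: 0,
                count: 1,
            }],
        })
        .expect("bind group creation failed")
};
```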
@@ -2024,6 +2078,231 @@ impl crate::Device for super::Device {
             }
         }
     }
+
+    unsafe fn get_acceleration_structure_build_sizes<'a>(
+        &self,
+        desc: &crate::GetAccelerationStructureBuildSizesDescriptor<'a, super::Api>,
+    ) -> crate::AccelerationStructureBuildSizes {
+        const CAPACITY: usize = 8;
+
+        let ray_tracing_functions = self
+            .shared
+            .extension_fns
+            .ray_tracing
+            .as_ref()
+            .expect("Feature `RAY_TRACING_ACCELERATION_STRUCTURE` not enabled");
+
+        let (geometries, primitive_counts) = match *desc.entries {
+            crate::AccelerationStructureEntries::Instances(ref instances) => {
+                let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::default();
+
+                let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                    .geometry_type(vk::GeometryTypeKHR::INSTANCES)
+                    .geometry(vk::AccelerationStructureGeometryDataKHR {
+                        instances: instance_data,
+                    });
+
+                (
+                    smallvec::smallvec![*geometry],
+                    smallvec::smallvec![instances.count],
+                )
+            }
+            crate::AccelerationStructureEntries::Triangles(ref in_geometries) => {
+                let mut primitive_counts =
+                    smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len());
+                let mut geometries = smallvec::SmallVec::<
+                    [vk::AccelerationStructureGeometryKHR; CAPACITY],
+                >::with_capacity(in_geometries.len());
+
+                for triangles in in_geometries {
+                    let mut triangle_data =
+                        vk::AccelerationStructureGeometryTrianglesDataKHR::builder()
+                            .vertex_format(conv::map_vertex_format(triangles.vertex_format))
+                            .max_vertex(triangles.vertex_count)
+                            .vertex_stride(triangles.vertex_stride);
+
+                    let primitive_count = if let Some(ref indices) = triangles.indices {
+                        triangle_data =
+                            triangle_data.index_type(conv::map_index_format(indices.format));
+                        indices.count / 3
+                    } else {
+                        triangles.vertex_count
+                    };
+
+                    let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                        .geometry_type(vk::GeometryTypeKHR::TRIANGLES)
+                        .geometry(vk::AccelerationStructureGeometryDataKHR {
+                            triangles: *triangle_data,
+                        })
+                        .flags(conv::map_acceleration_structure_geometry_flags(
+                            triangles.flags,
+                        ));
+
+                    geometries.push(*geometry);
+                    primitive_counts.push(primitive_count);
+                }
+                (geometries, primitive_counts)
+            }
+            crate::AccelerationStructureEntries::AABBs(ref in_geometries) => {
+                let mut primitive_counts =
+                    smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len());
+                let mut geometries = smallvec::SmallVec::<
+                    [vk::AccelerationStructureGeometryKHR; CAPACITY],
+                >::with_capacity(in_geometries.len());
+                for aabb in in_geometries {
+                    let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder()
+                        .stride(aabb.stride);
+
+                    let geometry = vk::AccelerationStructureGeometryKHR::builder()
+                        .geometry_type(vk::GeometryTypeKHR::AABBS)
+                        .geometry(vk::AccelerationStructureGeometryDataKHR { aabbs: *aabbs_data })
+                        .flags(conv::map_acceleration_structure_geometry_flags(aabb.flags));
+
+                    geometries.push(*geometry);
+                    primitive_counts.push(aabb.count);
+                }
+                (geometries, primitive_counts)
+            }
+        };
+
+        let ty = match *desc.entries {
+            crate::AccelerationStructureEntries::Instances(_) => {
+                vk::AccelerationStructureTypeKHR::TOP_LEVEL
+            }
+            _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL,
+        };
+
+        let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder()
+            .ty(ty)
+            .flags(conv::map_acceleration_structure_flags(desc.flags))
+            .geometries(&geometries);
+
+        let raw = unsafe {
+            ray_tracing_functions
+                .acceleration_structure
+                .get_acceleration_structure_build_sizes(
+                    vk::AccelerationStructureBuildTypeKHR::DEVICE,
+                    &geometry_info,
+                    &primitive_counts,
+                )
+        };
+
+        crate::AccelerationStructureBuildSizes {
+            acceleration_structure_size: raw.acceleration_structure_size,
+            update_scratch_size: raw.update_scratch_size,
+            build_scratch_size: raw.build_scratch_size,
+        }
+    }
+
+    unsafe fn get_acceleration_structure_device_address(
+        &self,
+        acceleration_structure: &super::AccelerationStructure,
+    ) -> wgt::BufferAddress {
+        let ray_tracing_functions = self
+            .shared
+            .extension_fns
+            .ray_tracing
+            .as_ref()
+            .expect("Feature `RAY_TRACING_ACCELERATION_STRUCTURE` not enabled");
+
+        unsafe {
+            ray_tracing_functions
+                .acceleration_structure
+                .get_acceleration_structure_device_address(
+                    &vk::AccelerationStructureDeviceAddressInfoKHR::builder()
+                        .acceleration_structure(acceleration_structure.raw),
+                )
+        }
+    }
+
+    unsafe fn create_acceleration_structure(
+        &self,
+        desc: &crate::AccelerationStructureDescriptor,
+    ) -> Result<super::AccelerationStructure, crate::DeviceError> {
+        let ray_tracing_functions = self
+            .shared
+            .extension_fns
+            .ray_tracing
+            .as_ref()
+            .expect("Feature `RAY_TRACING_ACCELERATION_STRUCTURE` not enabled");
+
+        let vk_buffer_info = vk::BufferCreateInfo::builder()
+            .size(desc.size)
+            .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR)
+            .sharing_mode(vk::SharingMode::EXCLUSIVE);
+
+        unsafe {
+            let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?;
+            let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer);
+
+            let block = self.mem_allocator.lock().alloc(
+                &*self.shared,
+                gpu_alloc::Request {
+                    size: req.size,
+                    align_mask: req.alignment - 1,
+                    usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS,
+                    memory_types: req.memory_type_bits & self.valid_ash_memory_types,
+                },
+            )?;
+
+            self.shared
+                .raw
+                .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?;
+
+            if let Some(label) = desc.label {
+                self.shared
+                    .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label);
+            }
+
+            let vk_info = vk::AccelerationStructureCreateInfoKHR::builder()
+                .buffer(raw_buffer)
+                .offset(0)
+                .size(desc.size)
+                .ty(conv::map_acceleration_structure_format(desc.format));
+
+            let raw_acceleration_structure = ray_tracing_functions
+                .acceleration_structure
+                .create_acceleration_structure(&vk_info, None)?;
+
+            if let Some(label) = desc.label {
+                self.shared.set_object_name(
+                    vk::ObjectType::ACCELERATION_STRUCTURE_KHR,
+                    raw_acceleration_structure,
+                    label,
+                );
+            }
+
+            Ok(super::AccelerationStructure {
+                raw: raw_acceleration_structure,
+                buffer: raw_buffer,
+                block: Mutex::new(block),
+            })
+        }
+    }
+
+    unsafe fn destroy_acceleration_structure(
+        &self,
+        acceleration_structure: super::AccelerationStructure,
+    ) {
+        let ray_tracing_functions = self
+            .shared
+            .extension_fns
+            .ray_tracing
+            .as_ref()
+            .expect("Feature `RAY_TRACING_ACCELERATION_STRUCTURE` not enabled");
+
+        unsafe {
+            ray_tracing_functions
+                .acceleration_structure
+                .destroy_acceleration_structure(acceleration_structure.raw, None);
+            self.shared
+                .raw
+                .destroy_buffer(acceleration_structure.buffer, None);
+            self.mem_allocator
+                .lock()
+                .dealloc(&*self.shared, acceleration_structure.block.into_inner());
+        }
+    }
 }
 
 impl From<gpu_alloc::AllocationError> for crate::DeviceError {
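Putting the device functions together: sizing must precede creation, since the returned `acceleration_structure_size` feeds the descriptor. A hedged sketch of the intended call order, reusing the `entries` from the build example above:

```rust
// Sketch only: query sizes, then create a BLAS large enough to build into.
let sizes = unsafe {
    device.get_acceleration_structure_build_sizes(
        &hal::GetAccelerationStructureBuildSizesDescriptor {
            entries: &entries,
            flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE,
        },
    )
};
let blas = unsafe {
    device
        .create_acceleration_structure(&hal::AccelerationStructureDescriptor {
            label: Some("triangle blas"),
            size: sizes.acceleration_structure_size,
            format: hal::AccelerationStructureFormat::BottomLevel,
        })
        .expect("failed to create acceleration structure")
};
```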
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index 843e4ef36f..45deda5d5b 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -72,6 +72,7 @@ impl crate::Api for Api {
     type Sampler = Sampler;
     type QuerySet = QuerySet;
     type Fence = Fence;
+    type AccelerationStructure = AccelerationStructure;
 
     type BindGroupLayout = BindGroupLayout;
     type BindGroup = BindGroup;
@@ -193,6 +194,12 @@ enum ExtensionFn<T> {
 struct DeviceExtensionFunctions {
     draw_indirect_count: Option<khr::DrawIndirectCount>,
     timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
+    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
+}
+
+struct RayTracingDeviceExtensionFunctions {
+    acceleration_structure: khr::AccelerationStructure,
+    buffer_device_address: khr::BufferDeviceAddress,
 }
 
 /// Set of internal capabilities, which don't show up in the exposed
@@ -358,6 +365,13 @@ pub struct Buffer {
     block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
 }
 
+#[derive(Debug)]
+pub struct AccelerationStructure {
+    raw: vk::AccelerationStructureKHR,
+    buffer: vk::Buffer,
+    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
+}
+
 #[derive(Debug)]
 pub struct Texture {
     raw: vk::Image,
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 560a061355..dd9f906746 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -764,7 +764,6 @@ bitflags::bitflags! {
         ///
         /// This is a native only feature.
         const VERTEX_ATTRIBUTE_64BIT = 1 << 53;
-
         /// Allows vertex shaders to have outputs which are not consumed
         /// by the fragment shader.
         ///
@@ -773,7 +772,6 @@ bitflags::bitflags! {
         /// - Metal
         /// - OpenGL
         const SHADER_UNUSED_VERTEX_OUTPUT = 1 << 54;
-
         /// Allows for creation of textures of format [`TextureFormat::NV12`]
         ///
         /// Supported platforms:
@@ -782,11 +780,25 @@ bitflags::bitflags! {
         ///
         /// This is a native only feature.
         const TEXTURE_FORMAT_NV12 = 1 << 55;
+        /// Allows for the creation of ray-tracing acceleration structures.
+        ///
+        /// Supported platforms:
+        /// - Vulkan
+        ///
+        /// This is a native-only feature.
+        const RAY_TRACING_ACCELERATION_STRUCTURE = 1 << 56;
 
-        // 55..59 available
+        // 57 available
 
         // Shader:
 
+        /// Allows for the creation of ray-tracing queries within shaders.
+        ///
+        /// Supported platforms:
+        /// - Vulkan
+        ///
+        /// This is a native-only feature.
+        const RAY_QUERY = 1 << 58;
         /// Enables 64-bit floating point types in SPIR-V shaders.
         ///
         /// Note: even when supported by GPU hardware, 64-bit floating point operations are
@@ -825,7 +837,6 @@ bitflags::bitflags! {
         ///
         /// This is a native only feature.
         const SHADER_EARLY_DEPTH_TEST = 1 << 62;
-
         /// Allows two outputs from a shader to be used for blending.
         /// Note that dual-source blending doesn't support multiple render targets.
         ///
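The two feature bits are deliberately separate: `RAY_TRACING_ACCELERATION_STRUCTURE` gates building and binding structures, while `RAY_QUERY` additionally gates shader-side traversal. A hedged sketch of the expected gating (`features` is assumed to be the `wgt::Features` reported by an adapter):

```rust
// Sketch only: feature-gate ray-tracing setup on the new bits.
let can_build = features.contains(wgt::Features::RAY_TRACING_ACCELERATION_STRUCTURE);
let can_trace_in_shaders = can_build && features.contains(wgt::Features::RAY_QUERY);
if can_trace_in_shaders {
    // acceleration structures may be built, and shaders may declare ray queries
}
```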
@@ -6175,6 +6186,21 @@ pub enum BindingType {
         /// Dimension of the texture view that is going to be sampled.
         view_dimension: TextureViewDimension,
     },
+
+    /// A ray-tracing acceleration structure binding.
+    ///
+    /// Example WGSL syntax:
+    /// ```rust,ignore
+    /// @group(0) @binding(0)
+    /// var acc_struct: acceleration_structure;
+    /// ```
+    ///
+    /// Example GLSL syntax:
+    /// ```cpp,ignore
+    /// layout(binding = 0)
+    /// uniform accelerationStructureEXT accStruct;
+    /// ```
+    AccelerationStructure,
 }
 
 impl BindingType {
@@ -6825,6 +6851,36 @@ impl Default for InstanceDescriptor {
     }
 }
 
+bitflags::bitflags!(
+    /// Flags for acceleration structures
+    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+    pub struct AccelerationStructureFlags: u8 {
+        /// Allow for incremental updates (no change in size)
+        const ALLOW_UPDATE = 1 << 0;
+        /// Allow the acceleration structure to be compacted in a copy operation
+        const ALLOW_COMPACTION = 1 << 1;
+        /// Optimize for fast ray tracing performance
+        const PREFER_FAST_TRACE = 1 << 2;
+        /// Optimize for fast build time
+        const PREFER_FAST_BUILD = 1 << 3;
+        /// Optimize for low memory footprint (scratch and output)
+        const LOW_MEMORY = 1 << 4;
+    }
+);
+impl_bitflags!(AccelerationStructureFlags);
+
+bitflags::bitflags!(
+    /// Flags for acceleration structure geometries
+    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+    pub struct AccelerationStructureGeometryFlags: u8 {
+        /// Opaque geometry: the any-hit shader is skipped for these primitives
+        const OPAQUE = 1 << 0;
+        /// Guarantee that the any-hit shader is invoked at most once per primitive
+        const NO_DUPLICATE_ANY_HIT_INVOCATION = 1 << 1;
+    }
+);
+impl_bitflags!(AccelerationStructureGeometryFlags);
+
 pub use send_sync::*;
 
 #[doc(hidden)]
diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs
index b9bb04ffb0..1faf25bfee 100644
--- a/wgpu/src/backend/web.rs
+++ b/wgpu/src/backend/web.rs
@@ -1584,6 +1584,7 @@ impl crate::context::Context for Context {
                     storage_texture.view_dimension(map_texture_view_dimension(view_dimension));
                 mapped_entry.storage_texture(&storage_texture);
             }
+            wgt::BindingType::AccelerationStructure => todo!(),
         }
 
         mapped_entry