From cd16e71654ef7acf0979c113198a660a3d519f26 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Sat, 3 Sep 2022 15:28:00 +0200 Subject: [PATCH 01/33] Initial commit --- wgpu-hal/examples/halmark/main.rs | 8 ++++ wgpu-hal/src/empty.rs | 4 ++ wgpu-hal/src/gles/device.rs | 6 +++ wgpu-hal/src/gles/mod.rs | 1 + wgpu-hal/src/lib.rs | 18 ++++++++ wgpu-hal/src/vulkan/adapter.rs | 21 +++++++++ wgpu-hal/src/vulkan/device.rs | 71 +++++++++++++++++++++++++++++++ wgpu-hal/src/vulkan/mod.rs | 9 ++++ 8 files changed, 138 insertions(+) diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 383efcdc53..1fa19bfd07 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -345,6 +345,14 @@ impl Example { }; let sampler = unsafe { device.create_sampler(&sampler_desc).unwrap() }; + let accel = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("my as"), + size: 1024, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + }; + let globals = Globals { // cgmath::ortho() projection mvp: [ diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 0c546469b2..59eae2dedf 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -28,6 +28,7 @@ impl crate::Api for Api { type Sampler = Resource; type QuerySet = Resource; type Fence = Resource; + type AccelerationStructure = Resource; type BindGroupLayout = Resource; type BindGroup = Resource; @@ -118,6 +119,9 @@ impl crate::Device for Context { unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { Ok(Resource) } + unsafe fn create_acceleration_structure(&self, desc: &crate::AccelerationStructureDescriptor) -> DeviceResult { + Ok(Resource) + } unsafe fn destroy_buffer(&self, buffer: Resource) {} unsafe fn map_buffer( &self, diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 04ecdffe02..b8fd7d3842 100644 --- a/wgpu-hal/src/gles/device.rs +++ 
b/wgpu-hal/src/gles/device.rs @@ -455,6 +455,12 @@ impl crate::Device for super::Device { data, }) } + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<(), crate::DeviceError> { + unimplemented!() + } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { if let Some(raw) = buffer.raw { let gl = &self.shared.context.lock(); diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 717502f2c6..a688f230e7 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -112,6 +112,7 @@ impl crate::Api for Api { type Sampler = Sampler; type QuerySet = QuerySet; type Fence = Fence; + type AccelerationStructure = (); type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 02d3c13af1..88d0a848fc 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -172,6 +172,8 @@ pub trait Api: Clone + Sized { type ShaderModule: fmt::Debug + Send + Sync; type RenderPipeline: Send + Sync; type ComputePipeline: Send + Sync; + + type AccelerationStructure: fmt::Debug + Send + Sync + 'static; } pub trait Instance: Sized + Send + Sync { @@ -236,6 +238,9 @@ pub trait Device: Send + Sync { /// /// The initial usage is `BufferUses::empty()`. 
unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result; + + unsafe fn create_acceleration_structure(&self, desc: &AccelerationStructureDescriptor) -> Result; + unsafe fn destroy_buffer(&self, buffer: A::Buffer); //TODO: clarify if zero-sized mapping is allowed unsafe fn map_buffer( @@ -810,6 +815,19 @@ pub struct BufferDescriptor<'a> { pub memory_flags: MemoryFlags, } +#[derive(Clone, Debug)] +pub struct AccelerationStructureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub format: AccelerationStructureFormat, +} + +#[derive(Clone, Debug)] +pub enum AccelerationStructureFormat { + TopLevel, + BottomLevel, +} + #[derive(Clone, Debug)] pub struct TextureDescriptor<'a> { pub label: Label<'a>, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 0a3afb690e..94ca7e0d13 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -27,6 +27,7 @@ pub struct PhysicalDeviceFeatures { vk::PhysicalDeviceShaderFloat16Int8Features, vk::PhysicalDevice16BitStorageFeatures, )>, + acceleration_structure: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
@@ -65,6 +66,9 @@ impl PhysicalDeviceFeatures { info = info.push_next(f16_i8_feature); info = info.push_next(_16bit_feature); } + if let Some(ref mut feature) = self.acceleration_structure { + info = info.push_next(feature); + } info } @@ -295,6 +299,12 @@ impl PhysicalDeviceFeatures { } else { None }, + acceleration_structure: if true { + Some(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(true).build()) + } else { + None + } } } @@ -579,6 +589,11 @@ impl PhysicalDeviceCapabilities { extensions.push(vk::KhrDrawIndirectCountFn::name()); } + if true { + extensions.push(vk::KhrDeferredHostOperationsFn::name()); + extensions.push(vk::KhrAccelerationStructureFn::name()); + } + if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { extensions.push(vk::ExtConservativeRasterizationFn::name()); } @@ -1098,6 +1113,11 @@ impl super::Adapter { } else { None }; + let acceleration_structure_fn = if enabled_extensions.contains(&khr::AccelerationStructure::name()) { + Some(khr::AccelerationStructure::new(&self.instance.raw, &raw_device)) + } else { + None + }; let naga_options = { use naga::back::spv; @@ -1190,6 +1210,7 @@ impl super::Adapter { extension_fns: super::DeviceExtensionFunctions { draw_indirect_count: indirect_count_fn, timeline_semaphore: timeline_semaphore_fn, + acceleration_structure: acceleration_structure_fn, }, vendor_id: self.phd_capabilities.properties.vendor_id, timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index b9d74e36e3..5f9a326608 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -808,6 +808,77 @@ impl crate::Device for super::Device { block: Mutex::new(block), }) } + + unsafe fn create_acceleration_structure(&self, desc: &crate::AccelerationStructureDescriptor) -> Result { + let functor = match self.shared.extension_fns.acceleration_structure { + Some(ref 
functor) => { + functor + } + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let vk_buffer_info = vk::BufferCreateInfo::builder() + .size(desc.size) + .usage( + vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS + ) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; + let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); + + dbg!(&req); + + let block = self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )?; + + self.shared + .raw + .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; + + if let Some(label) = desc.label { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); + } + + let ty = match desc.format { + crate::AccelerationStructureFormat::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL, + crate::AccelerationStructureFormat::BottomLevel => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; + + let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() + .buffer(raw_buffer) + .offset(256) + .size(desc.size / 2) + .ty(ty).build(); + + dbg!(&vk_info); + + let raw_acceleration_structure = functor.create_acceleration_structure( + &vk_info, + None, + )?; + + if let Some(label) = desc.label { + self.shared + .set_object_name(vk::ObjectType::ACCELERATION_STRUCTURE_KHR, raw_acceleration_structure, label); + } + + Ok(super::AccelerationStructure { + raw: raw_acceleration_structure, + buffer: raw_buffer, + block: Mutex::new(block), + }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { self.shared.raw.destroy_buffer(buffer.raw, None); self.mem_allocator diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index d3416a50ed..443d7f1422 100644 
--- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -65,6 +65,7 @@ impl crate::Api for Api { type Sampler = Sampler; type QuerySet = QuerySet; type Fence = Fence; + type AccelerationStructure = AccelerationStructure; type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; @@ -147,6 +148,7 @@ enum ExtensionFn { struct DeviceExtensionFunctions { draw_indirect_count: Option, timeline_semaphore: Option>, + acceleration_structure: Option, } /// Set of internal capabilities, which don't show up in the exposed @@ -344,6 +346,13 @@ pub struct Buffer { block: Mutex>, } +#[derive(Debug)] +pub struct AccelerationStructure { + raw: vk::AccelerationStructureKHR, + buffer: vk::Buffer, + block: Mutex>, +} + #[derive(Debug)] pub struct Texture { raw: vk::Image, From ae4dcfed87df8e0b5fd21ffea0645c925c3201de Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Sun, 11 Sep 2022 20:53:15 +0200 Subject: [PATCH 02/33] Woo! building an AS works --- wgpu-hal/examples/halmark/main.rs | 131 +++++++++++++++++++++++++-- wgpu-hal/src/empty.rs | 28 +++++- wgpu-hal/src/lib.rs | 61 ++++++++++++- wgpu-hal/src/vulkan/adapter.rs | 89 ++++++++++++++++--- wgpu-hal/src/vulkan/command.rs | 51 +++++++++++ wgpu-hal/src/vulkan/conv.rs | 30 +++++++ wgpu-hal/src/vulkan/device.rs | 143 ++++++++++++++++++++++++------ wgpu-hal/src/vulkan/mod.rs | 1 + 8 files changed, 486 insertions(+), 48 deletions(-) diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 1fa19bfd07..ef92b30f7f 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -252,7 +252,9 @@ impl Example { let staging_buffer_desc = hal::BufferDescriptor { label: Some("stage"), size: texture_data.len() as wgt::BufferAddress, - usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::COPY_SRC + | hal::BufferUses::BUFFER_DEVICE_ADDRESS, memory_flags: hal::MemoryFlags::TRANSIENT | 
hal::MemoryFlags::PREFER_COHERENT, }; let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; @@ -269,6 +271,110 @@ impl Example { assert!(mapping.is_coherent); } + let triangle: [f32; 9] = [0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 1.0, 0.0, 0.0]; + + let triangle_size = std::mem::size_of::<[f32; 9]>(); + + dbg!(&triangle_size); + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size = std::mem::size_of::<[u32; 3]>(); + + let triangle_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("t buf"), + size: triangle_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap() + }; + + let i_buf = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("i buf"), + size: indices_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap() + }; + + unsafe { + let mapping = device + .map_buffer(&triangle_buffer, 0..triangle_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + triangle.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + triangle_size, + ); + device.unmap_buffer(&staging_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + unsafe { + let mapping = device.map_buffer(&i_buf, 0..indices_size as u64).unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size, + ); + device.unmap_buffer(&staging_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + let geometry = hal::AccelerationStructureGeometry::Triangles { + vertex_buffer: &triangle_buffer, + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: 3, + vertex_stride: 3 * 4, + indices: 
Some(hal::AccelerationStructureGeometryIndices { + buffer: &i_buf, + format: wgt::IndexFormat::Uint32, + }), + }; + + let sizes = unsafe { + device.get_acceleration_structure_build_size( + &geometry, + hal::AccelerationStructureFormat::BottomLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 1, + ) + }; + + dbg!(&sizes); + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("my as"), + size: sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let scratch_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("scratch buffer"), + size: sizes.build_scratch_size, + usage: hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::STORAGE_READ_WRITE, + memory_flags: hal::MemoryFlags::empty(), + }) + .unwrap() + }; + let texture_desc = hal::TextureDescriptor { label: None, size: wgt::Extent3d { @@ -291,6 +397,21 @@ impl Example { }; let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + + unsafe { + // todo: extract out bytes from transmission renderer example and try those. 
+ cmd_encoder.build_acceleration_structures( + &geometry, + hal::AccelerationStructureFormat::BottomLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 1, + 0, + &blas, + &scratch_buffer, + ); + } + { let buffer_barrier = hal::BufferBarrier { buffer: &staging_buffer, @@ -345,14 +466,6 @@ impl Example { }; let sampler = unsafe { device.create_sampler(&sampler_desc).unwrap() }; - let accel = unsafe { - device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { - label: Some("my as"), - size: 1024, - format: hal::AccelerationStructureFormat::BottomLevel, - }) - }; - let globals = Globals { // cgmath::ortho() projection mvp: [ diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 59eae2dedf..8119af97c3 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -119,9 +119,22 @@ impl crate::Device for Context { unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { Ok(Resource) } - unsafe fn create_acceleration_structure(&self, desc: &crate::AccelerationStructureDescriptor) -> DeviceResult { + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> DeviceResult { Ok(Resource) } + unsafe fn get_acceleration_structure_build_size( + &self, + geometry: &crate::AccelerationStructureGeometry, + format: crate::AccelerationStructureFormat, + mode: crate::AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + ) -> crate::AccelerationStructureBuildSizes { + Default::default() + } unsafe fn destroy_buffer(&self, buffer: Resource) {} unsafe fn map_buffer( &self, @@ -395,4 +408,17 @@ impl crate::CommandEncoder for Encoder { unsafe fn dispatch(&mut self, count: [u32; 3]) {} unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} + + unsafe fn build_acceleration_structures( + &mut self, + geometry: &crate::AccelerationStructureGeometry, + format: crate::AccelerationStructureFormat, + mode: 
crate::AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + primitive_offset: u32, + destination_acceleration_structure: &Resource, + scratch_buffer: &Resource, + ) { + } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 88d0a848fc..e475c5748a 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -239,7 +239,19 @@ pub trait Device: Send + Sync { /// The initial usage is `BufferUses::empty()`. unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result; - unsafe fn create_acceleration_structure(&self, desc: &AccelerationStructureDescriptor) -> Result; + unsafe fn create_acceleration_structure( + &self, + desc: &AccelerationStructureDescriptor, + ) -> Result; + + unsafe fn get_acceleration_structure_build_size( + &self, + geometry: &AccelerationStructureGeometry, + format: AccelerationStructureFormat, + mode: AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + ) -> AccelerationStructureBuildSizes; unsafe fn destroy_buffer(&self, buffer: A::Buffer); //TODO: clarify if zero-sized mapping is allowed @@ -528,6 +540,18 @@ pub trait CommandEncoder: Send + Sync { unsafe fn dispatch(&mut self, count: [u32; 3]); unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); + + unsafe fn build_acceleration_structures( + &mut self, + geometry: &crate::AccelerationStructureGeometry, + format: crate::AccelerationStructureFormat, + mode: crate::AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + primitive_offset: u32, + destination_acceleration_structure: &A::AccelerationStructure, + scratch_buffer: &A::Buffer, + ); } bitflags!( @@ -664,6 +688,8 @@ bitflags::bitflags! { const STORAGE_READ_WRITE = 1 << 8; /// The indirect or count buffer in a indirect draw or dispatch. const INDIRECT = 1 << 9; + const BUFFER_DEVICE_ADDRESS = 1 << 10; + const ACCELERATION_STRUCTURE_BUILD_INPUT = 1 << 11; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits | Self::COPY_SRC.bits | Self::INDEX.bits | Self::VERTEX.bits | Self::UNIFORM.bits | @@ -822,12 +848,25 @@ pub struct AccelerationStructureDescriptor<'a> { pub format: AccelerationStructureFormat, } -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] pub enum AccelerationStructureFormat { TopLevel, BottomLevel, } +#[derive(Clone, Debug)] +pub enum AccelerationStructureBuildMode { + Build, + Update, +} + +#[derive(Clone, Debug, Default)] +pub struct AccelerationStructureBuildSizes { + pub acceleration_structure_size: wgt::BufferAddress, + pub update_scratch_size: wgt::BufferAddress, + pub build_scratch_size: wgt::BufferAddress, +} + #[derive(Clone, Debug)] pub struct TextureDescriptor<'a> { pub label: Label<'a>, @@ -1115,6 +1154,24 @@ pub struct BufferCopy { pub size: wgt::BufferSize, } +pub enum AccelerationStructureGeometry<'a, A: Api> { + Triangles { + vertex_buffer: &'a A::Buffer, + vertex_format: wgt::VertexFormat, + max_vertex: u32, + vertex_stride: wgt::BufferAddress, + indices: Option>, + }, + Instances { + buffer: &'a A::Buffer, + }, +} + +pub struct AccelerationStructureGeometryIndices<'a, A: Api> { + pub format: wgt::IndexFormat, + pub buffer: &'a A::Buffer, +} + #[derive(Clone, Debug)] pub struct TextureCopyBase { pub mip_level: u32, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 94ca7e0d13..c5f75864a9 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -28,6 +28,8 @@ pub struct PhysicalDeviceFeatures { vk::PhysicalDevice16BitStorageFeatures, )>, acceleration_structure: Option, + buffer_device_address: Option, + ray_query: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
@@ -69,6 +71,12 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.acceleration_structure { info = info.push_next(feature); } + if let Some(ref mut feature) = self.buffer_device_address { + info = info.push_next(feature); + } + if let Some(ref mut feature) = self.ray_query { + info = info.push_next(feature); + } info } @@ -299,12 +307,37 @@ impl PhysicalDeviceFeatures { } else { None }, - acceleration_structure: if true { - Some(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() - .acceleration_structure(true).build()) + acceleration_structure: if enabled_extensions + .contains(&vk::KhrAccelerationStructureFn::name()) + { + Some( + vk::PhysicalDeviceAccelerationStructureFeaturesKHR::builder() + .acceleration_structure(true) + .build(), + ) } else { None - } + }, + buffer_device_address: if enabled_extensions + .contains(&vk::KhrBufferDeviceAddressFn::name()) + { + Some( + vk::PhysicalDeviceBufferDeviceAddressFeaturesKHR::builder() + .buffer_device_address(true) + .build(), + ) + } else { + None + }, + ray_query: if enabled_extensions.contains(&vk::KhrRayQueryFn::name()) { + Some( + vk::PhysicalDeviceRayQueryFeaturesKHR::builder() + .ray_query(true) + .build(), + ) + } else { + None + }, } } @@ -516,11 +549,12 @@ impl PhysicalDeviceFeatures { } /// Information gathered about a physical device capabilities. -#[derive(Default)] +#[derive(Default, Debug)] pub struct PhysicalDeviceCapabilities { supported_extensions: Vec, properties: vk::PhysicalDeviceProperties, descriptor_indexing: Option, + acceleration_structure: Option, } // This is safe because the structs have `p_next: *mut c_void`, which we null out/never read. 
@@ -592,6 +626,8 @@ impl PhysicalDeviceCapabilities { if true { extensions.push(vk::KhrDeferredHostOperationsFn::name()); extensions.push(vk::KhrAccelerationStructureFn::name()); + extensions.push(vk::KhrBufferDeviceAddressFn::name()); + extensions.push(vk::KhrRayQueryFn::name()); } if requested_features.contains(wgt::Features::CONSERVATIVE_RASTERIZATION) { @@ -752,6 +788,9 @@ impl super::InstanceShared { let supports_descriptor_indexing = capabilities.supports_extension(vk::ExtDescriptorIndexingFn::name()); + let supports_acceleration_structure = + capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); + let mut builder = vk::PhysicalDeviceProperties2::builder(); if supports_descriptor_indexing { @@ -761,6 +800,13 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if supports_acceleration_structure { + let next = capabilities + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructurePropertiesKHR::default()); + builder = builder.push_next(next); + } + let mut properties2 = builder.build(); unsafe { get_device_properties.get_physical_device_properties2(phd, &mut properties2); @@ -846,6 +892,12 @@ impl super::InstanceShared { builder = builder.push_next(&mut next.0); builder = builder.push_next(&mut next.1); } + if capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()) { + let next = features + .acceleration_structure + .insert(vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default()); + builder = builder.push_next(next); + } let mut features2 = builder.build(); unsafe { @@ -1113,11 +1165,24 @@ impl super::Adapter { } else { None }; - let acceleration_structure_fn = if enabled_extensions.contains(&khr::AccelerationStructure::name()) { - Some(khr::AccelerationStructure::new(&self.instance.raw, &raw_device)) - } else { - None - }; + let acceleration_structure_fn = + if enabled_extensions.contains(&khr::AccelerationStructure::name()) { + Some(khr::AccelerationStructure::new( + 
&self.instance.raw, + &raw_device, + )) + } else { + None + }; + let buffer_device_address_fn = + if enabled_extensions.contains(&khr::BufferDeviceAddress::name()) { + Some(khr::BufferDeviceAddress::new( + &self.instance.raw, + &raw_device, + )) + } else { + None + }; let naga_options = { use naga::back::spv; @@ -1211,6 +1276,7 @@ impl super::Adapter { draw_indirect_count: indirect_count_fn, timeline_semaphore: timeline_semaphore_fn, acceleration_structure: acceleration_structure_fn, + buffer_device_address: buffer_device_address_fn, }, vendor_id: self.phd_capabilities.properties.vendor_id, timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, @@ -1259,7 +1325,8 @@ impl super::Adapter { size: memory_heap.size, }) .collect(), - buffer_device_address: false, + buffer_device_address: enabled_extensions + .contains(&khr::BufferDeviceAddress::name()), }; gpu_alloc::GpuAllocator::new(config, properties) }; diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index e225ca8356..641145dc94 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -341,6 +341,57 @@ impl crate::CommandEncoder for super::CommandEncoder { ); } + unsafe fn build_acceleration_structures( + &mut self, + geometry: &crate::AccelerationStructureGeometry, + format: crate::AccelerationStructureFormat, + mode: crate::AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + primitive_offset: u32, + destination_acceleration_structure: &super::AccelerationStructure, + scratch_buffer: &super::Buffer, + ) { + let extension = match self.device.extension_fns.acceleration_structure { + Some(ref extension) => extension, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let bda_extension = match self.device.extension_fns.buffer_device_address { + Some(ref extension) => extension, + None => panic!("Feature `BDA` not enabled"), + }; + + let geometry = + super::device::map_acceleration_structure_geometry(geometry, 
&bda_extension).build(); + + let geometries = &[geometry]; + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(primitive_count) + .primitive_offset(primitive_offset) + .build(); + + let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(conv::map_acceleration_structure_format(format)) + .mode(conv::map_acceleration_structure_build_mode(mode)) + .flags(vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE) + .geometries(geometries) + .dst_acceleration_structure(destination_acceleration_structure.raw) + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: bda_extension.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(scratch_buffer.raw), + ), + }) + .build(); + + let range = &[range][..]; + let range = &[range][..]; + let geometry_info = &[geometry_info]; + + extension.cmd_build_acceleration_structures(self.active, geometry_info, range); + } + // render unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index dc5b915970..95ff907f83 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -491,6 +491,12 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { if usage.contains(crate::BufferUses::INDIRECT) { flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; } + if usage.contains(crate::BufferUses::BUFFER_DEVICE_ADDRESS) { + flags |= vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } + if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT) { + flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR; + } flags } @@ -823,3 +829,27 @@ pub fn map_pipeline_statistics( } flags } + +pub fn map_acceleration_structure_format( + format: crate::AccelerationStructureFormat, +) -> vk::AccelerationStructureTypeKHR { + match format { + crate::AccelerationStructureFormat::TopLevel => 
vk::AccelerationStructureTypeKHR::TOP_LEVEL, + crate::AccelerationStructureFormat::BottomLevel => { + vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL + } + } +} + +pub fn map_acceleration_structure_build_mode( + format: crate::AccelerationStructureBuildMode, +) -> vk::BuildAccelerationStructureModeKHR { + match format { + crate::AccelerationStructureBuildMode::Build => { + vk::BuildAccelerationStructureModeKHR::BUILD + } + crate::AccelerationStructureBuildMode::Update => { + vk::BuildAccelerationStructureModeKHR::UPDATE + } + } +} diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 5f9a326608..79a12e689b 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -809,11 +809,53 @@ impl crate::Device for super::Device { }) } - unsafe fn create_acceleration_structure(&self, desc: &crate::AccelerationStructureDescriptor) -> Result { - let functor = match self.shared.extension_fns.acceleration_structure { - Some(ref functor) => { - functor - } + unsafe fn get_acceleration_structure_build_size( + &self, + geometry: &crate::AccelerationStructureGeometry, + format: crate::AccelerationStructureFormat, + mode: crate::AccelerationStructureBuildMode, + flags: (), + primitive_count: u32, + ) -> crate::AccelerationStructureBuildSizes { + let extension = match self.shared.extension_fns.acceleration_structure { + Some(ref extension) => extension, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let bda_extension = match self.shared.extension_fns.buffer_device_address { + Some(ref extension) => extension, + None => panic!("Feature `BDA` not enabled"), + }; + + let geometry = map_acceleration_structure_geometry(geometry, &bda_extension); + + let geometries = &[*geometry]; + + let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(conv::map_acceleration_structure_format(format)) + .mode(conv::map_acceleration_structure_build_mode(mode)) + 
.flags(vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE) + .geometries(geometries); + + let raw = extension.get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &geometry_info, + &[primitive_count], + ); + + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: raw.acceleration_structure_size, + update_scratch_size: raw.update_scratch_size, + build_scratch_size: raw.build_scratch_size, + } + } + + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + let extension = match self.shared.extension_fns.acceleration_structure { + Some(ref extension) => extension, None => panic!("Feature `RAY_TRACING` not enabled"), }; @@ -821,14 +863,12 @@ impl crate::Device for super::Device { .size(desc.size) .usage( vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR - | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS, ) .sharing_mode(vk::SharingMode::EXCLUSIVE); let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); - - dbg!(&req); let block = self.mem_allocator.lock().alloc( &*self.shared, @@ -849,27 +889,20 @@ impl crate::Device for super::Device { .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); } - let ty = match desc.format { - crate::AccelerationStructureFormat::TopLevel => vk::AccelerationStructureTypeKHR::TOP_LEVEL, - crate::AccelerationStructureFormat::BottomLevel => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, - }; - let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() - .buffer(raw_buffer) - .offset(256) - .size(desc.size / 2) - .ty(ty).build(); - - dbg!(&vk_info); + .buffer(raw_buffer) + .offset(0) + .size(desc.size) + .ty(conv::map_acceleration_structure_format(desc.format)); - let raw_acceleration_structure = functor.create_acceleration_structure( - &vk_info, - None, - )?; + let 
raw_acceleration_structure = extension.create_acceleration_structure(&vk_info, None)?; if let Some(label) = desc.label { - self.shared - .set_object_name(vk::ObjectType::ACCELERATION_STRUCTURE_KHR, raw_acceleration_structure, label); + self.shared.set_object_name( + vk::ObjectType::ACCELERATION_STRUCTURE_KHR, + raw_acceleration_structure, + label, + ); } Ok(super::AccelerationStructure { @@ -2006,3 +2039,63 @@ impl From for crate::DeviceError { Self::OutOfMemory } } + +pub unsafe fn map_acceleration_structure_geometry<'a>( + geometry: &crate::AccelerationStructureGeometry, + buffer_device_address: &ash::extensions::khr::BufferDeviceAddress, +) -> vk::AccelerationStructureGeometryKHRBuilder<'a> { + match geometry { + crate::AccelerationStructureGeometry::Instances { buffer } => { + let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( + vk::DeviceOrHostAddressConstKHR { + device_address: buffer_device_address.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), + }, + ); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instances, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + &crate::AccelerationStructureGeometry::Triangles { + vertex_buffer, + vertex_format, + max_vertex, + vertex_stride, + ref indices, + } => { + let mut triangles_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: buffer_device_address.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), + ), + }) + .vertex_format(conv::map_vertex_format(vertex_format)) + .vertex_stride(vertex_stride) + .max_vertex(max_vertex); + + if let Some(indices) = indices { + triangles_data = triangles_data + .index_type(conv::map_index_format(indices.format)) + .index_data(vk::DeviceOrHostAddressConstKHR { 
+ device_address: buffer_device_address.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), + ), + }) + } + + let triangles_data = triangles_data.build(); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: triangles_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + } +} diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 443d7f1422..5732fbfb60 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -149,6 +149,7 @@ struct DeviceExtensionFunctions { draw_indirect_count: Option, timeline_semaphore: Option>, acceleration_structure: Option, + buffer_device_address: Option, } /// Set of internal capabilities, which don't show up in the exposed From 98cf75b594f378ef6b186c27f955e60469234a73 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Sun, 11 Sep 2022 23:29:49 +0200 Subject: [PATCH 03/33] Fix alignment for amdpro driver --- wgpu-hal/examples/halmark/main.rs | 179 +++++++++++++++++++++++++++++- wgpu-hal/src/empty.rs | 7 ++ wgpu-hal/src/lib.rs | 10 +- wgpu-hal/src/vulkan/conv.rs | 13 ++- wgpu-hal/src/vulkan/device.rs | 47 +++++++- 5 files changed, 245 insertions(+), 11 deletions(-) diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index ef92b30f7f..eeb3838533 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -78,6 +78,8 @@ struct Example { context_index: usize, extent: [u32; 2], start: Instant, + buffers: Vec, + acceleration_structures: Vec, } impl Example { @@ -288,7 +290,7 @@ impl Example { size: triangle_size as u64, usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT, + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | 
hal::MemoryFlags::PREFER_COHERENT, }) .unwrap() @@ -301,7 +303,7 @@ impl Example { size: indices_size as u64, usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT, + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) .unwrap() @@ -316,7 +318,7 @@ impl Example { mapping.ptr.as_ptr(), triangle_size, ); - device.unmap_buffer(&staging_buffer).unwrap(); + device.unmap_buffer(&triangle_buffer).unwrap(); assert!(mapping.is_coherent); } @@ -327,7 +329,7 @@ impl Example { mapping.ptr.as_ptr(), indices_size, ); - device.unmap_buffer(&staging_buffer).unwrap(); + device.unmap_buffer(&i_buf).unwrap(); assert!(mapping.is_coherent); } @@ -375,6 +377,146 @@ impl Example { .unwrap() }; + #[derive(Clone, Copy)] + struct Vec4 { + x: f32, + y: f32, + z: f32, + w: f32, + } + + struct Mat4 { + rows: [Vec4; 4], + } + + impl Mat4 { + const fn from_translation(x: f32, y: f32, z: f32) -> Self { + Mat4 { + rows: [ + Vec4 { + x: 1.0, + y: 0.0, + z: 0.0, + w: 0.0, + }, + Vec4 { + x: 0.0, + y: 1.0, + z: 0.0, + w: 0.0, + }, + Vec4 { + x: 0.0, + y: 0.0, + z: 1.0, + w: 0.0, + }, + Vec4 { x, y, z, w: 1.0 }, + ], + } + } + } + + fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { + let row_0 = matrix.rows[0]; + let row_1 = matrix.rows[1]; + let row_2 = matrix.rows[2]; + [ + row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, + row_2.y, row_2.z, row_2.w, + ] + } + + fn pack_24_8(low_24: u32, high_8: u8) -> u32 { + (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) + } + + #[derive(Debug)] + #[repr(C)] + struct Instance { + transform: [f32; 12], + instance_custom_index_and_mask: u32, + instance_shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, + } + + let instances = unsafe { + [ + Instance { + transform: 
transpose_matrix_for_acceleration_structure_instance( + Mat4::from_translation(0.0, 0.0, 0.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: device + .get_acceleration_structure_device_address(&blas), + }, + Instance { + transform: transpose_matrix_for_acceleration_structure_instance( + Mat4::from_translation(1.0, 1.0, 1.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: device + .get_acceleration_structure_device_address(&blas), + }, + ] + }; + + let instances_buffer_size = instances.len() * std::mem::size_of::(); + + dbg!(&instances_buffer_size); + + let instances_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap() + }; + + unsafe { + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + let instance_geometry: hal::AccelerationStructureGeometry = + hal::AccelerationStructureGeometry::Instances { + buffer: &instances_buffer, + }; + + let instance_sizes = unsafe { + device.get_acceleration_structure_build_size( + &instance_geometry, + hal::AccelerationStructureFormat::TopLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 2, + ) + }; + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: 
Some("my tlas"), + size: instance_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + let texture_desc = hal::TextureDescriptor { label: None, size: wgt::Extent3d { @@ -410,6 +552,24 @@ impl Example { &blas, &scratch_buffer, ); + + let as_barrier = hal::BufferBarrier { + buffer: &staging_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(as_barrier)); + + cmd_encoder.build_acceleration_structures( + &instance_geometry, + hal::AccelerationStructureFormat::TopLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 2, + 0, + &tlas, + &scratch_buffer, + ); } { @@ -623,6 +783,8 @@ impl Example { context_index: 0, extent: [window_size.0, window_size.1], start: Instant::now(), + buffers: vec![triangle_buffer, i_buf, scratch_buffer, instances_buffer], + acceleration_structures: vec![blas, tlas], }) } @@ -651,6 +813,15 @@ impl Example { self.device.destroy_buffer(self.global_buffer); self.device.destroy_texture_view(self.texture_view); self.device.destroy_texture(self.texture); + + for buffer in self.buffers.drain(..) { + self.device.destroy_buffer(buffer); + } + + for a_s in self.acceleration_structures.drain(..) 
{ + self.device.destroy_acceleration_structure(a_s); + } + self.device.destroy_sampler(self.sampler); self.device.destroy_shader_module(self.shader); self.device.destroy_render_pipeline(self.pipeline); diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 8119af97c3..904f4d119b 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -135,6 +135,13 @@ impl crate::Device for Context { ) -> crate::AccelerationStructureBuildSizes { Default::default() } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &Resource, + ) -> wgt::BufferAddress { + Default::default() + } + unsafe fn destroy_acceleration_structure(&self, buffer: Resource) {} unsafe fn destroy_buffer(&self, buffer: Resource) {} unsafe fn map_buffer( &self, diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index e475c5748a..212a31d848 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -253,6 +253,13 @@ pub trait Device: Send + Sync { primitive_count: u32, ) -> AccelerationStructureBuildSizes; + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &A::AccelerationStructure, + ) -> wgt::BufferAddress; + + unsafe fn destroy_acceleration_structure(&self, buffer: A::AccelerationStructure); + unsafe fn destroy_buffer(&self, buffer: A::Buffer); //TODO: clarify if zero-sized mapping is allowed unsafe fn map_buffer( @@ -689,7 +696,8 @@ bitflags::bitflags! { /// The indirect or count buffer in a indirect draw or dispatch. const INDIRECT = 1 << 9; const BUFFER_DEVICE_ADDRESS = 1 << 10; - const ACCELERATION_STRUCTURE_BUILD_INPUT = 1 << 11; + const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 11; + const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits | Self::COPY_SRC.bits | Self::INDEX.bits | Self::VERTEX.bits | Self::UNIFORM.bits | diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 95ff907f83..f8d6e68f1f 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -494,7 +494,10 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { if usage.contains(crate::BufferUses::BUFFER_DEVICE_ADDRESS) { flags |= vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } - if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_BUILD_INPUT) { + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + ) { flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR; } flags @@ -549,6 +552,14 @@ pub fn map_buffer_usage_to_barrier( stages |= vk::PipelineStageFlags::DRAW_INDIRECT; access |= vk::AccessFlags::INDIRECT_COMMAND_READ; } + if usage.intersects( + crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + ) { + stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; + access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR + | vk::AccessFlags::ACCELERATION_STRUCTURE_WRITE_KHR; + } (stages, access) } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 79a12e689b..a0cfe725d9 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -788,7 +788,14 @@ impl crate::Device for super::Device { &*self.shared, gpu_alloc::Request { size: req.size, - align_mask: req.alignment - 1, + align_mask: if desc + .usage + .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT) + { + 16 + } else { + req.alignment + } - 1, usage: alloc_usage, memory_types: req.memory_type_bits & self.valid_ash_memory_types, }, @@ -850,6 +857,21 @@ impl crate::Device for super::Device { } } + unsafe fn 
get_acceleration_structure_device_address( + &self, + acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + let extension = match self.shared.extension_fns.acceleration_structure { + Some(ref extension) => extension, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + extension.get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(acceleration_structure.raw), + ) + } + unsafe fn create_acceleration_structure( &self, desc: &crate::AccelerationStructureDescriptor, @@ -861,10 +883,7 @@ impl crate::Device for super::Device { let vk_buffer_info = vk::BufferCreateInfo::builder() .size(desc.size) - .usage( - vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR - | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS, - ) + .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR) .sharing_mode(vk::SharingMode::EXCLUSIVE); let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; @@ -919,6 +938,24 @@ impl crate::Device for super::Device { .dealloc(&*self.shared, buffer.block.into_inner()); } + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: super::AccelerationStructure, + ) { + let extension = match self.shared.extension_fns.acceleration_structure { + Some(ref extension) => extension, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + extension.destroy_acceleration_structure(acceleration_structure.raw, None); + self.shared + .raw + .destroy_buffer(acceleration_structure.buffer, None); + self.mem_allocator + .lock() + .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + } + unsafe fn map_buffer( &self, buffer: &super::Buffer, From 6655d2380495302a460e20692b6f91c80c1ffdb0 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Mon, 12 Sep 2022 23:26:26 +0200 Subject: [PATCH 04/33] Ray tracing works in shaders! 
--- Cargo.lock | 9 +- wgpu-hal/Cargo.toml | 1 + wgpu-hal/examples/halmark/main.rs | 296 +------ wgpu-hal/examples/ray-traced-triangle/main.rs | 829 ++++++++++++++++++ .../examples/ray-traced-triangle/shader.comp | 44 + .../ray-traced-triangle/shader.comp.spv | Bin 0 -> 3520 bytes wgpu-hal/src/empty.rs | 4 +- wgpu-hal/src/lib.rs | 20 +- wgpu-hal/src/vulkan/command.rs | 61 +- wgpu-hal/src/vulkan/conv.rs | 1 + wgpu-hal/src/vulkan/device.rs | 141 +-- wgpu-types/src/lib.rs | 2 + 12 files changed, 1035 insertions(+), 373 deletions(-) create mode 100644 wgpu-hal/examples/ray-traced-triangle/main.rs create mode 100644 wgpu-hal/examples/ray-traced-triangle/shader.comp create mode 100644 wgpu-hal/examples/ray-traced-triangle/shader.comp.spv diff --git a/Cargo.lock b/Cargo.lock index 7b48ee4aad..7fea60ce6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -814,6 +814,12 @@ version = "0.20.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43e957e744be03f5801a55472f593d43fabdebf25a4585db250f04d86b1675f" +[[package]] +name = "glam" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "518faa5064866338b013ff9b2350dc318e14cc4fcd6cb8206d7e7c9886c98815" + [[package]] name = "glow" version = "0.11.2" @@ -2352,7 +2358,7 @@ dependencies = [ "ddsfile", "env_logger", "futures-intrusive", - "glam", + "glam 0.20.5", "js-sys", "log", "naga", @@ -2414,6 +2420,7 @@ dependencies = [ "env_logger", "foreign-types 0.3.2", "fxhash", + "glam 0.21.3", "glow", "glutin", "gpu-alloc", diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index b89f400537..9fcc70e8b6 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -109,6 +109,7 @@ features = ["wgsl-in"] [dev-dependencies] env_logger = "0.9" winit = "0.27.1" # for "halmark" example +glam = "0.21.3" # for ray-traced-triangle example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] glutin = "0.28.0" # for "gles" example diff --git 
a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index eeb3838533..bd8196d8dc 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -78,8 +78,6 @@ struct Example { context_index: usize, extent: [u32; 2], start: Instant, - buffers: Vec, - acceleration_structures: Vec, } impl Example { @@ -254,9 +252,7 @@ impl Example { let staging_buffer_desc = hal::BufferDescriptor { label: Some("stage"), size: texture_data.len() as wgt::BufferAddress, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::COPY_SRC - | hal::BufferUses::BUFFER_DEVICE_ADDRESS, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }; let staging_buffer = unsafe { device.create_buffer(&staging_buffer_desc).unwrap() }; @@ -273,250 +269,6 @@ impl Example { assert!(mapping.is_coherent); } - let triangle: [f32; 9] = [0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 1.0, 0.0, 0.0]; - - let triangle_size = std::mem::size_of::<[f32; 9]>(); - - dbg!(&triangle_size); - - let indices: [u32; 3] = [0, 1, 2]; - - let indices_size = std::mem::size_of::<[u32; 3]>(); - - let triangle_buffer = unsafe { - device - .create_buffer(&hal::BufferDescriptor { - label: Some("t buf"), - size: triangle_size as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, - memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, - }) - .unwrap() - }; - - let i_buf = unsafe { - device - .create_buffer(&hal::BufferDescriptor { - label: Some("i buf"), - size: indices_size as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, - memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, - }) - .unwrap() - }; - - unsafe { - let mapping = device - .map_buffer(&triangle_buffer, 
0..triangle_size as u64) - .unwrap(); - ptr::copy_nonoverlapping( - triangle.as_ptr() as *const u8, - mapping.ptr.as_ptr(), - triangle_size, - ); - device.unmap_buffer(&triangle_buffer).unwrap(); - assert!(mapping.is_coherent); - } - - unsafe { - let mapping = device.map_buffer(&i_buf, 0..indices_size as u64).unwrap(); - ptr::copy_nonoverlapping( - indices.as_ptr() as *const u8, - mapping.ptr.as_ptr(), - indices_size, - ); - device.unmap_buffer(&i_buf).unwrap(); - assert!(mapping.is_coherent); - } - - let geometry = hal::AccelerationStructureGeometry::Triangles { - vertex_buffer: &triangle_buffer, - vertex_format: wgt::VertexFormat::Float32x3, - max_vertex: 3, - vertex_stride: 3 * 4, - indices: Some(hal::AccelerationStructureGeometryIndices { - buffer: &i_buf, - format: wgt::IndexFormat::Uint32, - }), - }; - - let sizes = unsafe { - device.get_acceleration_structure_build_size( - &geometry, - hal::AccelerationStructureFormat::BottomLevel, - hal::AccelerationStructureBuildMode::Build, - (), - 1, - ) - }; - - dbg!(&sizes); - - let blas = unsafe { - device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { - label: Some("my as"), - size: sizes.acceleration_structure_size, - format: hal::AccelerationStructureFormat::BottomLevel, - }) - } - .unwrap(); - - let scratch_buffer = unsafe { - device - .create_buffer(&hal::BufferDescriptor { - label: Some("scratch buffer"), - size: sizes.build_scratch_size, - usage: hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::STORAGE_READ_WRITE, - memory_flags: hal::MemoryFlags::empty(), - }) - .unwrap() - }; - - #[derive(Clone, Copy)] - struct Vec4 { - x: f32, - y: f32, - z: f32, - w: f32, - } - - struct Mat4 { - rows: [Vec4; 4], - } - - impl Mat4 { - const fn from_translation(x: f32, y: f32, z: f32) -> Self { - Mat4 { - rows: [ - Vec4 { - x: 1.0, - y: 0.0, - z: 0.0, - w: 0.0, - }, - Vec4 { - x: 0.0, - y: 1.0, - z: 0.0, - w: 0.0, - }, - Vec4 { - x: 0.0, - y: 0.0, - z: 1.0, - w: 0.0, - }, - Vec4 { x, y, z, w: 
1.0 }, - ], - } - } - } - - fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { - let row_0 = matrix.rows[0]; - let row_1 = matrix.rows[1]; - let row_2 = matrix.rows[2]; - [ - row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, - row_2.y, row_2.z, row_2.w, - ] - } - - fn pack_24_8(low_24: u32, high_8: u8) -> u32 { - (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) - } - - #[derive(Debug)] - #[repr(C)] - struct Instance { - transform: [f32; 12], - instance_custom_index_and_mask: u32, - instance_shader_binding_table_record_offset_and_flags: u32, - acceleration_structure_reference: u64, - } - - let instances = unsafe { - [ - Instance { - transform: transpose_matrix_for_acceleration_structure_instance( - Mat4::from_translation(0.0, 0.0, 0.0), - ), - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: device - .get_acceleration_structure_device_address(&blas), - }, - Instance { - transform: transpose_matrix_for_acceleration_structure_instance( - Mat4::from_translation(1.0, 1.0, 1.0), - ), - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: device - .get_acceleration_structure_device_address(&blas), - }, - ] - }; - - let instances_buffer_size = instances.len() * std::mem::size_of::(); - - dbg!(&instances_buffer_size); - - let instances_buffer = unsafe { - device - .create_buffer(&hal::BufferDescriptor { - label: Some("instances_buffer"), - size: instances_buffer_size as u64, - usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, - memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, - }) - .unwrap() - }; - - unsafe { - let mapping = device - .map_buffer(&instances_buffer, 
0..instances_buffer_size as u64) - .unwrap(); - ptr::copy_nonoverlapping( - instances.as_ptr() as *const u8, - mapping.ptr.as_ptr(), - instances_buffer_size, - ); - device.unmap_buffer(&instances_buffer).unwrap(); - assert!(mapping.is_coherent); - } - - let instance_geometry: hal::AccelerationStructureGeometry = - hal::AccelerationStructureGeometry::Instances { - buffer: &instances_buffer, - }; - - let instance_sizes = unsafe { - device.get_acceleration_structure_build_size( - &instance_geometry, - hal::AccelerationStructureFormat::TopLevel, - hal::AccelerationStructureBuildMode::Build, - (), - 2, - ) - }; - - let tlas = unsafe { - device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { - label: Some("my tlas"), - size: instance_sizes.acceleration_structure_size, - format: hal::AccelerationStructureFormat::TopLevel, - }) - } - .unwrap(); - let texture_desc = hal::TextureDescriptor { label: None, size: wgt::Extent3d { @@ -539,39 +291,6 @@ impl Example { }; let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; - - unsafe { - // todo: extract out bytes from transmission renderer example and try those. 
- cmd_encoder.build_acceleration_structures( - &geometry, - hal::AccelerationStructureFormat::BottomLevel, - hal::AccelerationStructureBuildMode::Build, - (), - 1, - 0, - &blas, - &scratch_buffer, - ); - - let as_barrier = hal::BufferBarrier { - buffer: &staging_buffer, - usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT - ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, - }; - cmd_encoder.transition_buffers(iter::once(as_barrier)); - - cmd_encoder.build_acceleration_structures( - &instance_geometry, - hal::AccelerationStructureFormat::TopLevel, - hal::AccelerationStructureBuildMode::Build, - (), - 2, - 0, - &tlas, - &scratch_buffer, - ); - } - { let buffer_barrier = hal::BufferBarrier { buffer: &staging_buffer, @@ -696,6 +415,7 @@ impl Example { buffers: &[global_buffer_binding], samplers: &[&sampler], textures: &[texture_binding], + acceleration_structures: &[], entries: &[ hal::BindGroupEntry { binding: 0, @@ -729,6 +449,7 @@ impl Example { buffers: &[local_buffer_binding], samplers: &[], textures: &[], + acceleration_structures: &[], entries: &[hal::BindGroupEntry { binding: 0, resource_index: 0, @@ -783,8 +504,6 @@ impl Example { context_index: 0, extent: [window_size.0, window_size.1], start: Instant::now(), - buffers: vec![triangle_buffer, i_buf, scratch_buffer, instances_buffer], - acceleration_structures: vec![blas, tlas], }) } @@ -813,15 +532,6 @@ impl Example { self.device.destroy_buffer(self.global_buffer); self.device.destroy_texture_view(self.texture_view); self.device.destroy_texture(self.texture); - - for buffer in self.buffers.drain(..) { - self.device.destroy_buffer(buffer); - } - - for a_s in self.acceleration_structures.drain(..) 
{ - self.device.destroy_acceleration_structure(a_s); - } - self.device.destroy_sampler(self.sampler); self.device.destroy_shader_module(self.shader); self.device.destroy_render_pipeline(self.pipeline); diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs new file mode 100644 index 0000000000..93f62eac04 --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -0,0 +1,829 @@ +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, +}; + +use glam::{Mat4, Vec3}; +use std::{ + borrow::{Borrow, Cow}, + iter, mem, + mem::{align_of, size_of}, + ptr::{self, copy_nonoverlapping}, + time::Instant, +}; + +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_FRAMES: u32 = 3; + +struct ExecutionContext { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec, + used_cmd_bufs: Vec, + frames_recorded: usize, +} + +impl ExecutionContext { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) 
{ + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + + contexts: Vec>, + context_index: usize, + extent: [u32; 2], + start: Instant, + pipeline: A::ComputePipeline, + bind_group: A::BindGroup, + //local_group: A::BindGroup, + //global_group_layout: A::BindGroupLayout, + //local_group_layout: A::BindGroupLayout, + pipeline_layout: A::PipelineLayout, + /*shader: A::ShaderModule, + pipeline: A::RenderPipeline, + bunnies: Vec, + local_buffer: A::Buffer, + local_alignment: u32, + global_buffer: A::Buffer, + sampler: A::Sampler, + */ + texture: A::Texture, + /*texture_view: A::TextureView, + contexts: Vec>, + context_index: usize, + extent: [u32; 2], + start: Instant, + buffers: Vec, + acceleration_structures: Vec,*/ +} + +impl Example { + fn init(window: &winit::window::Window) -> Result { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: if cfg!(debug_assertions) { + hal::InstanceFlags::all() + } else { + hal::InstanceFlags::empty() + }, + }; + let instance = unsafe { A::Instance::init(&instance_desc)? 
}; + let mut surface = unsafe { instance.create_surface(window).unwrap() }; + + let (adapter, _capabilities) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + return Err(hal::InstanceError); + } + let exposed = adapters.swap_remove(0); + (exposed.adapter, exposed.capabilities) + }; + let surface_caps = + unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?; + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, mut queue } = unsafe { + adapter + .open(wgt::Features::empty(), &wgt::Limits::default()) + .unwrap() + }; + + let window_size: (u32, u32) = window.inner_size().into(); + let surface_config = hal::SurfaceConfiguration { + swap_chain_size: DESIRED_FRAMES + .max(*surface_caps.swap_chain_sizes.start()) + .min(*surface_caps.swap_chain_sizes.end()), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: hal::CompositeAlphaMode::Opaque, + format: wgt::TextureFormat::Rgba8Unorm, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + #[allow(dead_code)] + struct Uniforms { + view_inverse: glam::Mat4, + proj_inverse: glam::Mat4, + } + + let bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: wgt::BufferSize::new(mem::size_of::() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::WriteOnly, + format: wgt::TextureFormat::Rgba8Unorm, + view_dimension: 
wgt::TextureViewDimension::D2, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::COMPUTE, + ty: wgt::BindingType::AccelerationStructure, + count: None, + }, + ], + }; + + let bgl = unsafe { device.create_bind_group_layout(&bgl_desc).unwrap() }; + + pub fn make_spirv_raw(data: &[u8]) -> Cow<[u32]> { + const MAGIC_NUMBER: u32 = 0x0723_0203; + assert_eq!( + data.len() % size_of::(), + 0, + "data size is not a multiple of 4" + ); + + //If the data happens to be aligned, directly use the byte array, + // otherwise copy the byte array in an owned vector and use that instead. + let words = if data.as_ptr().align_offset(align_of::()) == 0 { + let (pre, words, post) = unsafe { data.align_to::() }; + debug_assert!(pre.is_empty()); + debug_assert!(post.is_empty()); + Cow::from(words) + } else { + let mut words = vec![0u32; data.len() / size_of::()]; + unsafe { + copy_nonoverlapping(data.as_ptr(), words.as_mut_ptr() as *mut u8, data.len()); + } + Cow::from(words) + }; + + assert_eq!( + words[0], MAGIC_NUMBER, + "wrong magic word {:x}. 
Make sure you are using a binary SPIRV file.", + words[0] + ); + + words + } + + let shader = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.comp.spv"))), + ) + .unwrap() + }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let pipeline = unsafe { + device.create_compute_pipeline(&hal::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: &pipeline_layout, + stage: hal::ProgrammableStage { + module: &shader, + entry_point: "main", + }, + }) + } + .unwrap(); + + let vertices: [f32; 9] = [1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 0.0]; + + let vertices_size_in_bytes = vertices.len() * 4; + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size_in_bytes = indices.len() * 4; + + let transform_matrix = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + + let vertices_buffer = unsafe { + let vertices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("vertices buffer"), + size: vertices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&vertices_buffer, 0..vertices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + vertices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + vertices_size_in_bytes, + ); + device.unmap_buffer(&vertices_buffer).unwrap(); + assert!(mapping.is_coherent); + + vertices_buffer + }; + + let indices_buffer = unsafe { + let indices_buffer = device + 
.create_buffer(&hal::BufferDescriptor { + label: Some("indices buffer"), + size: indices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&indices_buffer, 0..indices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size_in_bytes, + ); + device.unmap_buffer(&indices_buffer).unwrap(); + assert!(mapping.is_coherent); + + indices_buffer + }; + + let blas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::AccelerationStructureGeometryInfo::Triangles { + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: 3, + index_format: Some(wgt::IndexFormat::Uint32), + }, + hal::AccelerationStructureFormat::BottomLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 1, + ) + }; + + let tlas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::AccelerationStructureGeometryInfo::Instances, + hal::AccelerationStructureFormat::TopLevel, + hal::AccelerationStructureBuildMode::Build, + (), + 1, + ) + }; + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("tlas"), + size: tlas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + + let uniforms = { + let view = Mat4::look_at_rh(Vec3::new(0.0, 0.0, 2.5), Vec3::ZERO, Vec3::Y); + let proj = Mat4::perspective_rh(59.0_f32.to_radians(), 1.0, 0.001, 1000.0); + + Uniforms { + view_inverse: 
view.inverse(), + proj_inverse: proj.inverse(), + } + }; + + let uniforms_size = std::mem::size_of::(); + + let uniform_buffer = unsafe { + let uniform_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("uniform buffer"), + size: uniforms_size as u64, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&uniform_buffer, 0..uniforms_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + &uniforms as *const Uniforms as *const u8, + mapping.ptr.as_ptr(), + uniforms_size, + ); + device.unmap_buffer(&uniform_buffer).unwrap(); + assert!(mapping.is_coherent); + uniform_buffer + }; + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8Unorm, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + memory_flags: hal::MemoryFlags::empty(), + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let bind_group = { + let buffer_binding = hal::BufferBinding { + buffer: &uniform_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::STORAGE_READ_WRITE, + }; + let group_desc = hal::BindGroupDescriptor { + label: Some("bind group"), + layout: &bgl, + buffers: &[buffer_binding], + samplers: &[], + textures: &[texture_binding], + acceleration_structures: 
&[&tlas], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&group_desc).unwrap() } + }; + + let scratch_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("scratch buffer"), + size: blas_sizes + .build_scratch_size + .max(tlas_sizes.build_scratch_size), + usage: hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::STORAGE_READ_WRITE, + memory_flags: hal::MemoryFlags::empty(), + }) + .unwrap() + }; + + fn pack_24_8(low_24: u32, high_8: u8) -> u32 { + (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) + } + + #[derive(Debug)] + #[repr(C)] + struct Instance { + transform: [f32; 12], + instance_custom_index_and_mask: u32, + instance_shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, + } + + fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { + let row_0 = matrix.row(0); + let row_1 = matrix.row(1); + let row_2 = matrix.row(2); + [ + row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, + row_2.y, row_2.z, row_2.w, + ] + } + + let instances = [ + Instance { + transform: transform_matrix, + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + }, + Instance { + transform: transpose_matrix_for_acceleration_structure_instance( + Mat4::from_rotation_y(1.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + }, + Instance { + transform: 
transpose_matrix_for_acceleration_structure_instance( + Mat4::from_rotation_y(-1.0), + ), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + device.get_acceleration_structure_device_address(&blas) + }, + }, + ]; + + let instances_buffer_size = instances.len() * std::mem::size_of::(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BUFFER_DEVICE_ADDRESS + | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer).unwrap(); + assert!(mapping.is_coherent); + + instances_buffer + }; + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + + unsafe { + cmd_encoder.build_acceleration_structures( + &hal::AccelerationStructureGeometry::Triangles { + vertex_buffer: &vertices_buffer, + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureGeometryIndices { + buffer: &indices_buffer, + format: wgt::IndexFormat::Uint32, + }), + }, + hal::AccelerationStructureFormat::BottomLevel, + hal::AccelerationStructureBuildMode::Build, + (), + indices.len() as u32 / 3, + 0, + &blas, + &scratch_buffer, + ); + + let as_barrier = 
hal::BufferBarrier { + buffer: &scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(as_barrier)); + + cmd_encoder.build_acceleration_structures( + &hal::AccelerationStructureGeometry::Instances { + buffer: &instances_buffer, + }, + hal::AccelerationStructureFormat::TopLevel, + hal::AccelerationStructureBuildMode::Build, + (), + instances.len() as u32, + 0, + &tlas, + &scratch_buffer, + ); + + let texture_barrier = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::STORAGE_READ_WRITE, + }; + + cmd_encoder.transition_textures(iter::once(texture_barrier)); + } + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Self { + instance, + adapter, + surface, + surface_format: surface_config.format, + device, + queue, + pipeline, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, window_size.1], + start: Instant::now(), + pipeline_layout, + bind_group, + texture, + }) + } + + fn update(&mut self, _event: winit::event::WindowEvent) {} + + fn render(&mut self) { + let ctx = &mut self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: 
hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor { + label: None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COPY_DST, + range: wgt::ImageSubresourceRange::default(), + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + unsafe { + ctx.encoder + .begin_compute_pass(&hal::ComputePassDescriptor { label: None }); + ctx.encoder.set_compute_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.bind_group, &[]); + ctx.encoder.dispatch([512 / 8, 512 / 8, 1]); + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::PRESENT, + }; + let target_barrier2 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::STORAGE_READ_WRITE..hal::TextureUses::COPY_SRC, + }; + let target_barrier3 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_SRC..hal::TextureUses::STORAGE_READ_WRITE, + }; + unsafe { + ctx.encoder.end_compute_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier2)); + ctx.encoder.copy_texture_to_texture( + &self.texture, + hal::TextureUses::COPY_SRC, + &surface_tex.borrow(), + std::iter::once(hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + dst_base: hal::TextureCopyBase { + mip_level: 0, + 
array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 512, + height: 512, + depth: 1, + }, + }), + ); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + ctx.encoder.transition_textures(iter::once(target_barrier3)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence { + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue.present(&mut self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::info!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } +} + +#[cfg(all(feature = "metal"))] +type Api = hal::api::Metal; +#[cfg(all(feature = "vulkan", not(feature = "metal")))] +type Api = hal::api::Vulkan; +#[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] +type Api = hal::api::Gles; +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] +type Api = hal::api::Empty; + +fn main() { + env_logger::init(); + + let 
event_loop = winit::event_loop::EventLoop::new(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-bunnymark") + .with_inner_size(winit::dpi::PhysicalSize { + width: 512, + height: 512, + }) + .build(&event_loop) + .unwrap(); + + let example_result = Example::::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + event_loop.run(move |event, _, control_flow| { + let _ = &window; // force ownership by the closure + *control_flow = winit::event_loop::ControlFlow::Poll; + match event { + winit::event::Event::RedrawEventsCleared => { + window.request_redraw(); + } + winit::event::Event::WindowEvent { event, .. } => match event { + winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Escape), + state: winit::event::ElementState::Pressed, + .. + }, + .. + } + | winit::event::WindowEvent::CloseRequested => { + *control_flow = winit::event_loop::ControlFlow::Exit; + } + _ => { + example.as_mut().unwrap().update(event); + } + }, + winit::event::Event::RedrawRequested(_) => { + let ex = example.as_mut().unwrap(); + + ex.render(); + } + winit::event::Event::LoopDestroyed => { + //example.take().unwrap().exit(); + } + _ => {} + } + }); +} diff --git a/wgpu-hal/examples/ray-traced-triangle/shader.comp b/wgpu-hal/examples/ray-traced-triangle/shader.comp new file mode 100644 index 0000000000..d31f29115f --- /dev/null +++ b/wgpu-hal/examples/ray-traced-triangle/shader.comp @@ -0,0 +1,44 @@ +#version 460 +#extension GL_EXT_ray_query : enable + +layout(set = 0, binding = 0) uniform Uniforms +{ + mat4 viewInverse; + mat4 projInverse; +} cam; +layout(set = 0, binding = 1, rgba8) uniform image2D image; +layout(set = 0, binding = 2) uniform accelerationStructureEXT tlas; + +layout(local_size_x = 8, local_size_y = 8) in; + +void main() +{ + uvec2 launch_id = gl_GlobalInvocationID.xy; + uvec2 launch_size = gl_NumWorkGroups.xy * 8; + + 
const vec2 pixelCenter = vec2(launch_id) + vec2(0.5); + const vec2 inUV = pixelCenter/vec2(launch_size); + vec2 d = inUV * 2.0 - 1.0; + + vec4 origin = cam.viewInverse * vec4(0,0,0,1); + vec4 target = cam.projInverse * vec4(d.x, d.y, 1, 1) ; + vec4 direction = cam.viewInverse*vec4(normalize(target.xyz), 0) ; + + float tmin = 0.001; + float tmax = 10000.0; + + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, tlas, gl_RayFlagsOpaqueEXT, 0xff, origin.xyz, tmin, direction.xyz, tmax); + + rayQueryProceedEXT(rayQuery); + + vec3 out_colour = vec3(0.0, 0.0, 0.0); + + if (rayQueryGetIntersectionTypeEXT(rayQuery, true) == gl_RayQueryCommittedIntersectionTriangleEXT ) { + vec2 barycentrics = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true); + + out_colour = vec3(barycentrics.x, barycentrics.y, 1.0 - barycentrics.x - barycentrics.y); + } + + imageStore(image, ivec2(launch_id), vec4(out_colour, 1.0)); +} \ No newline at end of file diff --git a/wgpu-hal/examples/ray-traced-triangle/shader.comp.spv b/wgpu-hal/examples/ray-traced-triangle/shader.comp.spv new file mode 100644 index 0000000000000000000000000000000000000000..345085c948d1fab7ad065d6d92437f40c1c57440 GIT binary patch literal 3520 zcmZ9O`F0dV6o)&>B#}ihYym|KizpHiWfcU%IsqaCLc$_443i-lm`sOG0tvDNL5=@- zj<4af_zFIU%Q+tXzV0p1UGiqyz7DsPIKS$ZbckJG?(+bO?Hbv@N9(6|T=mjuQ zt~KPBf_GAMq&!nFdj{*Z@z;v{9izY;#evevE0JY9<$r%$B*_GtO5oT=KwDmk6$+Q>OKGjTai?iQ1HW^#%! 
zeC^RXz%ldnb^#DgVwriC$wg=m9Lr&;ljMVQ}rr-L`Cx0-v?YUgA?V0v={N}C%)?GuDHOL-d z?e3I&HMeJ!&70*K>p3!x^wN3b-v@HS`o?SpjJ{xhoSO4ToMSt>F$LiIdy&o|zZQE( zuIt}`?Rm)w<98zMjlBEQ_8jHx=NHHx&;p!adlzt+zZ+?6Hcz(ReL5Mvti*Nf2XXfd;&mi@AcA+Poppru+(7>itfC8kqvnkvUAOD&rK++9!v2-p zShewYz&c*rM@aYL-kOKK4QKl}cNpt>wEadL_Tw4S^D)PH7b6#d_fy!rz1*`<6Ep4I zUod|6*7P3x_PhRIFK@EF!+w9kukD-NY;S3@y^p@z!LRK*+iVw`Y|nh?4cj+1#E+qm zH`&e~=k_7i@1EZVezWJvGH+SV$vf=vVRr65^@TitJN>pp-U@U%;V^HIPFZsm2z`B# zbF1OZ(Pw@8CTD$f-UZG%iQSESFXx_-ue~AXd;`q4&vNGL?*Ve5-bQri2=z9h%Pr*f ze7k#qy!AXMxlr#zbh%&h_>a)#wgUI*oZEokt9YJspP(D7&$^zs+)@0#2YY~T$S&Yq zz72bU{IA6LCVZK5`;pf0j>v@?U!hyW_aW3cgf4H4_oNRv_hDd+-_KFdm&z>WIJz;% zK!`bkE;?Ag-p&Dij=-Hq5AlH;%sFd@JzhXJ-+2A|K-l9&bk}T*dn_X5 z)cqEHQ`Lu&Bfx!KLw*ac=eGMS0nah)^JdP;JHkH0Ih5h7ANq6)-MK@41zk=!-pn6` zb4_zyi_-Vo^&A5?z)fI%YZ&*qtB}68x6!Wx_cM;Hfu~*m2Np$lfj;ltZ}hy1tOI%L z{z*;)>G+2p=$im?zM<+1-Pw0Di5_Y>SBxze?(RKwx#qi@V9SS?sXWHHL(its~LuEg$k8qMIu}nBUFs&|RnN&>#9J|2I7dy->Pl zf2*E?C%N5;oCk}*BmB>h--CHzyx+%j;Ms)x{37QNFS#Fb&N~s}e$2U6WVrV~p_^~K Te)~{i9@pf&`v2u-?Fat@FKh for Context { ) -> DeviceResult { Ok(Resource) } - unsafe fn get_acceleration_structure_build_size( + unsafe fn get_acceleration_structure_build_sizes( &self, - geometry: &crate::AccelerationStructureGeometry, + geometry_info: &crate::AccelerationStructureGeometryInfo, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, flags: (), diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 212a31d848..44d7860e10 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -244,9 +244,9 @@ pub trait Device: Send + Sync { desc: &AccelerationStructureDescriptor, ) -> Result; - unsafe fn get_acceleration_structure_build_size( + unsafe fn get_acceleration_structure_build_sizes( &self, - geometry: &AccelerationStructureGeometry, + geometry_info: &AccelerationStructureGeometryInfo, format: AccelerationStructureFormat, mode: AccelerationStructureBuildMode, flags: (), @@ -550,9 +550,9 @@ pub trait CommandEncoder: Send 
+ Sync { unsafe fn build_acceleration_structures( &mut self, - geometry: &crate::AccelerationStructureGeometry, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, + geometry: &AccelerationStructureGeometry, + format: AccelerationStructureFormat, + mode: AccelerationStructureBuildMode, flags: (), primitive_count: u32, primitive_offset: u32, @@ -993,6 +993,7 @@ pub struct BindGroupDescriptor<'a, A: Api> { pub samplers: &'a [&'a A::Sampler], pub textures: &'a [TextureBinding<'a, A>], pub entries: &'a [BindGroupEntry], + pub acceleration_structures: &'a [&'a A::AccelerationStructure], } #[derive(Clone, Debug)] @@ -1162,6 +1163,15 @@ pub struct BufferCopy { pub size: wgt::BufferSize, } +pub enum AccelerationStructureGeometryInfo { + Triangles { + vertex_format: wgt::VertexFormat, + max_vertex: u32, + index_format: Option, + }, + Instances, +} + pub enum AccelerationStructureGeometry<'a, A: Api> { Triangles { vertex_buffer: &'a A::Buffer, diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 641145dc94..e3073b0643 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -362,17 +362,70 @@ impl crate::CommandEncoder for super::CommandEncoder { None => panic!("Feature `BDA` not enabled"), }; - let geometry = - super::device::map_acceleration_structure_geometry(geometry, &bda_extension).build(); + let geometry = match geometry { + crate::AccelerationStructureGeometry::Instances { buffer } => { + let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( + vk::DeviceOrHostAddressConstKHR { + device_address: bda_extension.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), + }, + ); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instances, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + 
&crate::AccelerationStructureGeometry::Triangles { + vertex_buffer, + vertex_format, + max_vertex, + vertex_stride, + ref indices, + } => { + let mut triangles_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: bda_extension.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), + ), + }) + .vertex_format(conv::map_vertex_format(vertex_format)) + .vertex_stride(vertex_stride) + .max_vertex(max_vertex); + + if let Some(indices) = indices { + triangles_data = triangles_data + .index_type(conv::map_index_format(indices.format)) + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: bda_extension.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), + ), + }) + } + + let triangles_data = triangles_data.build(); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: triangles_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + }; - let geometries = &[geometry]; + let geometries = &[*geometry]; let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() .primitive_count(primitive_count) .primitive_offset(primitive_offset) .build(); - let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() .ty(conv::map_acceleration_structure_format(format)) .mode(conv::map_acceleration_structure_build_mode(mode)) .flags(vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE) diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index f8d6e68f1f..e169b4ad29 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -713,6 +713,7 @@ pub fn map_binding_type(ty: wgt::BindingType) -> vk::DescriptorType { wgt::BindingType::Sampler { .. 
} => vk::DescriptorType::SAMPLER, wgt::BindingType::Texture { .. } => vk::DescriptorType::SAMPLED_IMAGE, wgt::BindingType::StorageTexture { .. } => vk::DescriptorType::STORAGE_IMAGE, + wgt::BindingType::AccelerationStructure => vk::DescriptorType::ACCELERATION_STRUCTURE_KHR, } } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a0cfe725d9..47c20726fc 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -816,9 +816,9 @@ impl crate::Device for super::Device { }) } - unsafe fn get_acceleration_structure_build_size( + unsafe fn get_acceleration_structure_build_sizes( &self, - geometry: &crate::AccelerationStructureGeometry, + geometry_info: &crate::AccelerationStructureGeometryInfo, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, flags: (), @@ -829,12 +829,40 @@ impl crate::Device for super::Device { None => panic!("Feature `RAY_TRACING` not enabled"), }; - let bda_extension = match self.shared.extension_fns.buffer_device_address { - Some(ref extension) => extension, - None => panic!("Feature `BDA` not enabled"), - }; + let geometry = match geometry_info { + crate::AccelerationStructureGeometryInfo::Instances => { + let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); - let geometry = map_acceleration_structure_geometry(geometry, &bda_extension); + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instances_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + &crate::AccelerationStructureGeometryInfo::Triangles { + vertex_format, + max_vertex, + index_format, + } => { + let mut triangles_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_format(conv::map_vertex_format(vertex_format)) + .max_vertex(max_vertex); + + if let Some(index_format) = index_format { + triangles_data = + 
triangles_data.index_type(conv::map_index_format(index_format)); + } + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangles_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + }; let geometries = &[*geometry]; @@ -1277,6 +1305,9 @@ impl crate::Device for super::Device { wgt::BindingType::StorageTexture { .. } => { desc_count.storage_image += count; } + wgt::BindingType::AccelerationStructure => { + desc_count.acceleration_structure += count; + } } } @@ -1475,6 +1506,10 @@ impl crate::Device for super::Device { let mut buffer_infos = Vec::with_capacity(desc.buffers.len()); let mut sampler_infos = Vec::with_capacity(desc.samplers.len()); let mut image_infos = Vec::with_capacity(desc.textures.len()); + let mut acceleration_structure_infos = + Vec::with_capacity(desc.acceleration_structures.len()); + let mut raw_acceleration_structures = + Vec::with_capacity(desc.acceleration_structures.len()); for entry in desc.entries { let (ty, size) = desc.layout.types[entry.binding as usize]; if size == 0 { @@ -1484,6 +1519,9 @@ impl crate::Device for super::Device { .dst_set(*set.raw()) .dst_binding(entry.binding) .descriptor_type(ty); + + let mut extra_descriptor_count = 0; + write = match ty { vk::DescriptorType::SAMPLER => { let index = sampler_infos.len(); @@ -1532,9 +1570,36 @@ impl crate::Device for super::Device { )); write.buffer_info(&buffer_infos[index..]) } + vk::DescriptorType::ACCELERATION_STRUCTURE_KHR => { + let index = acceleration_structure_infos.len(); + let start = entry.resource_index; + let end = start + entry.count; + + let raw_start = raw_acceleration_structures.len(); + + raw_acceleration_structures.extend( + desc.acceleration_structures[start as usize..end as usize] + .iter() + .map(|acceleration_structure| acceleration_structure.raw), + ); + + acceleration_structure_infos.push( + // todo: this dereference to build the struct 
is a hack to get around lifetime issues. + *vk::WriteDescriptorSetAccelerationStructureKHR::builder() + .acceleration_structures(&raw_acceleration_structures[raw_start..]), + ); + + extra_descriptor_count += 1; + + write.push_next(&mut acceleration_structure_infos[index]) + } _ => unreachable!(), }; - writes.push(write.build()); + + let mut write = write.build(); + write.descriptor_count += extra_descriptor_count; + + writes.push(write); } self.shared.raw.update_descriptor_sets(&writes, &[]); @@ -2076,63 +2141,3 @@ impl From for crate::DeviceError { Self::OutOfMemory } } - -pub unsafe fn map_acceleration_structure_geometry<'a>( - geometry: &crate::AccelerationStructureGeometry, - buffer_device_address: &ash::extensions::khr::BufferDeviceAddress, -) -> vk::AccelerationStructureGeometryKHRBuilder<'a> { - match geometry { - crate::AccelerationStructureGeometry::Instances { buffer } => { - let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( - vk::DeviceOrHostAddressConstKHR { - device_address: buffer_device_address.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), - ), - }, - ); - - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::INSTANCES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instances, - }) - .flags(vk::GeometryFlagsKHR::empty()) - } - &crate::AccelerationStructureGeometry::Triangles { - vertex_buffer, - vertex_format, - max_vertex, - vertex_stride, - ref indices, - } => { - let mut triangles_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: buffer_device_address.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), - ), - }) - .vertex_format(conv::map_vertex_format(vertex_format)) - .vertex_stride(vertex_stride) - .max_vertex(max_vertex); - - if let Some(indices) = indices { - triangles_data = 
triangles_data - .index_type(conv::map_index_format(indices.format)) - .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: buffer_device_address.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), - ), - }) - } - - let triangles_data = triangles_data.build(); - - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: triangles_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) - } - } -} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 8718d79794..ab6e9a944d 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -4040,6 +4040,8 @@ pub enum BindingType { /// Dimension of the texture view that is going to be sampled. view_dimension: TextureViewDimension, }, + + AccelerationStructure, } impl BindingType { From 2cc43852d4ed64a3412c55a7c5260a14381176da Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Thu, 15 Sep 2022 11:59:05 +0200 Subject: [PATCH 05/33] Major clean up, allow for acceleration structure updates --- wgpu-hal/examples/ray-traced-triangle/main.rs | 219 +++++++++++++----- wgpu-hal/src/empty.rs | 4 +- wgpu-hal/src/lib.rs | 19 +- wgpu-hal/src/vulkan/adapter.rs | 39 ++-- wgpu-hal/src/vulkan/command.rs | 62 +++-- wgpu-hal/src/vulkan/conv.rs | 34 ++- wgpu-hal/src/vulkan/device.rs | 64 ++--- wgpu-hal/src/vulkan/mod.rs | 8 +- wgpu-types/src/lib.rs | 15 ++ 9 files changed, 321 insertions(+), 143 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 93f62eac04..a4ae25e41c 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -16,6 +16,29 @@ use std::{ const COMMAND_BUFFER_PER_CONTEXT: usize = 100; const DESIRED_FRAMES: u32 = 3; +fn pack_24_8(low_24: u32, high_8: u8) -> u32 { + (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) +} + 
+#[derive(Debug)] +#[repr(C)] +struct Instance { + transform: [f32; 12], + instance_custom_index_and_mask: u32, + instance_shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, +} + +fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { + let row_0 = matrix.row(0); + let row_1 = matrix.row(1); + let row_2 = matrix.row(2); + [ + row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, row_2.y, + row_2.z, row_2.w, + ] +} + struct ExecutionContext { encoder: A::CommandEncoder, fence: A::Fence, @@ -51,26 +74,20 @@ struct Example { start: Instant, pipeline: A::ComputePipeline, bind_group: A::BindGroup, - //local_group: A::BindGroup, - //global_group_layout: A::BindGroupLayout, - //local_group_layout: A::BindGroupLayout, + bgl: A::BindGroupLayout, + shader_module: A::ShaderModule, + texture_view: A::TextureView, + uniform_buffer: A::Buffer, pipeline_layout: A::PipelineLayout, - /*shader: A::ShaderModule, - pipeline: A::RenderPipeline, - bunnies: Vec, - local_buffer: A::Buffer, - local_alignment: u32, - global_buffer: A::Buffer, - sampler: A::Sampler, - */ + vertices_buffer: A::Buffer, + indices_buffer: A::Buffer, texture: A::Texture, - /*texture_view: A::TextureView, - contexts: Vec>, - context_index: usize, - extent: [u32; 2], - start: Instant, - buffers: Vec, - acceleration_structures: Vec,*/ + instances: [Instance; 1], + instances_buffer: A::Buffer, + blas: A::AccelerationStructure, + tlas: A::AccelerationStructure, + scratch_buffer: A::Buffer, + time: f32, } impl Example { @@ -86,23 +103,21 @@ impl Example { let instance = unsafe { A::Instance::init(&instance_desc)? 
}; let mut surface = unsafe { instance.create_surface(window).unwrap() }; - let (adapter, _capabilities) = unsafe { + let (adapter, features) = unsafe { let mut adapters = instance.enumerate_adapters(); if adapters.is_empty() { return Err(hal::InstanceError); } let exposed = adapters.swap_remove(0); - (exposed.adapter, exposed.capabilities) + dbg!(exposed.features); + (exposed.adapter, exposed.features) }; let surface_caps = unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?; log::info!("Surface caps: {:#?}", surface_caps); - let hal::OpenDevice { device, mut queue } = unsafe { - adapter - .open(wgt::Features::empty(), &wgt::Limits::default()) - .unwrap() - }; + let hal::OpenDevice { device, mut queue } = + unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; let window_size: (u32, u32) = window.inner_size().into(); let surface_config = hal::SurfaceConfiguration { @@ -196,7 +211,7 @@ impl Example { words } - let shader = unsafe { + let shader_module = unsafe { device .create_shader_module( &hal::ShaderModuleDescriptor { @@ -225,7 +240,7 @@ impl Example { label: Some("pipeline"), layout: &pipeline_layout, stage: hal::ProgrammableStage { - module: &shader, + module: &shader_module, entry_point: "main", }, }) @@ -248,7 +263,6 @@ impl Example { label: Some("vertices buffer"), size: vertices_size_in_bytes as u64, usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) @@ -274,7 +288,6 @@ impl Example { label: Some("indices buffer"), size: indices_size_in_bytes as u64, usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) @@ -303,17 +316,20 @@ impl Example { }, 
hal::AccelerationStructureFormat::BottomLevel, hal::AccelerationStructureBuildMode::Build, - (), + hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, 1, ) }; + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + let tlas_sizes = unsafe { device.get_acceleration_structure_build_sizes( &hal::AccelerationStructureGeometryInfo::Instances, hal::AccelerationStructureFormat::TopLevel, hal::AccelerationStructureBuildMode::Build, - (), + tlas_flags, 1, ) }; @@ -441,36 +457,12 @@ impl Example { size: blas_sizes .build_scratch_size .max(tlas_sizes.build_scratch_size), - usage: hal::BufferUses::BUFFER_DEVICE_ADDRESS - | hal::BufferUses::STORAGE_READ_WRITE, + usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, memory_flags: hal::MemoryFlags::empty(), }) .unwrap() }; - fn pack_24_8(low_24: u32, high_8: u8) -> u32 { - (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) - } - - #[derive(Debug)] - #[repr(C)] - struct Instance { - transform: [f32; 12], - instance_custom_index_and_mask: u32, - instance_shader_binding_table_record_offset_and_flags: u32, - acceleration_structure_reference: u64, - } - - fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { - let row_0 = matrix.row(0); - let row_1 = matrix.row(1); - let row_2 = matrix.row(2); - [ - row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, - row_2.y, row_2.z, row_2.w, - ] - } - let instances = [ Instance { transform: transform_matrix, @@ -480,7 +472,7 @@ impl Example { device.get_acceleration_structure_device_address(&blas) }, }, - Instance { + /*Instance { transform: transpose_matrix_for_acceleration_structure_instance( Mat4::from_rotation_y(1.0), ), @@ -499,7 +491,7 @@ impl Example { acceleration_structure_reference: unsafe { device.get_acceleration_structure_device_address(&blas) }, - }, + },*/ ]; let instances_buffer_size = instances.len() * std::mem::size_of::(); @@ -510,7 
+502,6 @@ impl Example { label: Some("instances_buffer"), size: instances_buffer_size as u64, usage: hal::BufferUses::MAP_WRITE - | hal::BufferUses::BUFFER_DEVICE_ADDRESS | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, }) @@ -552,7 +543,7 @@ impl Example { }, hal::AccelerationStructureFormat::BottomLevel, hal::AccelerationStructureBuildMode::Build, - (), + hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, indices.len() as u32 / 3, 0, &blas, @@ -572,7 +563,7 @@ impl Example { }, hal::AccelerationStructureFormat::TopLevel, hal::AccelerationStructureBuildMode::Build, - (), + tlas_flags, instances.len() as u32, 0, &tlas, @@ -622,6 +613,18 @@ impl Example { pipeline_layout, bind_group, texture, + instances, + instances_buffer, + blas, + tlas, + scratch_buffer, + time: 0.0, + indices_buffer, + vertices_buffer, + uniform_buffer, + texture_view, + bgl, + shader_module, }) } @@ -637,8 +640,63 @@ impl Example { range: wgt::ImageSubresourceRange::default(), usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, }; + + let instances_buffer_size = self.instances.len() * std::mem::size_of::(); + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + self.time += 1.0 / 60.0; + + self.instances[0] = Instance { + transform: transpose_matrix_for_acceleration_structure_instance(Mat4::from_rotation_y( + self.time, + )), + instance_custom_index_and_mask: pack_24_8(0, 0xff), + instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), + acceleration_structure_reference: unsafe { + self.device + .get_acceleration_structure_device_address(&self.blas) + }, + }; + + unsafe { + let mapping = self + .device + .map_buffer(&self.instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + self.instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + 
instances_buffer_size, + ); + self.device.unmap_buffer(&self.instances_buffer).unwrap(); + assert!(mapping.is_coherent); + } + unsafe { ctx.encoder.begin_encoding(Some("frame")).unwrap(); + + ctx.encoder.build_acceleration_structures( + &hal::AccelerationStructureGeometry::Instances { + buffer: &self.instances_buffer, + }, + hal::AccelerationStructureFormat::TopLevel, + hal::AccelerationStructureBuildMode::Build, + tlas_flags, + self.instances.len() as u32, + 0, + &self.tlas, + &self.scratch_buffer, + ); + + let as_barrier = hal::BufferBarrier { + buffer: &self.scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + ctx.encoder.transition_buffers(iter::once(as_barrier)); + ctx.encoder.transition_textures(iter::once(target_barrier0)); } @@ -752,6 +810,43 @@ impl Example { next.fence_value = old_fence_value + 1; } } + + fn exit(mut self) { + unsafe { + { + let ctx = &mut self.contexts[self.context_index]; + self.queue + .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .unwrap(); + } + + for mut ctx in self.contexts { + ctx.wait_and_clear(&self.device); + self.device.destroy_command_encoder(ctx.encoder); + self.device.destroy_fence(ctx.fence); + } + + self.device.destroy_bind_group(self.bind_group); + self.device.destroy_buffer(self.scratch_buffer); + self.device.destroy_buffer(self.instances_buffer); + self.device.destroy_buffer(self.indices_buffer); + self.device.destroy_buffer(self.vertices_buffer); + self.device.destroy_buffer(self.uniform_buffer); + self.device.destroy_acceleration_structure(self.tlas); + self.device.destroy_acceleration_structure(self.blas); + self.device.destroy_texture_view(self.texture_view); + self.device.destroy_texture(self.texture); + self.device.destroy_compute_pipeline(self.pipeline); + self.device.destroy_pipeline_layout(self.pipeline_layout); + self.device.destroy_bind_group_layout(self.bgl); + 
self.device.destroy_shader_module(self.shader_module); + + self.surface.unconfigure(&self.device); + self.device.exit(self.queue); + self.instance.destroy_surface(self.surface); + drop(self.adapter); + } + } } #[cfg(all(feature = "metal"))] @@ -821,7 +916,7 @@ fn main() { ex.render(); } winit::event::Event::LoopDestroyed => { - //example.take().unwrap().exit(); + example.take().unwrap().exit(); } _ => {} } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 5d5fa43d22..6448157a18 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -130,7 +130,7 @@ impl crate::Device for Context { geometry_info: &crate::AccelerationStructureGeometryInfo, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, - flags: (), + flags: crate::AccelerationStructureBuildFlags, primitive_count: u32, ) -> crate::AccelerationStructureBuildSizes { Default::default() @@ -421,7 +421,7 @@ impl crate::CommandEncoder for Encoder { geometry: &crate::AccelerationStructureGeometry, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, - flags: (), + flags: crate::AccelerationStructureBuildFlags, primitive_count: u32, primitive_offset: u32, destination_acceleration_structure: &Resource, diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 44d7860e10..a0c4597254 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -249,7 +249,7 @@ pub trait Device: Send + Sync { geometry_info: &AccelerationStructureGeometryInfo, format: AccelerationStructureFormat, mode: AccelerationStructureBuildMode, - flags: (), + flags: crate::AccelerationStructureBuildFlags, primitive_count: u32, ) -> AccelerationStructureBuildSizes; @@ -553,7 +553,7 @@ pub trait CommandEncoder: Send + Sync { geometry: &AccelerationStructureGeometry, format: AccelerationStructureFormat, mode: AccelerationStructureBuildMode, - flags: (), + flags: crate::AccelerationStructureBuildFlags, primitive_count: u32, primitive_offset: u32, 
destination_acceleration_structure: &A::AccelerationStructure, @@ -695,7 +695,7 @@ bitflags::bitflags! { const STORAGE_READ_WRITE = 1 << 8; /// The indirect or count buffer in a indirect draw or dispatch. const INDIRECT = 1 << 9; - const BUFFER_DEVICE_ADDRESS = 1 << 10; + const ACCELERATION_STRUCTURE_SCRATCH = 1 << 10; const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 11; const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; /// The combination of states that a buffer may be in _at the same time_. @@ -856,13 +856,13 @@ pub struct AccelerationStructureDescriptor<'a> { pub format: AccelerationStructureFormat, } -#[derive(Clone, Copy, Debug)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum AccelerationStructureFormat { TopLevel, BottomLevel, } -#[derive(Clone, Debug)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum AccelerationStructureBuildMode { Build, Update, @@ -1190,6 +1190,15 @@ pub struct AccelerationStructureGeometryIndices<'a, A: Api> { pub buffer: &'a A::Buffer, } +bitflags!( + pub struct AccelerationStructureBuildFlags: u32 { + const PREFER_FAST_TRACE = 1 << 0; + const PREFER_FAST_BUILD = 1 << 1; + const ALLOW_UPDATE = 1 << 2; + const LOW_MEMORY = 1 << 3; + } +); + #[derive(Clone, Debug)] pub struct TextureCopyBase { pub mip_level: u32, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index c5f75864a9..025eb4cb4b 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -535,6 +535,14 @@ impl PhysicalDeviceFeatures { ), ); + features.set( + F::RAY_TRACING, + caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) + && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()) + && caps.supports_extension(vk::KhrRayQueryFn::name()), + ); + (features, dl_flags) } @@ -623,7 +631,7 @@ impl PhysicalDeviceCapabilities { extensions.push(vk::KhrDrawIndirectCountFn::name()); } - if true { + if 
requested_features.contains(wgt::Features::RAY_TRACING) { extensions.push(vk::KhrDeferredHostOperationsFn::name()); extensions.push(vk::KhrAccelerationStructureFn::name()); extensions.push(vk::KhrBufferDeviceAddressFn::name()); @@ -1165,24 +1173,22 @@ impl super::Adapter { } else { None }; - let acceleration_structure_fn = - if enabled_extensions.contains(&khr::AccelerationStructure::name()) { - Some(khr::AccelerationStructure::new( + let ray_tracing_fns = if enabled_extensions.contains(&khr::AccelerationStructure::name()) + && enabled_extensions.contains(&khr::BufferDeviceAddress::name()) + { + Some(super::RayTracingDeviceExtensionFunctions { + acceleration_structure: khr::AccelerationStructure::new( &self.instance.raw, &raw_device, - )) - } else { - None - }; - let buffer_device_address_fn = - if enabled_extensions.contains(&khr::BufferDeviceAddress::name()) { - Some(khr::BufferDeviceAddress::new( + ), + buffer_device_address: khr::BufferDeviceAddress::new( &self.instance.raw, &raw_device, - )) - } else { - None - }; + ), + }) + } else { + None + }; let naga_options = { use naga::back::spv; @@ -1275,8 +1281,7 @@ impl super::Adapter { extension_fns: super::DeviceExtensionFunctions { draw_indirect_count: indirect_count_fn, timeline_semaphore: timeline_semaphore_fn, - acceleration_structure: acceleration_structure_fn, - buffer_device_address: buffer_device_address_fn, + ray_tracing: ray_tracing_fns, }, vendor_id: self.phd_capabilities.properties.vendor_id, timestamp_period: self.phd_capabilities.properties.limits.timestamp_period, diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index e3073b0643..a5b35f765e 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -346,29 +346,26 @@ impl crate::CommandEncoder for super::CommandEncoder { geometry: &crate::AccelerationStructureGeometry, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, - flags: (), + flags: 
crate::AccelerationStructureBuildFlags, primitive_count: u32, primitive_offset: u32, destination_acceleration_structure: &super::AccelerationStructure, scratch_buffer: &super::Buffer, ) { - let extension = match self.device.extension_fns.acceleration_structure { - Some(ref extension) => extension, + let ray_tracing_functions = match self.device.extension_fns.ray_tracing { + Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - let bda_extension = match self.device.extension_fns.buffer_device_address { - Some(ref extension) => extension, - None => panic!("Feature `BDA` not enabled"), - }; - let geometry = match geometry { crate::AccelerationStructureGeometry::Instances { buffer } => { let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( vk::DeviceOrHostAddressConstKHR { - device_address: bda_extension.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), - ), + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), }, ); @@ -389,9 +386,12 @@ impl crate::CommandEncoder for super::CommandEncoder { let mut triangles_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: bda_extension.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), - ), + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(vertex_buffer.raw), + ), }) .vertex_format(conv::map_vertex_format(vertex_format)) .vertex_stride(vertex_stride) @@ -401,9 +401,12 @@ impl crate::CommandEncoder for super::CommandEncoder { triangles_data = triangles_data .index_type(conv::map_index_format(indices.format)) .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: 
bda_extension.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), - ), + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(indices.buffer.raw), + ), }) } @@ -425,24 +428,33 @@ impl crate::CommandEncoder for super::CommandEncoder { .primitive_offset(primitive_offset) .build(); - let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() .ty(conv::map_acceleration_structure_format(format)) .mode(conv::map_acceleration_structure_build_mode(mode)) - .flags(vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE) + .flags(conv::map_acceleration_structure_flags(flags)) .geometries(geometries) .dst_acceleration_structure(destination_acceleration_structure.raw) .scratch_data(vk::DeviceOrHostAddressKHR { - device_address: bda_extension.get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(scratch_buffer.raw), - ), - }) - .build(); + device_address: ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(scratch_buffer.raw), + ), + }); + + if mode == crate::AccelerationStructureBuildMode::Update { + geometry_info.src_acceleration_structure = destination_acceleration_structure.raw; + } + + let geometry_info = geometry_info.build(); let range = &[range][..]; let range = &[range][..]; let geometry_info = &[geometry_info]; - extension.cmd_build_acceleration_structures(self.active, geometry_info, range); + ray_tracing_functions + .acceleration_structure + .cmd_build_acceleration_structures(self.active, geometry_info, range); } // render diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index e169b4ad29..7677a2ab56 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -491,14 +491,15 @@ pub fn 
map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { if usage.contains(crate::BufferUses::INDIRECT) { flags |= vk::BufferUsageFlags::INDIRECT_BUFFER; } - if usage.contains(crate::BufferUses::BUFFER_DEVICE_ADDRESS) { - flags |= vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + if usage.contains(crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH) { + flags |= vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } if usage.intersects( crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, ) { - flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR; + flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } flags } @@ -554,7 +555,8 @@ pub fn map_buffer_usage_to_barrier( } if usage.intersects( crate::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT - | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + | crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT + | crate::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, ) { stages |= vk::PipelineStageFlags::ACCELERATION_STRUCTURE_BUILD_KHR; access |= vk::AccessFlags::ACCELERATION_STRUCTURE_READ_KHR @@ -865,3 +867,27 @@ pub fn map_acceleration_structure_build_mode( } } } + +pub fn map_acceleration_structure_flags( + flags: crate::AccelerationStructureBuildFlags, +) -> vk::BuildAccelerationStructureFlagsKHR { + let mut vk_flags = vk::BuildAccelerationStructureFlagsKHR::empty(); + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_TRACE) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::PREFER_FAST_BUILD) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_BUILD; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_UPDATE) { + vk_flags |= 
vk::BuildAccelerationStructureFlagsKHR::ALLOW_UPDATE; + } + + if flags.contains(crate::AccelerationStructureBuildFlags::LOW_MEMORY) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY; + } + + vk_flags +} diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 47c20726fc..42b2257c24 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -821,11 +821,11 @@ impl crate::Device for super::Device { geometry_info: &crate::AccelerationStructureGeometryInfo, format: crate::AccelerationStructureFormat, mode: crate::AccelerationStructureBuildMode, - flags: (), + flags: crate::AccelerationStructureBuildFlags, primitive_count: u32, ) -> crate::AccelerationStructureBuildSizes { - let extension = match self.shared.extension_fns.acceleration_structure { - Some(ref extension) => extension, + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; @@ -869,14 +869,16 @@ impl crate::Device for super::Device { let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() .ty(conv::map_acceleration_structure_format(format)) .mode(conv::map_acceleration_structure_build_mode(mode)) - .flags(vk::BuildAccelerationStructureFlagsKHR::PREFER_FAST_TRACE) + .flags(conv::map_acceleration_structure_flags(flags)) .geometries(geometries); - let raw = extension.get_acceleration_structure_build_sizes( - vk::AccelerationStructureBuildTypeKHR::DEVICE, - &geometry_info, - &[primitive_count], - ); + let raw = ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &geometry_info, + &[primitive_count], + ); crate::AccelerationStructureBuildSizes { acceleration_structure_size: raw.acceleration_structure_size, @@ -889,23 +891,25 @@ impl crate::Device for super::Device { &self, acceleration_structure: &super::AccelerationStructure, ) 
-> wgt::BufferAddress { - let extension = match self.shared.extension_fns.acceleration_structure { - Some(ref extension) => extension, + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - extension.get_acceleration_structure_device_address( - &vk::AccelerationStructureDeviceAddressInfoKHR::builder() - .acceleration_structure(acceleration_structure.raw), - ) + ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(acceleration_structure.raw), + ) } unsafe fn create_acceleration_structure( &self, desc: &crate::AccelerationStructureDescriptor, ) -> Result { - let extension = match self.shared.extension_fns.acceleration_structure { - Some(ref extension) => extension, + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; @@ -942,7 +946,9 @@ impl crate::Device for super::Device { .size(desc.size) .ty(conv::map_acceleration_structure_format(desc.format)); - let raw_acceleration_structure = extension.create_acceleration_structure(&vk_info, None)?; + let raw_acceleration_structure = ray_tracing_functions + .acceleration_structure + .create_acceleration_structure(&vk_info, None)?; if let Some(label) = desc.label { self.shared.set_object_name( @@ -970,12 +976,14 @@ impl crate::Device for super::Device { &self, acceleration_structure: super::AccelerationStructure, ) { - let extension = match self.shared.extension_fns.acceleration_structure { - Some(ref extension) => extension, + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - extension.destroy_acceleration_structure(acceleration_structure.raw, None); + 
ray_tracing_functions + .acceleration_structure + .destroy_acceleration_structure(acceleration_structure.raw, None); self.shared .raw .destroy_buffer(acceleration_structure.buffer, None); @@ -1583,11 +1591,15 @@ impl crate::Device for super::Device { .map(|acceleration_structure| acceleration_structure.raw), ); - acceleration_structure_infos.push( - // todo: this dereference to build the struct is a hack to get around lifetime issues. - *vk::WriteDescriptorSetAccelerationStructureKHR::builder() - .acceleration_structures(&raw_acceleration_structures[raw_start..]), - ); + let acceleration_structure_info = + vk::WriteDescriptorSetAccelerationStructureKHR::builder() + .acceleration_structures(&raw_acceleration_structures[raw_start..]); + + // todo: Dereference the struct to get around lifetime issues. Safe as long as we never resize + // `raw_acceleration_structures`. + let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR = *acceleration_structure_info; + + acceleration_structure_infos.push(acceleration_structure_info); extra_descriptor_count += 1; diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 5732fbfb60..c552a6179b 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -148,8 +148,12 @@ enum ExtensionFn { struct DeviceExtensionFunctions { draw_indirect_count: Option, timeline_semaphore: Option>, - acceleration_structure: Option, - buffer_device_address: Option, + ray_tracing: Option, +} + +struct RayTracingDeviceExtensionFunctions { + acceleration_structure: khr::AccelerationStructure, + buffer_device_address: khr::BufferDeviceAddress, } /// Set of internal capabilities, which don't show up in the exposed diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index ab6e9a944d..b33702e6ec 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -638,6 +638,14 @@ bitflags::bitflags! 
{ /// - DX12 /// - Metal (Intel and AMD GPUs) const WRITE_TIMESTAMP_INSIDE_PASSES = 1 << 41; + + /// Allows for the creation of ray-tracing acceleration structures and ray queries within shaders. + /// + /// Supported platforms: + /// - Vulkan + /// + /// This is a native-only feature. + const RAY_TRACING = 1 << 42; } } @@ -4041,6 +4049,13 @@ pub enum BindingType { view_dimension: TextureViewDimension, }, + /// A ray-tracing acceleration structure binding. + /// + /// Example GLSL syntax: + /// ```cpp,ignore + /// layout(binding = 0) + /// uniform accelerationStructureEXT as; + /// ``` AccelerationStructure, } From cd7c80755b259ea201166bf7915b247192071c5a Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Thu, 15 Sep 2022 12:09:55 +0200 Subject: [PATCH 06/33] Fix trait implementation on gles --- wgpu-hal/src/empty.rs | 46 ++++++++++++++++++------------------ wgpu-hal/src/gles/command.rs | 14 +++++++++++ wgpu-hal/src/gles/device.rs | 31 +++++++++++++++++++----- wgpu-hal/src/lib.rs | 4 ++-- 4 files changed, 64 insertions(+), 31 deletions(-) diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 6448157a18..54e791b680 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -119,29 +119,6 @@ impl crate::Device for Context { unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { Ok(Resource) } - unsafe fn create_acceleration_structure( - &self, - desc: &crate::AccelerationStructureDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn get_acceleration_structure_build_sizes( - &self, - geometry_info: &crate::AccelerationStructureGeometryInfo, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, - primitive_count: u32, - ) -> crate::AccelerationStructureBuildSizes { - Default::default() - } - unsafe fn get_acceleration_structure_device_address( - &self, - _acceleration_structure: &Resource, - ) -> wgt::BufferAddress { -
Default::default() - } - unsafe fn destroy_acceleration_structure(&self, buffer: Resource) {} unsafe fn destroy_buffer(&self, buffer: Resource) {} unsafe fn map_buffer( &self, @@ -253,6 +230,29 @@ impl crate::Device for Context { false } unsafe fn stop_capture(&self) {} + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + geometry_info: &crate::AccelerationStructureGeometryInfo, + format: crate::AccelerationStructureFormat, + mode: crate::AccelerationStructureBuildMode, + flags: crate::AccelerationStructureBuildFlags, + primitive_count: u32, + ) -> crate::AccelerationStructureBuildSizes { + Default::default() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &Resource, + ) -> wgt::BufferAddress { + Default::default() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} } impl crate::CommandEncoder for Encoder { diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index beaf600e6e..a7ab8bef04 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1000,4 +1000,18 @@ impl crate::CommandEncoder for super::CommandEncoder { indirect_offset: offset, }); } + + unsafe fn build_acceleration_structures( + &mut self, + _geometry: &crate::AccelerationStructureGeometry, + _format: crate::AccelerationStructureFormat, + _mode: crate::AccelerationStructureBuildMode, + _flags: crate::AccelerationStructureBuildFlags, + _primitive_count: u32, + _primitive_offset: u32, + _destination_acceleration_structure: &(), + _scratch_buffer: &super::Buffer, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index b8fd7d3842..c765d95cca 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -455,12 +455,6 @@ impl crate::Device for super::Device { 
data, }) } - unsafe fn create_acceleration_structure( - &self, - _desc: &crate::AccelerationStructureDescriptor, - ) -> Result<(), crate::DeviceError> { - unimplemented!() - } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { if let Some(raw) = buffer.raw { let gl = &self.shared.context.lock(); @@ -867,6 +861,7 @@ impl crate::Device for super::Device { ty: wgt::BufferBindingType::Storage { .. }, .. } => &mut num_storage_buffers, + wgt::BindingType::AccelerationStructure => unimplemented!(), }; binding_to_slot[entry.binding as usize] = *counter; @@ -947,6 +942,7 @@ impl crate::Device for super::Device { format: format_desc.internal, }) } + wgt::BindingType::AccelerationStructure => unimplemented!(), }; contents.push(binding); } @@ -1167,6 +1163,29 @@ impl crate::Device for super::Device { self.render_doc .end_frame_capture(ptr::null_mut(), ptr::null_mut()) } + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result<(), crate::DeviceError> { + unimplemented!() + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + _geometry_info: &crate::AccelerationStructureGeometryInfo, + _format: crate::AccelerationStructureFormat, + _mode: crate::AccelerationStructureBuildMode, + _flags: crate::AccelerationStructureBuildFlags, + _primitive_count: u32, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &(), + ) -> wgt::BufferAddress { + unimplemented!() + } + unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} } // SAFE: WASM doesn't have threads diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index a0c4597254..ea953cefb2 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -249,7 +249,7 @@ pub trait Device: Send + Sync { geometry_info: &AccelerationStructureGeometryInfo, format: AccelerationStructureFormat, mode: AccelerationStructureBuildMode, - 
flags: crate::AccelerationStructureBuildFlags, + flags: AccelerationStructureBuildFlags, primitive_count: u32, ) -> AccelerationStructureBuildSizes; @@ -553,7 +553,7 @@ pub trait CommandEncoder: Send + Sync { geometry: &AccelerationStructureGeometry, format: AccelerationStructureFormat, mode: AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, + flags: AccelerationStructureBuildFlags, primitive_count: u32, primitive_offset: u32, destination_acceleration_structure: &A::AccelerationStructure, From 4ef32a4dce94151fbe6a2aab27b46e82f123aac3 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Thu, 15 Sep 2022 19:47:39 +0200 Subject: [PATCH 07/33] Put larger function call param lists into descriptors for easier dummy implementation, run cargo clippy, fix wgpu/wgpu build --- wgpu-core/src/binding_model.rs | 1 + wgpu-core/src/device/mod.rs | 2 + wgpu-hal/examples/ray-traced-triangle/main.rs | 95 ++++++----- wgpu-hal/src/empty.rs | 15 +- wgpu-hal/src/gles/command.rs | 9 +- wgpu-hal/src/gles/device.rs | 6 +- wgpu-hal/src/lib.rs | 159 +++++++++--------- wgpu-hal/src/vulkan/command.rs | 33 ++-- wgpu-hal/src/vulkan/device.rs | 18 +- 9 files changed, 161 insertions(+), 177 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 71f95a723d..7ef9acac2a 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -328,6 +328,7 @@ impl BindingTypeMaxCountValidator { wgt::BindingType::StorageTexture { .. 
} => { self.storage_textures.add(binding.visibility, count); } + wgt::BindingType::AccelerationStructure => todo!(), } } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a5c5cbe51c..68d8b8fa59 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -1504,6 +1504,7 @@ impl Device { }, ) } + Bt::AccelerationStructure => todo!(), }; // Validate the count parameter @@ -1977,6 +1978,7 @@ impl Device { buffers: &hal_buffers, samplers: &hal_samplers, textures: &hal_textures, + acceleration_structures: &[], }; let raw = unsafe { self.raw diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index a4ae25e41c..fa749de870 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -309,15 +309,17 @@ impl Example { let blas_sizes = unsafe { device.get_acceleration_structure_build_sizes( - &hal::AccelerationStructureGeometryInfo::Triangles { - vertex_format: wgt::VertexFormat::Float32x3, - max_vertex: 3, - index_format: Some(wgt::IndexFormat::Uint32), + &hal::GetAccelerationStructureBuildSizesDescriptor { + geometry_info: hal::AccelerationStructureGeometryInfo::Triangles { + vertex_format: wgt::VertexFormat::Float32x3, + max_vertex: 3, + index_format: Some(wgt::IndexFormat::Uint32), + }, + format: hal::AccelerationStructureFormat::BottomLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + primitive_count: 1, }, - hal::AccelerationStructureFormat::BottomLevel, - hal::AccelerationStructureBuildMode::Build, - hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, - 1, ) }; @@ -326,11 +328,13 @@ impl Example { let tlas_sizes = unsafe { device.get_acceleration_structure_build_sizes( - &hal::AccelerationStructureGeometryInfo::Instances, - hal::AccelerationStructureFormat::TopLevel, - hal::AccelerationStructureBuildMode::Build, - tlas_flags, - 1, + 
&hal::GetAccelerationStructureBuildSizesDescriptor { + geometry_info: hal::AccelerationStructureGeometryInfo::Instances, + format: hal::AccelerationStructureFormat::TopLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + primitive_count: 1, + }, ) }; @@ -530,8 +534,8 @@ impl Example { unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; unsafe { - cmd_encoder.build_acceleration_structures( - &hal::AccelerationStructureGeometry::Triangles { + cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Triangles { vertex_buffer: &vertices_buffer, vertex_format: wgt::VertexFormat::Float32x3, max_vertex: vertices.len() as u32, @@ -541,14 +545,14 @@ impl Example { format: wgt::IndexFormat::Uint32, }), }, - hal::AccelerationStructureFormat::BottomLevel, - hal::AccelerationStructureBuildMode::Build, - hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, - indices.len() as u32 / 3, - 0, - &blas, - &scratch_buffer, - ); + format: hal::AccelerationStructureFormat::BottomLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + primitive_count: indices.len() as u32 / 3, + primitive_offset: 0, + destination_acceleration_structure: &blas, + scratch_buffer: &scratch_buffer, + }); let as_barrier = hal::BufferBarrier { buffer: &scratch_buffer, @@ -557,18 +561,18 @@ impl Example { }; cmd_encoder.transition_buffers(iter::once(as_barrier)); - cmd_encoder.build_acceleration_structures( - &hal::AccelerationStructureGeometry::Instances { + cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Instances { buffer: &instances_buffer, }, - hal::AccelerationStructureFormat::TopLevel, - hal::AccelerationStructureBuildMode::Build, - tlas_flags, - instances.len() as u32, - 0, - &tlas, - &scratch_buffer, - ); + format: 
hal::AccelerationStructureFormat::TopLevel, + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + primitive_count: instances.len() as u32, + primitive_offset: 0, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + }); let texture_barrier = hal::TextureBarrier { texture: &texture, @@ -677,18 +681,19 @@ impl Example { unsafe { ctx.encoder.begin_encoding(Some("frame")).unwrap(); - ctx.encoder.build_acceleration_structures( - &hal::AccelerationStructureGeometry::Instances { - buffer: &self.instances_buffer, - }, - hal::AccelerationStructureFormat::TopLevel, - hal::AccelerationStructureBuildMode::Build, - tlas_flags, - self.instances.len() as u32, - 0, - &self.tlas, - &self.scratch_buffer, - ); + ctx.encoder + .build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + geometry: &hal::AccelerationStructureGeometry::Instances { + buffer: &self.instances_buffer, + }, + format: hal::AccelerationStructureFormat::TopLevel, + mode: hal::AccelerationStructureBuildMode::Update, + flags: tlas_flags, + primitive_count: self.instances.len() as u32, + primitive_offset: 0, + destination_acceleration_structure: &self.tlas, + scratch_buffer: &self.scratch_buffer, + }); let as_barrier = hal::BufferBarrier { buffer: &self.scratch_buffer, diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 54e791b680..aff9640a79 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -238,11 +238,7 @@ impl crate::Device for Context { } unsafe fn get_acceleration_structure_build_sizes( &self, - geometry_info: &crate::AccelerationStructureGeometryInfo, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, - primitive_count: u32, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { Default::default() } @@ -418,14 +414,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn 
build_acceleration_structures( &mut self, - geometry: &crate::AccelerationStructureGeometry, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, - primitive_count: u32, - primitive_offset: u32, - destination_acceleration_structure: &Resource, - scratch_buffer: &Resource, + _desc: &crate::BuildAccelerationStructureDescriptor, ) { } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index a7ab8bef04..e61e70357b 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1003,14 +1003,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn build_acceleration_structures( &mut self, - _geometry: &crate::AccelerationStructureGeometry, - _format: crate::AccelerationStructureFormat, - _mode: crate::AccelerationStructureBuildMode, - _flags: crate::AccelerationStructureBuildFlags, - _primitive_count: u32, - _primitive_offset: u32, - _destination_acceleration_structure: &(), - _scratch_buffer: &super::Buffer, + _desc: &crate::BuildAccelerationStructureDescriptor, ) { unimplemented!() } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index c765d95cca..1858825463 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1171,11 +1171,7 @@ impl crate::Device for super::Device { } unsafe fn get_acceleration_structure_build_sizes( &self, - _geometry_info: &crate::AccelerationStructureGeometryInfo, - _format: crate::AccelerationStructureFormat, - _mode: crate::AccelerationStructureBuildMode, - _flags: crate::AccelerationStructureBuildFlags, - _primitive_count: u32, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index ea953cefb2..019c6b4b9f 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -246,11 +246,7 @@ pub trait Device: Send + Sync { unsafe fn 
get_acceleration_structure_build_sizes( &self, - geometry_info: &AccelerationStructureGeometryInfo, - format: AccelerationStructureFormat, - mode: AccelerationStructureBuildMode, - flags: AccelerationStructureBuildFlags, - primitive_count: u32, + desc: &GetAccelerationStructureBuildSizesDescriptor, ) -> AccelerationStructureBuildSizes; unsafe fn get_acceleration_structure_device_address( @@ -550,14 +546,7 @@ pub trait CommandEncoder: Send + Sync { unsafe fn build_acceleration_structures( &mut self, - geometry: &AccelerationStructureGeometry, - format: AccelerationStructureFormat, - mode: AccelerationStructureBuildMode, - flags: AccelerationStructureBuildFlags, - primitive_count: u32, - primitive_offset: u32, - destination_acceleration_structure: &A::AccelerationStructure, - scratch_buffer: &A::Buffer, + desc: &BuildAccelerationStructureDescriptor, ); } @@ -849,32 +838,6 @@ pub struct BufferDescriptor<'a> { pub memory_flags: MemoryFlags, } -#[derive(Clone, Debug)] -pub struct AccelerationStructureDescriptor<'a> { - pub label: Label<'a>, - pub size: wgt::BufferAddress, - pub format: AccelerationStructureFormat, -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum AccelerationStructureFormat { - TopLevel, - BottomLevel, -} - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum AccelerationStructureBuildMode { - Build, - Update, -} - -#[derive(Clone, Debug, Default)] -pub struct AccelerationStructureBuildSizes { - pub acceleration_structure_size: wgt::BufferAddress, - pub update_scratch_size: wgt::BufferAddress, - pub build_scratch_size: wgt::BufferAddress, -} - #[derive(Clone, Debug)] pub struct TextureDescriptor<'a> { pub label: Label<'a>, @@ -1163,42 +1126,6 @@ pub struct BufferCopy { pub size: wgt::BufferSize, } -pub enum AccelerationStructureGeometryInfo { - Triangles { - vertex_format: wgt::VertexFormat, - max_vertex: u32, - index_format: Option, - }, - Instances, -} - -pub enum AccelerationStructureGeometry<'a, A: Api> { - Triangles { - vertex_buffer: 
&'a A::Buffer, - vertex_format: wgt::VertexFormat, - max_vertex: u32, - vertex_stride: wgt::BufferAddress, - indices: Option>, - }, - Instances { - buffer: &'a A::Buffer, - }, -} - -pub struct AccelerationStructureGeometryIndices<'a, A: Api> { - pub format: wgt::IndexFormat, - pub buffer: &'a A::Buffer, -} - -bitflags!( - pub struct AccelerationStructureBuildFlags: u32 { - const PREFER_FAST_TRACE = 1 << 0; - const PREFER_FAST_BUILD = 1 << 1; - const ALLOW_UPDATE = 1 << 2; - const LOW_MEMORY = 1 << 3; - } -); - #[derive(Clone, Debug)] pub struct TextureCopyBase { pub mip_level: u32, @@ -1329,3 +1256,85 @@ fn test_default_limits() { let limits = wgt::Limits::default(); assert!(limits.max_bind_groups <= MAX_BIND_GROUPS as u32); } + +#[derive(Clone, Debug)] +pub struct AccelerationStructureDescriptor<'a> { + pub label: Label<'a>, + pub size: wgt::BufferAddress, + pub format: AccelerationStructureFormat, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureFormat { + TopLevel, + BottomLevel, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum AccelerationStructureBuildMode { + Build, + Update, +} + +#[derive(Clone, Debug, Default)] +pub struct AccelerationStructureBuildSizes { + pub acceleration_structure_size: wgt::BufferAddress, + pub update_scratch_size: wgt::BufferAddress, + pub build_scratch_size: wgt::BufferAddress, +} + +pub struct GetAccelerationStructureBuildSizesDescriptor { + pub geometry_info: AccelerationStructureGeometryInfo, + pub format: AccelerationStructureFormat, + pub mode: AccelerationStructureBuildMode, + pub flags: AccelerationStructureBuildFlags, + pub primitive_count: u32, +} + +#[derive(Clone, Copy)] +pub enum AccelerationStructureGeometryInfo { + Triangles { + vertex_format: wgt::VertexFormat, + max_vertex: u32, + index_format: Option, + }, + Instances, +} + +pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { + pub geometry: &'a AccelerationStructureGeometry<'a, A>, + pub format: 
AccelerationStructureFormat, + pub mode: AccelerationStructureBuildMode, + pub flags: AccelerationStructureBuildFlags, + pub primitive_count: u32, + pub primitive_offset: u32, + pub destination_acceleration_structure: &'a A::AccelerationStructure, + pub scratch_buffer: &'a A::Buffer, +} + +pub enum AccelerationStructureGeometry<'a, A: Api> { + Triangles { + vertex_buffer: &'a A::Buffer, + vertex_format: wgt::VertexFormat, + max_vertex: u32, + vertex_stride: wgt::BufferAddress, + indices: Option>, + }, + Instances { + buffer: &'a A::Buffer, + }, +} + +pub struct AccelerationStructureGeometryIndices<'a, A: Api> { + pub format: wgt::IndexFormat, + pub buffer: &'a A::Buffer, +} + +bitflags!( + pub struct AccelerationStructureBuildFlags: u32 { + const PREFER_FAST_TRACE = 1 << 0; + const PREFER_FAST_BUILD = 1 << 1; + const ALLOW_UPDATE = 1 << 2; + const LOW_MEMORY = 1 << 3; + } +); diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index a5b35f765e..02768f3c74 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -343,21 +343,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn build_acceleration_structures( &mut self, - geometry: &crate::AccelerationStructureGeometry, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, - primitive_count: u32, - primitive_offset: u32, - destination_acceleration_structure: &super::AccelerationStructure, - scratch_buffer: &super::Buffer, + desc: &crate::BuildAccelerationStructureDescriptor, ) { let ray_tracing_functions = match self.device.extension_fns.ray_tracing { Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - let geometry = match geometry { + let geometry = match *desc.geometry { crate::AccelerationStructureGeometry::Instances { buffer } => { let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( 
vk::DeviceOrHostAddressConstKHR { @@ -376,7 +369,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }) .flags(vk::GeometryFlagsKHR::empty()) } - &crate::AccelerationStructureGeometry::Triangles { + crate::AccelerationStructureGeometry::Triangles { vertex_buffer, vertex_format, max_vertex, @@ -397,7 +390,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .vertex_stride(vertex_stride) .max_vertex(max_vertex); - if let Some(indices) = indices { + if let Some(ref indices) = *indices { triangles_data = triangles_data .index_type(conv::map_index_format(indices.format)) .index_data(vk::DeviceOrHostAddressConstKHR { @@ -424,26 +417,26 @@ impl crate::CommandEncoder for super::CommandEncoder { let geometries = &[*geometry]; let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(primitive_count) - .primitive_offset(primitive_offset) + .primitive_count(desc.primitive_count) + .primitive_offset(desc.primitive_offset) .build(); let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(conv::map_acceleration_structure_format(format)) - .mode(conv::map_acceleration_structure_build_mode(mode)) - .flags(conv::map_acceleration_structure_flags(flags)) + .ty(conv::map_acceleration_structure_format(desc.format)) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) .geometries(geometries) - .dst_acceleration_structure(destination_acceleration_structure.raw) + .dst_acceleration_structure(desc.destination_acceleration_structure.raw) .scratch_data(vk::DeviceOrHostAddressKHR { device_address: ray_tracing_functions .buffer_device_address .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(scratch_buffer.raw), + &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), ), }); - if mode == crate::AccelerationStructureBuildMode::Update { - geometry_info.src_acceleration_structure = 
destination_acceleration_structure.raw; + if desc.mode == crate::AccelerationStructureBuildMode::Update { + geometry_info.src_acceleration_structure = desc.destination_acceleration_structure.raw; } let geometry_info = geometry_info.build(); diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 42b2257c24..83c9e6ac58 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -818,18 +818,14 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - geometry_info: &crate::AccelerationStructureGeometryInfo, - format: crate::AccelerationStructureFormat, - mode: crate::AccelerationStructureBuildMode, - flags: crate::AccelerationStructureBuildFlags, - primitive_count: u32, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - let geometry = match geometry_info { + let geometry = match desc.geometry_info { crate::AccelerationStructureGeometryInfo::Instances => { let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); @@ -840,7 +836,7 @@ impl crate::Device for super::Device { }) .flags(vk::GeometryFlagsKHR::empty()) } - &crate::AccelerationStructureGeometryInfo::Triangles { + crate::AccelerationStructureGeometryInfo::Triangles { vertex_format, max_vertex, index_format, @@ -867,9 +863,9 @@ impl crate::Device for super::Device { let geometries = &[*geometry]; let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(conv::map_acceleration_structure_format(format)) - .mode(conv::map_acceleration_structure_build_mode(mode)) - .flags(conv::map_acceleration_structure_flags(flags)) + .ty(conv::map_acceleration_structure_format(desc.format)) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + 
.flags(conv::map_acceleration_structure_flags(desc.flags)) .geometries(geometries); let raw = ray_tracing_functions @@ -877,7 +873,7 @@ impl crate::Device for super::Device { .get_acceleration_structure_build_sizes( vk::AccelerationStructureBuildTypeKHR::DEVICE, &geometry_info, - &[primitive_count], + &[desc.primitive_count], ); crate::AccelerationStructureBuildSizes { From 8318b5f77a538d6db692b2e1f760ad154b6f37bf Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Thu, 15 Sep 2022 19:55:53 +0200 Subject: [PATCH 08/33] Fix wasm build --- wgpu/src/backend/web.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index f816125148..6b1a8cfd37 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -1443,7 +1443,8 @@ impl crate::Context for Context { storage_texture.access(mapped_access); storage_texture.view_dimension(map_texture_view_dimension(view_dimension)); mapped_entry.storage_texture(&storage_texture); - } + }, + wgt::BindingType::AccelerationStructure => todo!(), } mapped_entry From fecb5382e7e50e451f1b4e10459992d436d4324f Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Thu, 15 Sep 2022 20:48:43 +0200 Subject: [PATCH 09/33] Shuffle some code around, add a dummy implementation for DX12 with some links to implementation resources. 
--- wgpu-hal/src/dx11/command.rs | 7 + wgpu-hal/src/dx11/device.rs | 25 +++ wgpu-hal/src/dx11/mod.rs | 4 + wgpu-hal/src/dx12/command.rs | 9 + wgpu-hal/src/dx12/conv.rs | 1 + wgpu-hal/src/dx12/device.rs | 36 ++++ wgpu-hal/src/dx12/mod.rs | 5 + wgpu-hal/src/lib.rs | 35 ++-- wgpu-hal/src/vulkan/device.rs | 331 +++++++++++++++++----------------- wgpu/src/backend/web.rs | 2 +- 10 files changed, 270 insertions(+), 185 deletions(-) diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 1c73f3c325..3ec95d0c33 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -265,4 +265,11 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { todo!() } + + unsafe fn build_acceleration_structures( + &mut self, + desc: &crate::BuildAccelerationStructureDescriptor, + ) { + todo!() + } } diff --git a/wgpu-hal/src/dx11/device.rs b/wgpu-hal/src/dx11/device.rs index 7b095ba1df..ee73329412 100644 --- a/wgpu-hal/src/dx11/device.rs +++ b/wgpu-hal/src/dx11/device.rs @@ -200,6 +200,31 @@ impl crate::Device for super::Device { unsafe fn stop_capture(&self) { todo!() } + + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + todo!() + } + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + todo!() + } + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + todo!() + } + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: super::AccelerationStructure, + ) { + todo!() + } } impl crate::Queue for super::Queue { diff --git a/wgpu-hal/src/dx11/mod.rs b/wgpu-hal/src/dx11/mod.rs index a77bb95919..e67feb3fad 100644 --- a/wgpu-hal/src/dx11/mod.rs +++ 
b/wgpu-hal/src/dx11/mod.rs @@ -36,6 +36,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } pub struct Instance { @@ -106,6 +108,8 @@ pub struct BindGroup {} pub struct PipelineLayout {} #[derive(Debug)] pub struct ShaderModule {} +#[derive(Debug)] +pub struct AccelerationStructure {} pub struct RenderPipeline {} pub struct ComputePipeline {} diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index ca2f036430..4f843146f8 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1019,4 +1019,13 @@ impl crate::CommandEncoder for super::CommandEncoder { 0, ); } + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + // Implement using `BuildRaytracingAccelerationStructure`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure + todo!() + } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 4114fba002..bbe14cefff 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -107,6 +107,7 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { .. } | Bt::StorageTexture { .. } => native::DescriptorRangeType::UAV, + Bt::AccelerationStructure => todo!(), } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index de81b4e1bd..106f815711 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -706,6 +706,7 @@ impl crate::Device for super::Device { num_texture_views += count } wgt::BindingType::Sampler { .. 
} => num_samplers += count, + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1189,6 +1190,7 @@ impl crate::Device for super::Device { cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); } } + wgt::BindingType::AccelerationStructure => todo!(), } } @@ -1567,4 +1569,38 @@ impl crate::Device for super::Device { self.render_doc .end_frame_capture(self.raw.as_mut_ptr() as *mut _, ptr::null_mut()) } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: + // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo + todo!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + // Implement using `GetGPUVirtualAddress`: + // https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12resource-getgpuvirtualaddress + todo!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + // Create a D3D12 resource as per-usual. + todo!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + // Destroy a D3D12 resource as per-usual. + todo!() + } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 61d2ad9576..195fd429fe 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -79,6 +79,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. 
@@ -524,6 +526,9 @@ pub struct ComputePipeline { unsafe impl Send for ComputePipeline {} unsafe impl Sync for ComputePipeline {} +#[derive(Debug)] +pub struct AccelerationStructure {} + impl SwapChain { unsafe fn release_resources(self) -> native::WeakPtr { for resource in self.resources { diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 019c6b4b9f..c91560cca9 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -238,24 +238,6 @@ pub trait Device: Send + Sync { /// /// The initial usage is `BufferUses::empty()`. unsafe fn create_buffer(&self, desc: &BufferDescriptor) -> Result; - - unsafe fn create_acceleration_structure( - &self, - desc: &AccelerationStructureDescriptor, - ) -> Result; - - unsafe fn get_acceleration_structure_build_sizes( - &self, - desc: &GetAccelerationStructureBuildSizesDescriptor, - ) -> AccelerationStructureBuildSizes; - - unsafe fn get_acceleration_structure_device_address( - &self, - acceleration_structure: &A::AccelerationStructure, - ) -> wgt::BufferAddress; - - unsafe fn destroy_acceleration_structure(&self, buffer: A::AccelerationStructure); - unsafe fn destroy_buffer(&self, buffer: A::Buffer); //TODO: clarify if zero-sized mapping is allowed unsafe fn map_buffer( @@ -343,6 +325,23 @@ pub trait Device: Send + Sync { unsafe fn start_capture(&self) -> bool; unsafe fn stop_capture(&self); + + unsafe fn create_acceleration_structure( + &self, + desc: &AccelerationStructureDescriptor, + ) -> Result; + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &GetAccelerationStructureBuildSizesDescriptor, + ) -> AccelerationStructureBuildSizes; + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: &A::AccelerationStructure, + ) -> wgt::BufferAddress; + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: A::AccelerationStructure, + ); } pub trait Queue: Send + Sync { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 
83c9e6ac58..afff881b18 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -815,152 +815,6 @@ impl crate::Device for super::Device { block: Mutex::new(block), }) } - - unsafe fn get_acceleration_structure_build_sizes( - &self, - desc: &crate::GetAccelerationStructureBuildSizesDescriptor, - ) -> crate::AccelerationStructureBuildSizes { - let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { - Some(ref functions) => functions, - None => panic!("Feature `RAY_TRACING` not enabled"), - }; - - let geometry = match desc.geometry_info { - crate::AccelerationStructureGeometryInfo::Instances => { - let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); - - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::INSTANCES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instances_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) - } - crate::AccelerationStructureGeometryInfo::Triangles { - vertex_format, - max_vertex, - index_format, - } => { - let mut triangles_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_format(conv::map_vertex_format(vertex_format)) - .max_vertex(max_vertex); - - if let Some(index_format) = index_format { - triangles_data = - triangles_data.index_type(conv::map_index_format(index_format)); - } - - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: *triangles_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) - } - }; - - let geometries = &[*geometry]; - - let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(conv::map_acceleration_structure_format(desc.format)) - .mode(conv::map_acceleration_structure_build_mode(desc.mode)) - .flags(conv::map_acceleration_structure_flags(desc.flags)) - .geometries(geometries); - - let raw = ray_tracing_functions - 
.acceleration_structure - .get_acceleration_structure_build_sizes( - vk::AccelerationStructureBuildTypeKHR::DEVICE, - &geometry_info, - &[desc.primitive_count], - ); - - crate::AccelerationStructureBuildSizes { - acceleration_structure_size: raw.acceleration_structure_size, - update_scratch_size: raw.update_scratch_size, - build_scratch_size: raw.build_scratch_size, - } - } - - unsafe fn get_acceleration_structure_device_address( - &self, - acceleration_structure: &super::AccelerationStructure, - ) -> wgt::BufferAddress { - let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { - Some(ref functions) => functions, - None => panic!("Feature `RAY_TRACING` not enabled"), - }; - - ray_tracing_functions - .acceleration_structure - .get_acceleration_structure_device_address( - &vk::AccelerationStructureDeviceAddressInfoKHR::builder() - .acceleration_structure(acceleration_structure.raw), - ) - } - - unsafe fn create_acceleration_structure( - &self, - desc: &crate::AccelerationStructureDescriptor, - ) -> Result { - let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { - Some(ref functions) => functions, - None => panic!("Feature `RAY_TRACING` not enabled"), - }; - - let vk_buffer_info = vk::BufferCreateInfo::builder() - .size(desc.size) - .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR) - .sharing_mode(vk::SharingMode::EXCLUSIVE); - - let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; - let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); - - let block = self.mem_allocator.lock().alloc( - &*self.shared, - gpu_alloc::Request { - size: req.size, - align_mask: req.alignment - 1, - usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, - memory_types: req.memory_type_bits & self.valid_ash_memory_types, - }, - )?; - - self.shared - .raw - .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; - - if let Some(label) = desc.label { - self.shared - 
.set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); - } - - let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() - .buffer(raw_buffer) - .offset(0) - .size(desc.size) - .ty(conv::map_acceleration_structure_format(desc.format)); - - let raw_acceleration_structure = ray_tracing_functions - .acceleration_structure - .create_acceleration_structure(&vk_info, None)?; - - if let Some(label) = desc.label { - self.shared.set_object_name( - vk::ObjectType::ACCELERATION_STRUCTURE_KHR, - raw_acceleration_structure, - label, - ); - } - - Ok(super::AccelerationStructure { - raw: raw_acceleration_structure, - buffer: raw_buffer, - block: Mutex::new(block), - }) - } - unsafe fn destroy_buffer(&self, buffer: super::Buffer) { self.shared.raw.destroy_buffer(buffer.raw, None); self.mem_allocator @@ -968,26 +822,6 @@ impl crate::Device for super::Device { .dealloc(&*self.shared, buffer.block.into_inner()); } - unsafe fn destroy_acceleration_structure( - &self, - acceleration_structure: super::AccelerationStructure, - ) { - let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { - Some(ref functions) => functions, - None => panic!("Feature `RAY_TRACING` not enabled"), - }; - - ray_tracing_functions - .acceleration_structure - .destroy_acceleration_structure(acceleration_structure.raw, None); - self.shared - .raw - .destroy_buffer(acceleration_structure.buffer, None); - self.mem_allocator - .lock() - .dealloc(&*self.shared, acceleration_structure.block.into_inner()); - } - unsafe fn map_buffer( &self, buffer: &super::Buffer, @@ -2117,6 +1951,171 @@ impl crate::Device for super::Device { .end_frame_capture(raw_vk_instance_dispatch_table, ptr::null_mut()) } } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => 
panic!("Feature `RAY_TRACING` not enabled"), + }; + + let geometry = match desc.geometry_info { + crate::AccelerationStructureGeometryInfo::Instances => { + let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instances_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + crate::AccelerationStructureGeometryInfo::Triangles { + vertex_format, + max_vertex, + index_format, + } => { + let mut triangles_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_format(conv::map_vertex_format(vertex_format)) + .max_vertex(max_vertex); + + if let Some(index_format) = index_format { + triangles_data = + triangles_data.index_type(conv::map_index_format(index_format)); + } + + vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangles_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + } + }; + + let geometries = &[*geometry]; + + let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(conv::map_acceleration_structure_format(desc.format)) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .geometries(geometries); + + let raw = ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &geometry_info, + &[desc.primitive_count], + ); + + crate::AccelerationStructureBuildSizes { + acceleration_structure_size: raw.acceleration_structure_size, + update_scratch_size: raw.update_scratch_size, + build_scratch_size: raw.build_scratch_size, + } + } + + unsafe fn get_acceleration_structure_device_address( + &self, + acceleration_structure: 
&super::AccelerationStructure, + ) -> wgt::BufferAddress { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(acceleration_structure.raw), + ) + } + + unsafe fn create_acceleration_structure( + &self, + desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let vk_buffer_info = vk::BufferCreateInfo::builder() + .size(desc.size) + .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; + let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); + + let block = self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )?; + + self.shared + .raw + .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; + + if let Some(label) = desc.label { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); + } + + let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() + .buffer(raw_buffer) + .offset(0) + .size(desc.size) + .ty(conv::map_acceleration_structure_format(desc.format)); + + let raw_acceleration_structure = ray_tracing_functions + .acceleration_structure + .create_acceleration_structure(&vk_info, None)?; + + if let Some(label) = desc.label { + self.shared.set_object_name( + vk::ObjectType::ACCELERATION_STRUCTURE_KHR, + 
raw_acceleration_structure, + label, + ); + } + + Ok(super::AccelerationStructure { + raw: raw_acceleration_structure, + buffer: raw_buffer, + block: Mutex::new(block), + }) + } + + unsafe fn destroy_acceleration_structure( + &self, + acceleration_structure: super::AccelerationStructure, + ) { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + ray_tracing_functions + .acceleration_structure + .destroy_acceleration_structure(acceleration_structure.raw, None); + self.shared + .raw + .destroy_buffer(acceleration_structure.buffer, None); + self.mem_allocator + .lock() + .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + } } impl From for crate::DeviceError { diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index 6b1a8cfd37..5cb9057208 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -1443,7 +1443,7 @@ impl crate::Context for Context { storage_texture.access(mapped_access); storage_texture.view_dimension(map_texture_view_dimension(view_dimension)); mapped_entry.storage_texture(&storage_texture); - }, + } wgt::BindingType::AccelerationStructure => todo!(), } From 7905dc582f90ece798f069fe481cfa581411cf93 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Fri, 16 Sep 2022 22:28:09 +0200 Subject: [PATCH 10/33] Add dummy implementation for metal --- wgpu-hal/src/metal/command.rs | 7 +++++++ wgpu-hal/src/metal/device.rs | 28 ++++++++++++++++++++++++++++ wgpu-hal/src/metal/mod.rs | 5 +++++ 3 files changed, 40 insertions(+) diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 49337ee7ea..6048aca0f4 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -962,4 +962,11 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.compute.as_ref().unwrap(); encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, 
self.state.raw_wg_size); } + + unsafe fn build_acceleration_structures( + &mut self, + _desc: &crate::BuildAccelerationStructureDescriptor, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 81b9461f87..7064068acb 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1123,4 +1123,32 @@ impl crate::Device for super::Device { } shared_capture_manager.stop_capture(); } + + unsafe fn get_acceleration_structure_build_sizes( + &self, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + ) -> crate::AccelerationStructureBuildSizes { + unimplemented!() + } + + unsafe fn get_acceleration_structure_device_address( + &self, + _acceleration_structure: &super::AccelerationStructure, + ) -> wgt::BufferAddress { + unimplemented!() + } + + unsafe fn create_acceleration_structure( + &self, + _desc: &crate::AccelerationStructureDescriptor, + ) -> Result { + unimplemented!() + } + + unsafe fn destroy_acceleration_structure( + &self, + _acceleration_structure: super::AccelerationStructure, + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index dee9467e74..42466fc3c8 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -59,6 +59,8 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + + type AccelerationStructure = AccelerationStructure; } pub struct Instance { @@ -733,3 +735,6 @@ pub struct CommandBuffer { unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} + +#[derive(Debug)] +pub struct AccelerationStructure; From 6f2b07a00c3e462da85a6c15fe88adcc45b30ae4 Mon Sep 17 00:00:00 2001 From: Ashley Ruglys Date: Sun, 18 Sep 2022 15:08:19 +0200 Subject: [PATCH 11/33] Fix example, hopefully have fixed metal code --- wgpu-hal/examples/ray-traced-triangle/main.rs | 7 ++++++- wgpu-hal/src/metal/device.rs | 2 ++ 2 files 
changed, 8 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index fa749de870..49fb58686b 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -3,6 +3,7 @@ extern crate wgpu_hal as hal; use hal::{ Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, }; +use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; use glam::{Mat4, Vec3}; use std::{ @@ -101,7 +102,11 @@ impl Example { }, }; let instance = unsafe { A::Instance::init(&instance_desc)? }; - let mut surface = unsafe { instance.create_surface(window).unwrap() }; + let mut surface = unsafe { + instance + .create_surface(window.raw_display_handle(), window.raw_window_handle()) + .unwrap() + }; let (adapter, features) = unsafe { let mut adapters = instance.enumerate_adapters(); diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 81b9461f87..da7ecd4830 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -595,6 +595,7 @@ impl crate::Device for super::Device { wgt::StorageTextureAccess::ReadWrite => true, }; } + wgt::BindingType::AccelerationStructure => unimplemented!(), } let br = naga::ResourceBinding { @@ -755,6 +756,7 @@ impl crate::Device for super::Device { ); counter.textures += size; } + wgt::BindingType::AccelerationStructure => unimplemented!(), } } } From 3828af7765f42f288befe9ec7a354e6f97f5af1b Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Fri, 3 Feb 2023 04:42:34 +0100 Subject: [PATCH 12/33] Changed surface_format in example. 
--- wgpu-hal/examples/ray-traced-triangle/main.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 49fb58686b..dacd484eaa 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -125,13 +125,19 @@ impl Example { unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; let window_size: (u32, u32) = window.inner_size().into(); + dbg!(&surface_caps.formats); + let surface_format = if surface_caps.formats.contains(&wgt::TextureFormat::Rgba8Snorm) { + wgt::TextureFormat::Rgba8Unorm + }else{ + *surface_caps.formats.first().unwrap() + }; let surface_config = hal::SurfaceConfiguration { swap_chain_size: DESIRED_FRAMES .max(*surface_caps.swap_chain_sizes.start()) .min(*surface_caps.swap_chain_sizes.end()), present_mode: wgt::PresentMode::Fifo, composite_alpha_mode: hal::CompositeAlphaMode::Opaque, - format: wgt::TextureFormat::Rgba8Unorm, + format: surface_format, extent: wgt::Extent3d { width: window_size.0, height: window_size.1, From e2de5a3f0c26eb6bec485e13099386d1f0dd4371 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Fri, 3 Feb 2023 04:45:08 +0100 Subject: [PATCH 13/33] Changed based on PR 3020#pullrequestreview-1139691697 feedback. --- wgpu-hal/src/lib.rs | 6 ++++-- wgpu-hal/src/vulkan/command.rs | 8 ++++---- wgpu-hal/src/vulkan/device.rs | 17 +++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 321d8664ee..8888508d4e 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -690,9 +690,11 @@ bitflags::bitflags! { /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits | Self::COPY_SRC.bits | Self::INDEX.bits | Self::VERTEX.bits | Self::UNIFORM.bits | - Self::STORAGE_READ.bits | Self::INDIRECT.bits; + Self::STORAGE_READ.bits | Self::INDIRECT.bits | + Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits; /// The combination of states that a buffer must exclusively be in. - const EXCLUSIVE = Self::MAP_WRITE.bits | Self::COPY_DST.bits | Self::STORAGE_READ_WRITE.bits; + const EXCLUSIVE = Self::MAP_WRITE.bits | Self::COPY_DST.bits | + Self::STORAGE_READ_WRITE.bits | Self::ACCELERATION_STRUCTURE_SCRATCH.bits; /// The combination of all usages that the are guaranteed to be be ordered by the hardware. /// If a usage is ordered, then if the buffer state doesn't change between draw calls, there /// are no barriers needed for synchronization. diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 02768f3c74..2f5e1ea59e 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -441,13 +441,13 @@ impl crate::CommandEncoder for super::CommandEncoder { let geometry_info = geometry_info.build(); - let range = &[range][..]; - let range = &[range][..]; - let geometry_info = &[geometry_info]; + //each geometry has multiple ranges; building requires a vector of geometry_infos and a vector of vectors of ranges + let ranges : &[&[vk::AccelerationStructureBuildRangeInfoKHR]] = &[&[range]]; + let geometry_infos = &[geometry_info]; ray_tracing_functions .acceleration_structure - .cmd_build_acceleration_structures(self.active, geometry_info, range); + .cmd_build_acceleration_structures(self.active, geometry_infos, ranges); } // render diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index afff881b18..428554ecbc 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -784,18 +784,18 @@ impl crate::Device for super::Device { 
desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), ); + let alignment_mask = if desc.usage + .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT){ + 16 + } else { + req.alignment + } - 1; + let block = self.mem_allocator.lock().alloc( &*self.shared, gpu_alloc::Request { size: req.size, - align_mask: if desc - .usage - .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT) - { - 16 - } else { - req.alignment - } - 1, + align_mask: alignment_mask, usage: alloc_usage, memory_types: req.memory_type_bits & self.valid_ash_memory_types, }, @@ -1429,6 +1429,7 @@ impl crate::Device for super::Device { // `raw_acceleration_structures`. let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR = *acceleration_structure_info; + assert!(index < desc.acceleration_structures.len(), "Encountered more acceleration structures then expected"); acceleration_structure_infos.push(acceleration_structure_info); extra_descriptor_count += 1; From 074ebe560d2f0ea2cf2e1b6ab5e0d502fa222b06 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 21 Feb 2023 23:01:44 +0100 Subject: [PATCH 14/33] unsafe blocks in unsafe functions --- wgpu-hal/src/vulkan/command.rs | 60 ++++++++++------- wgpu-hal/src/vulkan/device.rs | 118 ++++++++++++++++++--------------- 2 files changed, 101 insertions(+), 77 deletions(-) diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 1bb4c0c16a..9e72f00653 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -377,13 +377,16 @@ impl crate::CommandEncoder for super::CommandEncoder { let geometry = match *desc.geometry { crate::AccelerationStructureGeometry::Instances { buffer } => { + let device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ) + }; let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( 
vk::DeviceOrHostAddressConstKHR { - device_address: ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), - ), + device_address }, ); @@ -401,30 +404,36 @@ impl crate::CommandEncoder for super::CommandEncoder { vertex_stride, ref indices, } => { + let device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(vertex_buffer.raw), + ) + }; let mut triangles_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(vertex_buffer.raw), - ), + device_address }) .vertex_format(conv::map_vertex_format(vertex_format)) .vertex_stride(vertex_stride) .max_vertex(max_vertex); if let Some(ref indices) = *indices { + let device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(indices.buffer.raw), + ) + }; triangles_data = triangles_data .index_type(conv::map_index_format(indices.format)) .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(indices.buffer.raw), - ), + device_address }) } @@ -446,6 +455,13 @@ impl crate::CommandEncoder for super::CommandEncoder { .primitive_offset(desc.primitive_offset) .build(); + let device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), + ) + }; let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() .ty(conv::map_acceleration_structure_format(desc.format)) 
.mode(conv::map_acceleration_structure_build_mode(desc.mode)) @@ -453,11 +469,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .geometries(geometries) .dst_acceleration_structure(desc.destination_acceleration_structure.raw) .scratch_data(vk::DeviceOrHostAddressKHR { - device_address: ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), - ), + device_address }); if desc.mode == crate::AccelerationStructureBuildMode::Update { @@ -470,9 +482,11 @@ impl crate::CommandEncoder for super::CommandEncoder { let ranges : &[&[vk::AccelerationStructureBuildRangeInfoKHR]] = &[&[range]]; let geometry_infos = &[geometry_info]; - ray_tracing_functions + unsafe { + ray_tracing_functions .acceleration_structure .cmd_build_acceleration_structures(self.active, geometry_infos, ranges); + } } // render diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index d80ef1e90c..2fa1e34fa3 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2106,13 +2106,15 @@ impl crate::Device for super::Device { .flags(conv::map_acceleration_structure_flags(desc.flags)) .geometries(geometries); - let raw = ray_tracing_functions - .acceleration_structure - .get_acceleration_structure_build_sizes( - vk::AccelerationStructureBuildTypeKHR::DEVICE, - &geometry_info, - &[desc.primitive_count], - ); + let raw = unsafe { + ray_tracing_functions + .acceleration_structure + .get_acceleration_structure_build_sizes( + vk::AccelerationStructureBuildTypeKHR::DEVICE, + &geometry_info, + &[desc.primitive_count], + ) + }; crate::AccelerationStructureBuildSizes { acceleration_structure_size: raw.acceleration_structure_size, @@ -2130,12 +2132,14 @@ impl crate::Device for super::Device { None => panic!("Feature `RAY_TRACING` not enabled"), }; - ray_tracing_functions + unsafe { + ray_tracing_functions .acceleration_structure .get_acceleration_structure_device_address( 
&vk::AccelerationStructureDeviceAddressInfoKHR::builder() .acceleration_structure(acceleration_structure.raw), ) + } } unsafe fn create_acceleration_structure( @@ -2152,51 +2156,55 @@ impl crate::Device for super::Device { .usage(vk::BufferUsageFlags::ACCELERATION_STRUCTURE_STORAGE_KHR) .sharing_mode(vk::SharingMode::EXCLUSIVE); - let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; - let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); + unsafe { - let block = self.mem_allocator.lock().alloc( - &*self.shared, - gpu_alloc::Request { - size: req.size, - align_mask: req.alignment - 1, - usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, - memory_types: req.memory_type_bits & self.valid_ash_memory_types, - }, - )?; + let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; + let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); - self.shared - .raw - .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; + let block = self.mem_allocator.lock().alloc( + &*self.shared, + gpu_alloc::Request { + size: req.size, + align_mask: req.alignment - 1, + usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, + memory_types: req.memory_type_bits & self.valid_ash_memory_types, + }, + )?; - if let Some(label) = desc.label { self.shared - .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); - } + .raw + .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())?; - let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() - .buffer(raw_buffer) - .offset(0) - .size(desc.size) - .ty(conv::map_acceleration_structure_format(desc.format)); + if let Some(label) = desc.label { + self.shared + .set_object_name(vk::ObjectType::BUFFER, raw_buffer, label); + } - let raw_acceleration_structure = ray_tracing_functions - .acceleration_structure - .create_acceleration_structure(&vk_info, None)?; + let vk_info = vk::AccelerationStructureCreateInfoKHR::builder() + .buffer(raw_buffer) + .offset(0) + 
.size(desc.size) + .ty(conv::map_acceleration_structure_format(desc.format)); + + let raw_acceleration_structure = ray_tracing_functions + .acceleration_structure + .create_acceleration_structure(&vk_info, None)?; + + if let Some(label) = desc.label { + self.shared.set_object_name( + vk::ObjectType::ACCELERATION_STRUCTURE_KHR, + raw_acceleration_structure, + label, + ); + } - if let Some(label) = desc.label { - self.shared.set_object_name( - vk::ObjectType::ACCELERATION_STRUCTURE_KHR, - raw_acceleration_structure, - label, - ); - } + Ok(super::AccelerationStructure { + raw: raw_acceleration_structure, + buffer: raw_buffer, + block: Mutex::new(block), + }) - Ok(super::AccelerationStructure { - raw: raw_acceleration_structure, - buffer: raw_buffer, - block: Mutex::new(block), - }) + } } unsafe fn destroy_acceleration_structure( @@ -2208,15 +2216,17 @@ impl crate::Device for super::Device { None => panic!("Feature `RAY_TRACING` not enabled"), }; - ray_tracing_functions - .acceleration_structure - .destroy_acceleration_structure(acceleration_structure.raw, None); - self.shared - .raw - .destroy_buffer(acceleration_structure.buffer, None); - self.mem_allocator - .lock() - .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + unsafe{ + ray_tracing_functions + .acceleration_structure + .destroy_acceleration_structure(acceleration_structure.raw, None); + self.shared + .raw + .destroy_buffer(acceleration_structure.buffer, None); + self.mem_allocator + .lock() + .dealloc(&*self.shared, acceleration_structure.block.into_inner()); + } } } From 8ce204bc59543a4e19db1e284ef8908a52064fc1 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 21 Feb 2023 23:14:49 +0100 Subject: [PATCH 15/33] formatted code --- wgpu-hal/examples/ray-traced-triangle/main.rs | 11 ++++--- wgpu-hal/src/vulkan/adapter.rs | 2 +- wgpu-hal/src/vulkan/command.rs | 31 ++++++------------- wgpu-hal/src/vulkan/device.rs | 25 ++++++++------- 4 files changed, 32 insertions(+), 37 deletions(-) diff 
--git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index dd120868df..b9de4a01ae 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -100,7 +100,7 @@ impl Example { } else { hal::InstanceFlags::empty() }, - dx12_shader_compiler: wgt::Dx12Compiler::Fxc + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, }; let instance = unsafe { A::Instance::init(&instance_desc)? }; let mut surface = unsafe { @@ -127,9 +127,12 @@ impl Example { let window_size: (u32, u32) = window.inner_size().into(); dbg!(&surface_caps.formats); - let surface_format = if surface_caps.formats.contains(&wgt::TextureFormat::Rgba8Snorm) { + let surface_format = if surface_caps + .formats + .contains(&wgt::TextureFormat::Rgba8Snorm) + { wgt::TextureFormat::Rgba8Unorm - }else{ + } else { *surface_caps.formats.first().unwrap() }; let surface_config = hal::SurfaceConfiguration { @@ -417,7 +420,7 @@ impl Example { format: wgt::TextureFormat::Rgba8Unorm, usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, memory_flags: hal::MemoryFlags::empty(), - view_formats: vec![wgt::TextureFormat::Rgba8Unorm] + view_formats: vec![wgt::TextureFormat::Rgba8Unorm], }; let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 39faf4ca28..b024e1388d 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -874,7 +874,7 @@ impl super::InstanceShared { .insert(vk::PhysicalDeviceAccelerationStructurePropertiesKHR::default()); builder = builder.push_next(next); } - + if supports_driver_properties { let next = capabilities .driver diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 9e72f00653..27727b6070 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -384,11 +384,8 @@ impl crate::CommandEncoder for 
super::CommandEncoder { &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), ) }; - let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( - vk::DeviceOrHostAddressConstKHR { - device_address - }, - ); + let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { device_address }); vk::AccelerationStructureGeometryKHR::builder() .geometry_type(vk::GeometryTypeKHR::INSTANCES) @@ -408,15 +405,12 @@ impl crate::CommandEncoder for super::CommandEncoder { ray_tracing_functions .buffer_device_address .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(vertex_buffer.raw), + &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), ) }; let mut triangles_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address - }) + .vertex_data(vk::DeviceOrHostAddressConstKHR { device_address }) .vertex_format(conv::map_vertex_format(vertex_format)) .vertex_stride(vertex_stride) .max_vertex(max_vertex); @@ -426,15 +420,12 @@ impl crate::CommandEncoder for super::CommandEncoder { ray_tracing_functions .buffer_device_address .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(indices.buffer.raw), + &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), ) }; triangles_data = triangles_data .index_type(conv::map_index_format(indices.format)) - .index_data(vk::DeviceOrHostAddressConstKHR { - device_address - }) + .index_data(vk::DeviceOrHostAddressConstKHR { device_address }) } let triangles_data = triangles_data.build(); @@ -468,9 +459,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .flags(conv::map_acceleration_structure_flags(desc.flags)) .geometries(geometries) .dst_acceleration_structure(desc.destination_acceleration_structure.raw) - .scratch_data(vk::DeviceOrHostAddressKHR { - device_address - }); + 
.scratch_data(vk::DeviceOrHostAddressKHR { device_address }); if desc.mode == crate::AccelerationStructureBuildMode::Update { geometry_info.src_acceleration_structure = desc.destination_acceleration_structure.raw; @@ -479,13 +468,13 @@ impl crate::CommandEncoder for super::CommandEncoder { let geometry_info = geometry_info.build(); //each geometry has multiple ranges; building requires a vector of geometry_infos and a vector of vectors of ranges - let ranges : &[&[vk::AccelerationStructureBuildRangeInfoKHR]] = &[&[range]]; + let ranges: &[&[vk::AccelerationStructureBuildRangeInfoKHR]] = &[&[range]]; let geometry_infos = &[geometry_info]; unsafe { ray_tracing_functions - .acceleration_structure - .cmd_build_acceleration_structures(self.active, geometry_infos, ranges); + .acceleration_structure + .cmd_build_acceleration_structures(self.active, geometry_infos, ranges); } } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 2fa1e34fa3..434110aefe 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -841,8 +841,10 @@ impl crate::Device for super::Device { desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), ); - let alignment_mask = if desc.usage - .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT){ + let alignment_mask = if desc + .usage + .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT) + { 16 } else { req.alignment @@ -1510,7 +1512,10 @@ impl crate::Device for super::Device { // `raw_acceleration_structures`. 
let acceleration_structure_info: vk::WriteDescriptorSetAccelerationStructureKHR = *acceleration_structure_info; - assert!(index < desc.acceleration_structures.len(), "Encountered more acceleration structures then expected"); + assert!( + index < desc.acceleration_structures.len(), + "Encountered more acceleration structures then expected" + ); acceleration_structure_infos.push(acceleration_structure_info); extra_descriptor_count += 1; @@ -2134,11 +2139,11 @@ impl crate::Device for super::Device { unsafe { ray_tracing_functions - .acceleration_structure - .get_acceleration_structure_device_address( - &vk::AccelerationStructureDeviceAddressInfoKHR::builder() - .acceleration_structure(acceleration_structure.raw), - ) + .acceleration_structure + .get_acceleration_structure_device_address( + &vk::AccelerationStructureDeviceAddressInfoKHR::builder() + .acceleration_structure(acceleration_structure.raw), + ) } } @@ -2157,7 +2162,6 @@ impl crate::Device for super::Device { .sharing_mode(vk::SharingMode::EXCLUSIVE); unsafe { - let raw_buffer = self.shared.raw.create_buffer(&vk_buffer_info, None)?; let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer); @@ -2203,7 +2207,6 @@ impl crate::Device for super::Device { buffer: raw_buffer, block: Mutex::new(block), }) - } } @@ -2216,7 +2219,7 @@ impl crate::Device for super::Device { None => panic!("Feature `RAY_TRACING` not enabled"), }; - unsafe{ + unsafe { ray_tracing_functions .acceleration_structure .destroy_acceleration_structure(acceleration_structure.raw, None); From f52a70b59c558a2ea0ab5533926c7e28f74e3999 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Wed, 22 Feb 2023 06:22:23 +0100 Subject: [PATCH 16/33] added utility functions to AccelerationStructureInstance (bit packing) --- wgpu-hal/examples/ray-traced-triangle/main.rs | 248 +++++++++++++----- 1 file changed, 187 insertions(+), 61 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs 
index b9de4a01ae..29bfa391dd 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -5,7 +5,7 @@ use hal::{ }; use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; -use glam::{Mat4, Vec3}; +use glam::{Affine3A, Mat4, Vec3}; use std::{ borrow::{Borrow, Cow}, iter, mem, @@ -17,27 +17,152 @@ use std::{ const COMMAND_BUFFER_PER_CONTEXT: usize = 100; const DESIRED_FRAMES: u32 = 3; -fn pack_24_8(low_24: u32, high_8: u8) -> u32 { - (low_24 & 0x00ff_ffff) | (u32::from(high_8) << 24) -} - -#[derive(Debug)] +/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) +/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) +#[derive(Clone)] #[repr(C)] -struct Instance { +struct AccelerationStructureInstance { transform: [f32; 12], - instance_custom_index_and_mask: u32, - instance_shader_binding_table_record_offset_and_flags: u32, + custom_index_and_mask: u32, + shader_binding_table_record_offset_and_flags: u32, acceleration_structure_reference: u64, } -fn transpose_matrix_for_acceleration_structure_instance(matrix: Mat4) -> [f32; 12] { - let row_0 = matrix.row(0); - let row_1 = matrix.row(1); - let row_2 = matrix.row(2); - [ - row_0.x, row_0.y, row_0.z, row_0.w, row_1.x, row_1.y, row_1.z, row_1.w, row_2.x, row_2.y, - row_2.z, row_2.w, - ] +impl std::fmt::Debug for AccelerationStructureInstance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instance") + .field("transform", &self.transform) + .field("custom_index()", &self.custom_index()) + .field("mask()", &self.mask()) + .field( + "shader_binding_table_record_offset()", + &self.shader_binding_table_record_offset(), + ) + .field("flags()", &self.flags()) + .field( + "acceleration_structure_reference", + &self.acceleration_structure_reference, + ) + 
.finish() + } +} + +#[allow(dead_code)] +impl AccelerationStructureInstance { + const LOW_24_MASK: u32 = 0x00ff_ffff; + const MAX_U24: u32 = (1u32 << 24u32) - 1u32; + + #[inline] + fn affine_to_rows(mat: &Affine3A) -> [f32; 12] { + let row_0 = mat.matrix3.row(0); + let row_1 = mat.matrix3.row(1); + let row_2 = mat.matrix3.row(2); + let translation = mat.translation; + [ + row_0.x, + row_0.y, + row_0.z, + translation.x, + row_1.x, + row_1.y, + row_1.z, + translation.y, + row_2.x, + row_2.y, + row_2.z, + translation.z, + ] + } + + #[inline] + fn rows_to_affine(rows: &[f32; 12]) -> Affine3A { + Affine3A::from_cols_array(&[ + rows[0], rows[3], rows[6], rows[9], rows[1], rows[4], rows[7], rows[10], rows[2], + rows[5], rows[8], rows[11], + ]) + } + + pub fn transform_as_affine(&self) -> Affine3A { + Self::rows_to_affine(&self.transform) + } + pub fn set_transform(&mut self, transform: &Affine3A) { + self.transform = Self::affine_to_rows(&transform); + } + + pub fn custom_index(&self) -> u32 { + self.custom_index_and_mask & Self::LOW_24_MASK + } + + pub fn mask(&self) -> u8 { + (self.custom_index_and_mask >> 24) as u8 + } + + pub fn shader_binding_table_record_offset(&self) -> u32 { + self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK + } + + pub fn flags(&self) -> u8 { + (self.shader_binding_table_record_offset_and_flags >> 24) as u8 + } + + pub fn set_custom_index(&mut self, custom_index: u32) { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! 
{custom_index} > {}", + Self::MAX_U24 + ); + self.custom_index_and_mask = + (custom_index & Self::LOW_24_MASK) | (self.custom_index_and_mask & !Self::LOW_24_MASK) + } + + pub fn set_mask(&mut self, mask: u8) { + self.custom_index_and_mask = + (self.custom_index_and_mask & Self::LOW_24_MASK) | (u32::from(mask) << 24) + } + + pub fn set_shader_binding_table_record_offset( + &mut self, + shader_binding_table_record_offset: u32, + ) { + debug_assert!(shader_binding_table_record_offset <= Self::MAX_U24, "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24); + self.shader_binding_table_record_offset_and_flags = (shader_binding_table_record_offset + & Self::LOW_24_MASK) + | (self.shader_binding_table_record_offset_and_flags & !Self::LOW_24_MASK) + } + + pub fn set_flags(&mut self, flags: u8) { + self.shader_binding_table_record_offset_and_flags = + (self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK) + | (u32::from(flags) << 24) + } + + pub fn new( + transform: &Affine3A, + custom_index: u32, + mask: u8, + shader_binding_table_record_offset: u32, + flags: u8, + acceleration_structure_reference: u64, + ) -> Self { + dbg!(transform); + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + debug_assert!( + shader_binding_table_record_offset <= Self::MAX_U24, + "shader_binding_table_record_offset uses more than 24 bits! 
{shader_binding_table_record_offset} > {}", Self::MAX_U24 + ); + AccelerationStructureInstance { + transform: Self::affine_to_rows(transform), + custom_index_and_mask: (custom_index & Self::MAX_U24) | (u32::from(mask) << 24), + shader_binding_table_record_offset_and_flags: (shader_binding_table_record_offset + & Self::MAX_U24) + | (u32::from(flags) << 24), + acceleration_structure_reference, + } + } } struct ExecutionContext { @@ -83,7 +208,7 @@ struct Example { vertices_buffer: A::Buffer, indices_buffer: A::Buffer, texture: A::Texture, - instances: [Instance; 1], + instances: [AccelerationStructureInstance; 3], instances_buffer: A::Buffer, blas: A::AccelerationStructure, tlas: A::AccelerationStructure, @@ -271,8 +396,6 @@ impl Example { let indices_size_in_bytes = indices.len() * 4; - let transform_matrix = [1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]; - let vertices_buffer = unsafe { let vertices_buffer = device .create_buffer(&hal::BufferDescriptor { @@ -485,37 +608,48 @@ impl Example { }; let instances = [ - Instance { - transform: transform_matrix, - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: unsafe { - device.get_acceleration_structure_device_address(&blas) - }, - }, - /*Instance { - transform: transpose_matrix_for_acceleration_structure_instance( - Mat4::from_rotation_y(1.0), - ), - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: unsafe { - device.get_acceleration_structure_device_address(&blas) - }, - }, - Instance { - transform: transpose_matrix_for_acceleration_structure_instance( - Mat4::from_rotation_y(-1.0), - ), - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: unsafe { - 
device.get_acceleration_structure_device_address(&blas) - }, - },*/ + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 0.0, + y: 0.0, + z: 0.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: -1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), ]; - let instances_buffer_size = instances.len() * std::mem::size_of::(); + dbg!(&instances[0]); + + let instances_buffer_size = + instances.len() * std::mem::size_of::(); let instances_buffer = unsafe { let instances_buffer = device @@ -662,24 +796,15 @@ impl Example { usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, }; - let instances_buffer_size = self.instances.len() * std::mem::size_of::(); + let instances_buffer_size = + self.instances.len() * std::mem::size_of::(); let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; self.time += 1.0 / 60.0; - self.instances[0] = Instance { - transform: transpose_matrix_for_acceleration_structure_instance(Mat4::from_rotation_y( - self.time, - )), - instance_custom_index_and_mask: pack_24_8(0, 0xff), - instance_shader_binding_table_record_offset_and_flags: pack_24_8(0, 0), - acceleration_structure_reference: unsafe { - self.device - .get_acceleration_structure_device_address(&self.blas) - }, - }; + self.instances[0].set_transform(&Affine3A::from_rotation_y(self.time)); unsafe { let mapping = self @@ -902,6 +1027,7 @@ fn main() { width: 512, height: 512, }) + .with_resizable(false) .build(&event_loop) .unwrap(); From 
f15e865801cd119aaf1ec74a8f853c163a36cef9 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Mon, 27 Feb 2023 14:32:38 +0100 Subject: [PATCH 17/33] WIP api change build_acceleration_structures --- wgpu-hal/examples/ray-traced-triangle/main.rs | 56 +++--- wgpu-hal/src/lib.rs | 52 ++++-- wgpu-hal/src/vulkan/command.rs | 162 +++++++++++------- 3 files changed, 166 insertions(+), 104 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 29bfa391dd..dc0b09810f 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -144,7 +144,6 @@ impl AccelerationStructureInstance { flags: u8, acceleration_structure_reference: u64, ) -> Self { - dbg!(transform); debug_assert!( custom_index <= Self::MAX_U24, "custom_index uses more than 24 bits! {custom_index} > {}", @@ -646,8 +645,6 @@ impl Example { ), ]; - dbg!(&instances[0]); - let instances_buffer_size = instances.len() * std::mem::size_of::(); @@ -685,24 +682,27 @@ impl Example { unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; unsafe { + let geometry = hal::AccelerationStructureTriangles { + vertex_buffer: &vertices_buffer, + first_vertex: 0, + vertex_format: wgt::VertexFormat::Float32x3, + vertex_count: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureTriangleIndices { + buffer: &indices_buffer, + format: wgt::IndexFormat::Uint32, + offset: 0, + count: indices.len() as u32, + }), + transforms: None, + }; cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { - geometry: &hal::AccelerationStructureGeometry::Triangles { - vertex_buffer: &vertices_buffer, - vertex_format: wgt::VertexFormat::Float32x3, - max_vertex: vertices.len() as u32, - vertex_stride: 3 * 4, - indices: Some(hal::AccelerationStructureGeometryIndices { - buffer: &indices_buffer, - format: wgt::IndexFormat::Uint32, - }), - }, - format: 
hal::AccelerationStructureFormat::BottomLevel, mode: hal::AccelerationStructureBuildMode::Build, flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, - primitive_count: indices.len() as u32 / 3, - primitive_offset: 0, destination_acceleration_structure: &blas, scratch_buffer: &scratch_buffer, + entries: &hal::AccelerationStructureEntries::Triangles(&vec![geometry]), + source_acceleration_structure: None, }); let as_barrier = hal::BufferBarrier { @@ -712,17 +712,17 @@ impl Example { }; cmd_encoder.transition_buffers(iter::once(as_barrier)); + let instances = hal::AccelerationStructureInstances { + buffer: &instances_buffer, + count: instances.len() as u32, + }; cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { - geometry: &hal::AccelerationStructureGeometry::Instances { - buffer: &instances_buffer, - }, - format: hal::AccelerationStructureFormat::TopLevel, mode: hal::AccelerationStructureBuildMode::Build, flags: tlas_flags, - primitive_count: instances.len() as u32, - primitive_offset: 0, destination_acceleration_structure: &tlas, scratch_buffer: &scratch_buffer, + entries: &hal::AccelerationStructureEntries::Instances(&instances), + source_acceleration_structure: None, }); let texture_barrier = hal::TextureBarrier { @@ -823,18 +823,18 @@ impl Example { unsafe { ctx.encoder.begin_encoding(Some("frame")).unwrap(); + let instances = hal::AccelerationStructureInstances { + buffer: &self.instances_buffer, + count: self.instances.len() as u32, + }; ctx.encoder .build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { - geometry: &hal::AccelerationStructureGeometry::Instances { - buffer: &self.instances_buffer, - }, - format: hal::AccelerationStructureFormat::TopLevel, mode: hal::AccelerationStructureBuildMode::Update, flags: tlas_flags, - primitive_count: self.instances.len() as u32, - primitive_offset: 0, destination_acceleration_structure: &self.tlas, scratch_buffer: &self.scratch_buffer, + entries: 
&hal::AccelerationStructureEntries::Instances(&instances), + source_acceleration_structure: Some(&self.tlas), }); let as_barrier = hal::BufferBarrier { diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 63d0a689f6..4d0dca7533 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1392,32 +1392,52 @@ pub enum AccelerationStructureGeometryInfo { } pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { - pub geometry: &'a AccelerationStructureGeometry<'a, A>, - pub format: AccelerationStructureFormat, + pub entries: &'a AccelerationStructureEntries<'a, A>, pub mode: AccelerationStructureBuildMode, pub flags: AccelerationStructureBuildFlags, - pub primitive_count: u32, - pub primitive_offset: u32, + pub source_acceleration_structure: Option<&'a A::AccelerationStructure>, pub destination_acceleration_structure: &'a A::AccelerationStructure, pub scratch_buffer: &'a A::Buffer, } -pub enum AccelerationStructureGeometry<'a, A: Api> { - Triangles { - vertex_buffer: &'a A::Buffer, - vertex_format: wgt::VertexFormat, - max_vertex: u32, - vertex_stride: wgt::BufferAddress, - indices: Option>, - }, - Instances { - buffer: &'a A::Buffer, - }, +pub enum AccelerationStructureEntries<'a, A: Api> { + Instances(&'a AccelerationStructureInstances<'a, A>), + Triangles(&'a [AccelerationStructureTriangles<'a, A>]), + AABBs(&'a [AccelerationStructureAABBs]), } -pub struct AccelerationStructureGeometryIndices<'a, A: Api> { +// TODO: flags +pub struct AccelerationStructureTriangles<'a, A: Api> { + pub vertex_buffer: &'a A::Buffer, + pub first_vertex: u32, + pub vertex_format: wgt::VertexFormat, + pub vertex_count: u32, + pub vertex_stride: wgt::BufferAddress, + pub indices: Option>, + pub transforms: Option>, +} + +// TODO: * +pub struct AccelerationStructureAABBs { + pub count: u32, //TODO +} + +// TODO: offset +pub struct AccelerationStructureInstances<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub count: u32, +} + +pub struct 
AccelerationStructureTriangleIndices<'a, A: Api> { pub format: wgt::IndexFormat, pub buffer: &'a A::Buffer, + pub offset: u32, + pub count: u32, +} + +pub struct AccelerationStructureTriangleTransforms<'a, A: Api> { + pub buffer: &'a A::Buffer, + pub offset: u32, } bitflags!( diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 27727b6070..2e12eea09f 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -375,106 +375,148 @@ impl crate::CommandEncoder for super::CommandEncoder { None => panic!("Feature `RAY_TRACING` not enabled"), }; - let geometry = match *desc.geometry { - crate::AccelerationStructureGeometry::Instances { buffer } => { + let (geometries, ranges) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(instances) => { let device_address = unsafe { ray_tracing_functions .buffer_device_address .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + &vk::BufferDeviceAddressInfo::builder().buffer(instances.buffer.raw), ) }; - let instances = vk::AccelerationStructureGeometryInstancesDataKHR::builder() + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder() .data(vk::DeviceOrHostAddressConstKHR { device_address }); - vk::AccelerationStructureGeometryKHR::builder() + let geometry = vk::AccelerationStructureGeometryKHR::builder() .geometry_type(vk::GeometryTypeKHR::INSTANCES) .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instances, + instances: *instance_data, }) - .flags(vk::GeometryFlagsKHR::empty()) + .flags(vk::GeometryFlagsKHR::empty()); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(instances.count); + + (vec![*geometry], vec![*range]) } - crate::AccelerationStructureGeometry::Triangles { - vertex_buffer, - vertex_format, - max_vertex, - vertex_stride, - ref indices, - } => { - let device_address = unsafe { - ray_tracing_functions - 
.buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(vertex_buffer.raw), - ) - }; - let mut triangles_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { device_address }) - .vertex_format(conv::map_vertex_format(vertex_format)) - .vertex_stride(vertex_stride) - .max_vertex(max_vertex); - - if let Some(ref indices) = *indices { - let device_address = unsafe { + crate::AccelerationStructureEntries::Triangles(in_geometries) => { + let mut ranges = Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + for triangles in in_geometries { + let vertex_device_address = unsafe { ray_tracing_functions .buffer_device_address .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(indices.buffer.raw), + &vk::BufferDeviceAddressInfo::builder() + .buffer(triangles.vertex_buffer.raw), ) }; - triangles_data = triangles_data - .index_type(conv::map_index_format(indices.format)) - .index_data(vk::DeviceOrHostAddressConstKHR { device_address }) - } - - let triangles_data = triangles_data.build(); + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: vertex_device_address, + }) + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + .vertex_stride(triangles.vertex_stride); + + let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + + if let Some(indices) = &triangles.indices { + let index_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(indices.buffer.raw), + ) + }; + triangle_data = triangle_data + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: index_device_address, + }) + 
.index_type(conv::map_index_format(indices.format)); + + range = range + .primitive_count(indices.count / 3) + .primitive_offset(indices.offset) + .first_vertex(triangles.first_vertex); + } else { + range = range + .primitive_count(triangles.vertex_count) + .first_vertex(triangles.first_vertex); + } + if let Some(transform) = &triangles.transforms { + let transform_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(transform.buffer.raw), + ) + }; + triangle_data = + triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { + device_address: transform_device_address, + }); + + range = range.transform_offset(transform.offset); + } - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: triangles_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + .build(); + geometries.push(geometry); + ranges.push(*range); + } + (geometries, ranges) } + crate::AccelerationStructureEntries::AABBs(_) => todo!(), }; - let geometries = &[*geometry]; - - let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(desc.primitive_count) - .primitive_offset(desc.primitive_offset) - .build(); - - let device_address = unsafe { + let scratch_device_address = unsafe { ray_tracing_functions .buffer_device_address .get_buffer_device_address( &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), ) }; + let ty = match desc.entries { + crate::AccelerationStructureEntries::Instances(_) => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL + } + _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; 
let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(conv::map_acceleration_structure_format(desc.format)) + .ty(ty) .mode(conv::map_acceleration_structure_build_mode(desc.mode)) .flags(conv::map_acceleration_structure_flags(desc.flags)) - .geometries(geometries) + .geometries(&geometries) .dst_acceleration_structure(desc.destination_acceleration_structure.raw) - .scratch_data(vk::DeviceOrHostAddressKHR { device_address }); + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: scratch_device_address, + }); if desc.mode == crate::AccelerationStructureBuildMode::Update { - geometry_info.src_acceleration_structure = desc.destination_acceleration_structure.raw; + geometry_info.src_acceleration_structure = desc + .source_acceleration_structure + .expect("Acceleration tructure update: source structure required") + .raw; } let geometry_info = geometry_info.build(); - //each geometry has multiple ranges; building requires a vector of geometry_infos and a vector of vectors of ranges - let ranges: &[&[vk::AccelerationStructureBuildRangeInfoKHR]] = &[&[range]]; - let geometry_infos = &[geometry_info]; - unsafe { ray_tracing_functions .acceleration_structure - .cmd_build_acceleration_structures(self.active, geometry_infos, ranges); + .cmd_build_acceleration_structures(self.active, &[geometry_info], &[&ranges]); } } From a3c0ffb35f0f825e4999ac4bf2ebe4ffb4942e46 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 28 Feb 2023 08:33:27 +0100 Subject: [PATCH 18/33] preperation for changes in get_acceleration_structure_build_sizes --- wgpu-hal/examples/ray-traced-triangle/main.rs | 8 +- wgpu-hal/src/lib.rs | 11 +- wgpu-hal/src/vulkan/command.rs | 217 +++++++++--------- 3 files changed, 125 insertions(+), 111 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index dc0b09810f..da2e446d47 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ 
b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -683,13 +683,13 @@ impl Example { unsafe { let geometry = hal::AccelerationStructureTriangles { - vertex_buffer: &vertices_buffer, + vertex_buffer: Some(&vertices_buffer), first_vertex: 0, vertex_format: wgt::VertexFormat::Float32x3, vertex_count: vertices.len() as u32, vertex_stride: 3 * 4, indices: Some(hal::AccelerationStructureTriangleIndices { - buffer: &indices_buffer, + buffer: Some(&indices_buffer), format: wgt::IndexFormat::Uint32, offset: 0, count: indices.len() as u32, @@ -713,7 +713,7 @@ impl Example { cmd_encoder.transition_buffers(iter::once(as_barrier)); let instances = hal::AccelerationStructureInstances { - buffer: &instances_buffer, + buffer: Some(&instances_buffer), count: instances.len() as u32, }; cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { @@ -824,7 +824,7 @@ impl Example { ctx.encoder.begin_encoding(Some("frame")).unwrap(); let instances = hal::AccelerationStructureInstances { - buffer: &self.instances_buffer, + buffer: Some(&self.instances_buffer), count: self.instances.len() as u32, }; ctx.encoder diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 4d0dca7533..a26cf87cd6 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1400,6 +1400,11 @@ pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { pub scratch_buffer: &'a A::Buffer, } +/// +/// Usage for buffer size requirements: +/// All Buffers, BufferAdresses and offsets will be ignored. +/// Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Trinagles/AABBs in coresponding groups), +/// may result in reduced size requirements. 
pub enum AccelerationStructureEntries<'a, A: Api> { Instances(&'a AccelerationStructureInstances<'a, A>), Triangles(&'a [AccelerationStructureTriangles<'a, A>]), @@ -1408,7 +1413,7 @@ pub enum AccelerationStructureEntries<'a, A: Api> { // TODO: flags pub struct AccelerationStructureTriangles<'a, A: Api> { - pub vertex_buffer: &'a A::Buffer, + pub vertex_buffer: Option<&'a A::Buffer>, pub first_vertex: u32, pub vertex_format: wgt::VertexFormat, pub vertex_count: u32, @@ -1424,13 +1429,13 @@ pub struct AccelerationStructureAABBs { // TODO: offset pub struct AccelerationStructureInstances<'a, A: Api> { - pub buffer: &'a A::Buffer, + pub buffer: Option<&'a A::Buffer>, pub count: u32, } pub struct AccelerationStructureTriangleIndices<'a, A: Api> { pub format: wgt::IndexFormat, - pub buffer: &'a A::Buffer, + pub buffer: Option<&'a A::Buffer>, pub offset: u32, pub count: u32, } diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 2e12eea09f..b80b557ffb 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -375,110 +375,8 @@ impl crate::CommandEncoder for super::CommandEncoder { None => panic!("Feature `RAY_TRACING` not enabled"), }; - let (geometries, ranges) = match *desc.entries { - crate::AccelerationStructureEntries::Instances(instances) => { - let device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(instances.buffer.raw), - ) - }; - let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder() - .data(vk::DeviceOrHostAddressConstKHR { device_address }); - - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::INSTANCES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instance_data, - }) - .flags(vk::GeometryFlagsKHR::empty()); - - let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - 
.primitive_count(instances.count); - - (vec![*geometry], vec![*range]) - } - crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut ranges = Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = - Vec::::with_capacity(in_geometries.len()); - for triangles in in_geometries { - let vertex_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(triangles.vertex_buffer.raw), - ) - }; - let mut triangle_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: vertex_device_address, - }) - .vertex_format(conv::map_vertex_format(triangles.vertex_format)) - .max_vertex(triangles.vertex_count) - .vertex_stride(triangles.vertex_stride); - - let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); - - if let Some(indices) = &triangles.indices { - let index_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(indices.buffer.raw), - ) - }; - triangle_data = triangle_data - .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: index_device_address, - }) - .index_type(conv::map_index_format(indices.format)); - - range = range - .primitive_count(indices.count / 3) - .primitive_offset(indices.offset) - .first_vertex(triangles.first_vertex); - } else { - range = range - .primitive_count(triangles.vertex_count) - .first_vertex(triangles.first_vertex); - } - if let Some(transform) = &triangles.transforms { - let transform_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(transform.buffer.raw), - ) - }; - triangle_data = - triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { - device_address: transform_device_address, - }); - - 
range = range.transform_offset(transform.offset); - } - - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: *triangle_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) - .build(); - geometries.push(geometry); - ranges.push(*range); - } - (geometries, ranges) - } - crate::AccelerationStructureEntries::AABBs(_) => todo!(), + let (geometries, ranges) = unsafe { + to_raw_acceleration_structure_geometry_and_range_info(desc, ray_tracing_functions, true) }; let scratch_device_address = unsafe { @@ -971,6 +869,117 @@ impl crate::CommandEncoder for super::CommandEncoder { } } +unsafe fn to_raw_acceleration_structure_geometry_and_range_info( + desc: &crate::BuildAccelerationStructureDescriptor, + ray_tracing_functions: &super::RayTracingDeviceExtensionFunctions, + require_buffers: bool, +) -> ( + Vec, + Vec, +) { + let get_device_address = |buffer: Option<&super::Buffer>| { + if let Some(buffer) = buffer { + unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ) + } + } else { + if require_buffers { + panic!("Buffers are required to build acceleration structures"); + } + 0u64 + } + }; + let (geometries, ranges) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( + vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(instances.buffer), + }, + ); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instance_data, + }) + .flags(vk::GeometryFlagsKHR::empty()); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(instances.count); + + 
(vec![*geometry], vec![*range]) + } + crate::AccelerationStructureEntries::Triangles(in_geometries) => { + let mut ranges = Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + for triangles in in_geometries { + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(triangles.vertex_buffer), + }) + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + .vertex_stride(triangles.vertex_stride); + + let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + + if let Some(indices) = &triangles.indices { + triangle_data = triangle_data + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(indices.buffer), + }) + .index_type(conv::map_index_format(indices.format)); + + range = range + .primitive_count(indices.count / 3) + .primitive_offset(indices.offset) + .first_vertex(triangles.first_vertex); + } else { + range = range + .primitive_count(triangles.vertex_count) + .first_vertex(triangles.first_vertex); + } + if let Some(transform) = &triangles.transforms { + let transform_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(transform.buffer.raw), + ) + }; + triangle_data = triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { + device_address: transform_device_address, + }); + + range = range.transform_offset(transform.offset); + } + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + .build(); + geometries.push(geometry); + ranges.push(*range); + } + (geometries, ranges) + } + 
crate::AccelerationStructureEntries::AABBs(_) => todo!(), + }; + (geometries, ranges) +} + #[test] fn check_dst_image_layout() { assert_eq!( From a59d8882be2d59be1c4fa0737911a3f5608d6670 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 28 Feb 2023 11:51:48 +0100 Subject: [PATCH 19/33] WIP api change get_acceleration_structure_build_sizes --- wgpu-hal/examples/ray-traced-triangle/main.rs | 68 +++--- wgpu-hal/src/dx11/command.rs | 2 +- wgpu-hal/src/dx11/device.rs | 10 +- wgpu-hal/src/dx12/device.rs | 2 +- wgpu-hal/src/empty.rs | 2 +- wgpu-hal/src/gles/device.rs | 2 +- wgpu-hal/src/lib.rs | 29 +-- wgpu-hal/src/vulkan/command.rs | 209 ++++++++---------- wgpu-hal/src/vulkan/device.rs | 81 ++++--- 9 files changed, 197 insertions(+), 208 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index da2e446d47..1d41a74182 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -445,18 +445,33 @@ impl Example { indices_buffer }; + let blas_triangles = vec![hal::AccelerationStructureTriangles { + vertex_buffer: Some(&vertices_buffer), + first_vertex: 0, + vertex_format: wgt::VertexFormat::Float32x3, + vertex_count: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureTriangleIndices { + buffer: Some(&indices_buffer), + format: wgt::IndexFormat::Uint32, + offset: 0, + count: indices.len() as u32, + }), + transforms: None, + }]; + let blas_entries = hal::AccelerationStructureEntries::Triangles(&blas_triangles); + + let mut tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: None, + count: 3, + }); + let blas_sizes = unsafe { device.get_acceleration_structure_build_sizes( &hal::GetAccelerationStructureBuildSizesDescriptor { - geometry_info: hal::AccelerationStructureGeometryInfo::Triangles { - vertex_format: wgt::VertexFormat::Float32x3, - max_vertex: 3, - 
index_format: Some(wgt::IndexFormat::Uint32), - }, - format: hal::AccelerationStructureFormat::BottomLevel, - mode: hal::AccelerationStructureBuildMode::Build, + entries: &blas_entries, flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, - primitive_count: 1, }, ) }; @@ -467,11 +482,8 @@ impl Example { let tlas_sizes = unsafe { device.get_acceleration_structure_build_sizes( &hal::GetAccelerationStructureBuildSizesDescriptor { - geometry_info: hal::AccelerationStructureGeometryInfo::Instances, - format: hal::AccelerationStructureFormat::TopLevel, - mode: hal::AccelerationStructureBuildMode::Build, + entries: &tlas_entries, flags: tlas_flags, - primitive_count: 1, }, ) }; @@ -673,6 +685,14 @@ impl Example { instances_buffer }; + if let hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries { + i.buffer = Some(&instances_buffer); + assert!( + instances.len() <= i.count as usize, + "Tlas allocation to small" + ); + } + let cmd_encoder_desc = hal::CommandEncoderDescriptor { label: None, queue: &queue, @@ -682,26 +702,12 @@ impl Example { unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; unsafe { - let geometry = hal::AccelerationStructureTriangles { - vertex_buffer: Some(&vertices_buffer), - first_vertex: 0, - vertex_format: wgt::VertexFormat::Float32x3, - vertex_count: vertices.len() as u32, - vertex_stride: 3 * 4, - indices: Some(hal::AccelerationStructureTriangleIndices { - buffer: Some(&indices_buffer), - format: wgt::IndexFormat::Uint32, - offset: 0, - count: indices.len() as u32, - }), - transforms: None, - }; cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Build, flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, destination_acceleration_structure: &blas, scratch_buffer: &scratch_buffer, - entries: &hal::AccelerationStructureEntries::Triangles(&vec![geometry]), + entries: &blas_entries, source_acceleration_structure: None, }); @@ 
-712,16 +718,12 @@ impl Example { }; cmd_encoder.transition_buffers(iter::once(as_barrier)); - let instances = hal::AccelerationStructureInstances { - buffer: Some(&instances_buffer), - count: instances.len() as u32, - }; cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Build, flags: tlas_flags, destination_acceleration_structure: &tlas, scratch_buffer: &scratch_buffer, - entries: &hal::AccelerationStructureEntries::Instances(&instances), + entries: &tlas_entries, source_acceleration_structure: None, }); @@ -833,7 +835,7 @@ impl Example { flags: tlas_flags, destination_acceleration_structure: &self.tlas, scratch_buffer: &self.scratch_buffer, - entries: &hal::AccelerationStructureEntries::Instances(&instances), + entries: &hal::AccelerationStructureEntries::Instances(instances), source_acceleration_structure: Some(&self.tlas), }); diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 3ec95d0c33..20bf0cc9d0 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -270,6 +270,6 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, desc: &crate::BuildAccelerationStructureDescriptor, ) { - todo!() + unimplemented!() } } diff --git a/wgpu-hal/src/dx11/device.rs b/wgpu-hal/src/dx11/device.rs index 847b002aa2..373ae7d0b0 100644 --- a/wgpu-hal/src/dx11/device.rs +++ b/wgpu-hal/src/dx11/device.rs @@ -205,25 +205,25 @@ impl crate::Device for super::Device { &self, desc: &crate::AccelerationStructureDescriptor, ) -> Result { - todo!() + unimplemented!() } unsafe fn get_acceleration_structure_build_sizes( &self, - desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { - todo!() + unimplemented!() } unsafe fn get_acceleration_structure_device_address( &self, acceleration_structure: &super::AccelerationStructure, ) -> 
wgt::BufferAddress { - todo!() + unimplemented!() } unsafe fn destroy_acceleration_structure( &self, acceleration_structure: super::AccelerationStructure, ) { - todo!() + unimplemented!() } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index fd0922670f..9e74177fb3 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1597,7 +1597,7 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { // Implement using `GetRaytracingAccelerationStructurePrebuildInfo`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#getraytracingaccelerationstructureprebuildinfo diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 049382e77e..670705c834 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -245,7 +245,7 @@ impl crate::Device for Context { } unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { Default::default() } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 1aed0a2232..b81c165749 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1326,7 +1326,7 @@ impl crate::Device for super::Device { } unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index a26cf87cd6..d5ea92a39a 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -357,7 +357,7 @@ pub trait Device: Send + Sync { ) -> Result; unsafe fn 
get_acceleration_structure_build_sizes( &self, - desc: &GetAccelerationStructureBuildSizesDescriptor, + desc: &GetAccelerationStructureBuildSizesDescriptor, ) -> AccelerationStructureBuildSizes; unsafe fn get_acceleration_structure_device_address( &self, @@ -1373,24 +1373,6 @@ pub struct AccelerationStructureBuildSizes { pub build_scratch_size: wgt::BufferAddress, } -pub struct GetAccelerationStructureBuildSizesDescriptor { - pub geometry_info: AccelerationStructureGeometryInfo, - pub format: AccelerationStructureFormat, - pub mode: AccelerationStructureBuildMode, - pub flags: AccelerationStructureBuildFlags, - pub primitive_count: u32, -} - -#[derive(Clone, Copy)] -pub enum AccelerationStructureGeometryInfo { - Triangles { - vertex_format: wgt::VertexFormat, - max_vertex: u32, - index_format: Option, - }, - Instances, -} - pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { pub entries: &'a AccelerationStructureEntries<'a, A>, pub mode: AccelerationStructureBuildMode, @@ -1400,13 +1382,20 @@ pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { pub scratch_buffer: &'a A::Buffer, } +pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { + pub entries: &'a AccelerationStructureEntries<'a, A>, + pub flags: AccelerationStructureBuildFlags, +} + /// /// Usage for buffer size requirements: /// All Buffers, BufferAdresses and offsets will be ignored. +/// The build mode will be ignored. /// Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Trinagles/AABBs in coresponding groups), /// may result in reduced size requirements. +/// Any other change may result in a bigger or smaller size requirement. 
pub enum AccelerationStructureEntries<'a, A: Api> { - Instances(&'a AccelerationStructureInstances<'a, A>), + Instances(AccelerationStructureInstances<'a, A>), Triangles(&'a [AccelerationStructureTriangles<'a, A>]), AABBs(&'a [AccelerationStructureAABBs]), } diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index b80b557ffb..e5d36e4ad9 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -375,8 +375,100 @@ impl crate::CommandEncoder for super::CommandEncoder { None => panic!("Feature `RAY_TRACING` not enabled"), }; - let (geometries, ranges) = unsafe { - to_raw_acceleration_structure_geometry_and_range_info(desc, ray_tracing_functions, true) + let get_device_address = |buffer: Option<&super::Buffer>| unsafe { + match buffer { + Some(buffer) => ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ), + None => panic!("Buffers are required to build acceleration structures"), + } + }; + + let (geometries, ranges) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(ref instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(instances.buffer), + }); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instance_data, + }) + .flags(vk::GeometryFlagsKHR::empty()); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(instances.count); + + (vec![*geometry], vec![*range]) + } + crate::AccelerationStructureEntries::Triangles(in_geometries) => { + let mut ranges = Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + for triangles in in_geometries { + let mut 
triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(triangles.vertex_buffer), + }) + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + .vertex_stride(triangles.vertex_stride); + + let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + + if let Some(ref indices) = triangles.indices { + triangle_data = triangle_data + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(indices.buffer), + }) + .index_type(conv::map_index_format(indices.format)); + + range = range + .primitive_count(indices.count / 3) + .primitive_offset(indices.offset) + .first_vertex(triangles.first_vertex); + } else { + range = range + .primitive_count(triangles.vertex_count) + .first_vertex(triangles.first_vertex); + } + if let Some(ref transform) = triangles.transforms { + let transform_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(transform.buffer.raw), + ) + }; + triangle_data = + triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { + device_address: transform_device_address, + }); + + range = range.transform_offset(transform.offset); + } + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + .build(); + geometries.push(geometry); + ranges.push(*range); + } + (geometries, ranges) + } + crate::AccelerationStructureEntries::AABBs(_) => todo!(), }; let scratch_device_address = unsafe { @@ -386,7 +478,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), ) }; - let ty = match desc.entries { + let ty = 
match *desc.entries { crate::AccelerationStructureEntries::Instances(_) => { vk::AccelerationStructureTypeKHR::TOP_LEVEL } @@ -869,117 +961,6 @@ impl crate::CommandEncoder for super::CommandEncoder { } } -unsafe fn to_raw_acceleration_structure_geometry_and_range_info( - desc: &crate::BuildAccelerationStructureDescriptor, - ray_tracing_functions: &super::RayTracingDeviceExtensionFunctions, - require_buffers: bool, -) -> ( - Vec, - Vec, -) { - let get_device_address = |buffer: Option<&super::Buffer>| { - if let Some(buffer) = buffer { - unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), - ) - } - } else { - if require_buffers { - panic!("Buffers are required to build acceleration structures"); - } - 0u64 - } - }; - let (geometries, ranges) = match *desc.entries { - crate::AccelerationStructureEntries::Instances(instances) => { - let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder().data( - vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(instances.buffer), - }, - ); - - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::INSTANCES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instance_data, - }) - .flags(vk::GeometryFlagsKHR::empty()); - - let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(instances.count); - - (vec![*geometry], vec![*range]) - } - crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut ranges = Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = - Vec::::with_capacity(in_geometries.len()); - for triangles in in_geometries { - let mut triangle_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(triangles.vertex_buffer), - }) - 
.vertex_format(conv::map_vertex_format(triangles.vertex_format)) - .max_vertex(triangles.vertex_count) - .vertex_stride(triangles.vertex_stride); - - let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); - - if let Some(indices) = &triangles.indices { - triangle_data = triangle_data - .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(indices.buffer), - }) - .index_type(conv::map_index_format(indices.format)); - - range = range - .primitive_count(indices.count / 3) - .primitive_offset(indices.offset) - .first_vertex(triangles.first_vertex); - } else { - range = range - .primitive_count(triangles.vertex_count) - .first_vertex(triangles.first_vertex); - } - if let Some(transform) = &triangles.transforms { - let transform_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(transform.buffer.raw), - ) - }; - triangle_data = triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { - device_address: transform_device_address, - }); - - range = range.transform_offset(transform.offset); - } - - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: *triangle_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) - .build(); - geometries.push(geometry); - ranges.push(*range); - } - (geometries, ranges) - } - crate::AccelerationStructureEntries::AABBs(_) => todo!(), - }; - (geometries, ranges) -} - #[test] fn check_dst_image_layout() { assert_eq!( diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 434110aefe..01c0358cec 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2061,55 +2061,72 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - desc: 
&crate::GetAccelerationStructureBuildSizesDescriptor, + desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), }; - let geometry = match desc.geometry_info { - crate::AccelerationStructureGeometryInfo::Instances => { - let instances_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); + let (geometries, primitive_counts) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(ref instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); - vk::AccelerationStructureGeometryKHR::builder() + let geometry = vk::AccelerationStructureGeometryKHR::builder() .geometry_type(vk::GeometryTypeKHR::INSTANCES) .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instances_data, + instances: *instance_data, }) - .flags(vk::GeometryFlagsKHR::empty()) + .flags(vk::GeometryFlagsKHR::empty()); + + (vec![*geometry], vec![instances.count]) } - crate::AccelerationStructureGeometryInfo::Triangles { - vertex_format, - max_vertex, - index_format, - } => { - let mut triangles_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_format(conv::map_vertex_format(vertex_format)) - .max_vertex(max_vertex); - - if let Some(index_format) = index_format { - triangles_data = - triangles_data.index_type(conv::map_index_format(index_format)); - } + crate::AccelerationStructureEntries::Triangles(in_geometries) => { + let mut primitive_counts = Vec::::with_capacity(in_geometries.len()); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + + for triangles in in_geometries { + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + 
.vertex_stride(triangles.vertex_stride); + + let pritive_count = if let Some(ref indices) = triangles.indices { + triangle_data = + triangle_data.index_type(conv::map_index_format(indices.format)); + indices.count / 3 + } else { + triangles.vertex_count + }; - vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: *triangles_data, - }) - .flags(vk::GeometryFlagsKHR::empty()) + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, + }) + .flags(vk::GeometryFlagsKHR::empty()) + .build(); + geometries.push(geometry); + primitive_counts.push(pritive_count); + } + (geometries, primitive_counts) } + crate::AccelerationStructureEntries::AABBs(_) => todo!(), }; - let geometries = &[*geometry]; + let ty = match *desc.entries { + crate::AccelerationStructureEntries::Instances(_) => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL + } + _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; let geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(conv::map_acceleration_structure_format(desc.format)) - .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .ty(ty) .flags(conv::map_acceleration_structure_flags(desc.flags)) - .geometries(geometries); + .geometries(&geometries); let raw = unsafe { ray_tracing_functions @@ -2117,7 +2134,7 @@ impl crate::Device for super::Device { .get_acceleration_structure_build_sizes( vk::AccelerationStructureBuildTypeKHR::DEVICE, &geometry_info, - &[desc.primitive_count], + &primitive_counts, ) }; From 93a347dfd94d80490ae0a0665db966a353b32ade Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Wed, 1 Mar 2023 06:48:54 +0100 Subject: [PATCH 20/33] added aabbs and geometry flag support --- wgpu-hal/examples/ray-traced-triangle/main.rs | 3 ++ wgpu-hal/src/lib.rs | 
30 +++++++++---- wgpu-hal/src/vulkan/command.rs | 43 ++++++++++++++++--- wgpu-hal/src/vulkan/conv.rs | 20 +++++++++ wgpu-hal/src/vulkan/device.rs | 34 +++++++++++---- 5 files changed, 107 insertions(+), 23 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 1d41a74182..4eabf3e433 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -458,6 +458,7 @@ impl Example { count: indices.len() as u32, }), transforms: None, + flags: hal::AccelerationStructureGeometryFlags::OPAQUE, }]; let blas_entries = hal::AccelerationStructureEntries::Triangles(&blas_triangles); @@ -465,6 +466,7 @@ impl Example { hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { buffer: None, count: 3, + offset: 0, }); let blas_sizes = unsafe { @@ -828,6 +830,7 @@ impl Example { let instances = hal::AccelerationStructureInstances { buffer: Some(&self.instances_buffer), count: self.instances.len() as u32, + offset: 0, }; ctx.encoder .build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index d5ea92a39a..cbd8dc2d99 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1397,28 +1397,34 @@ pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { pub enum AccelerationStructureEntries<'a, A: Api> { Instances(AccelerationStructureInstances<'a, A>), Triangles(&'a [AccelerationStructureTriangles<'a, A>]), - AABBs(&'a [AccelerationStructureAABBs]), + AABBs(&'a [AccelerationStructureAABBs<'a, A>]), } // TODO: flags pub struct AccelerationStructureTriangles<'a, A: Api> { pub vertex_buffer: Option<&'a A::Buffer>, - pub first_vertex: u32, pub vertex_format: wgt::VertexFormat, + pub first_vertex: u32, pub vertex_count: u32, pub vertex_stride: wgt::BufferAddress, pub indices: Option>, pub transforms: Option>, + pub flags: AccelerationStructureGeometryFlags, } 
// TODO: * -pub struct AccelerationStructureAABBs { - pub count: u32, //TODO +pub struct AccelerationStructureAABBs<'a, A: Api> { + pub buffer: Option<&'a A::Buffer>, + pub offset: u32, + pub count: u32, + pub stride: wgt::BufferAddress, + pub flags: AccelerationStructureGeometryFlags, } // TODO: offset pub struct AccelerationStructureInstances<'a, A: Api> { pub buffer: Option<&'a A::Buffer>, + pub offset: u32, pub count: u32, } @@ -1436,9 +1442,17 @@ pub struct AccelerationStructureTriangleTransforms<'a, A: Api> { bitflags!( pub struct AccelerationStructureBuildFlags: u32 { - const PREFER_FAST_TRACE = 1 << 0; - const PREFER_FAST_BUILD = 1 << 1; - const ALLOW_UPDATE = 1 << 2; - const LOW_MEMORY = 1 << 3; + const ALLOW_UPDATE = 1 << 0; + const ALLOW_COMPACTION = 1 << 1; + const PREFER_FAST_TRACE = 1 << 2; + const PREFER_FAST_BUILD = 1 << 3; + const LOW_MEMORY = 1 << 4; + } +); + +bitflags!( + pub struct AccelerationStructureGeometryFlags: u32 { + const OPAQUE = 1 << 0; + const NO_DUPLICATE_ANY_HIT_INVOCATION = 1 << 1; } ); diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index e5d36e4ad9..c1f7b8152a 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -397,11 +397,11 @@ impl crate::CommandEncoder for super::CommandEncoder { .geometry_type(vk::GeometryTypeKHR::INSTANCES) .geometry(vk::AccelerationStructureGeometryDataKHR { instances: *instance_data, - }) - .flags(vk::GeometryFlagsKHR::empty()); + }); let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(instances.count); + .primitive_count(instances.count) + .primitive_offset(instances.offset); (vec![*geometry], vec![*range]) } @@ -439,6 +439,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .primitive_count(triangles.vertex_count) .first_vertex(triangles.first_vertex); } + if let Some(ref transform) = triangles.transforms { let transform_device_address = unsafe { ray_tracing_functions @@ -461,14 +462,42 @@ impl 
crate::CommandEncoder for super::CommandEncoder { .geometry(vk::AccelerationStructureGeometryDataKHR { triangles: *triangle_data, }) - .flags(vk::GeometryFlagsKHR::empty()) - .build(); - geometries.push(geometry); + .flags(conv::map_acceleration_structure_geomety_flags( + triangles.flags, + )); + + geometries.push(*geometry); + ranges.push(*range); + } + (geometries, ranges) + } + crate::AccelerationStructureEntries::AABBs(in_geometries) => { + let mut ranges = Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + for aabb in in_geometries { + let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(aabb.buffer), + }) + .stride(aabb.stride); + + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(aabb.count) + .primitive_offset(aabb.offset); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::AABBS) + .geometry(vk::AccelerationStructureGeometryDataKHR { aabbs: *aabbs_data }) + .flags(conv::map_acceleration_structure_geomety_flags(aabb.flags)); + + geometries.push(*geometry); ranges.push(*range); } (geometries, ranges) } - crate::AccelerationStructureEntries::AABBs(_) => todo!(), }; let scratch_device_address = unsafe { diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 2719a56924..2269eed978 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -889,5 +889,25 @@ pub fn map_acceleration_structure_flags( vk_flags |= vk::BuildAccelerationStructureFlagsKHR::LOW_MEMORY; } + if flags.contains(crate::AccelerationStructureBuildFlags::ALLOW_COMPACTION) { + vk_flags |= vk::BuildAccelerationStructureFlagsKHR::ALLOW_COMPACTION + } + + vk_flags +} + +pub fn map_acceleration_structure_geomety_flags( + flags: crate::AccelerationStructureGeometryFlags, +) -> vk::GeometryFlagsKHR { + let 
mut vk_flags = vk::GeometryFlagsKHR::empty(); + + if flags.contains(crate::AccelerationStructureGeometryFlags::OPAQUE) { + vk_flags |= vk::GeometryFlagsKHR::OPAQUE; + } + + if flags.contains(crate::AccelerationStructureGeometryFlags::NO_DUPLICATE_ANY_HIT_INVOCATION) { + vk_flags |= vk::GeometryFlagsKHR::NO_DUPLICATE_ANY_HIT_INVOCATION; + } + vk_flags } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 01c0358cec..ddc5e4a1d1 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2070,14 +2070,13 @@ impl crate::Device for super::Device { let (geometries, primitive_counts) = match *desc.entries { crate::AccelerationStructureEntries::Instances(ref instances) => { - let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder(); + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::default(); let geometry = vk::AccelerationStructureGeometryKHR::builder() .geometry_type(vk::GeometryTypeKHR::INSTANCES) .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instance_data, - }) - .flags(vk::GeometryFlagsKHR::empty()); + instances: instance_data, + }); (vec![*geometry], vec![instances.count]) } @@ -2106,14 +2105,33 @@ impl crate::Device for super::Device { .geometry(vk::AccelerationStructureGeometryDataKHR { triangles: *triangle_data, }) - .flags(vk::GeometryFlagsKHR::empty()) - .build(); - geometries.push(geometry); + .flags(conv::map_acceleration_structure_geomety_flags( + triangles.flags, + )); + + geometries.push(*geometry); primitive_counts.push(pritive_count); } (geometries, primitive_counts) } - crate::AccelerationStructureEntries::AABBs(_) => todo!(), + crate::AccelerationStructureEntries::AABBs(in_geometries) => { + let mut primitive_counts = Vec::::with_capacity(in_geometries.len()); + let mut geometries = + Vec::::with_capacity(in_geometries.len()); + for aabb in in_geometries { + let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() + 
.stride(aabb.stride); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::AABBS) + .geometry(vk::AccelerationStructureGeometryDataKHR { aabbs: *aabbs_data }) + .flags(conv::map_acceleration_structure_geomety_flags(aabb.flags)); + + geometries.push(*geometry); + primitive_counts.push(aabb.count); + } + (geometries, primitive_counts) + } }; let ty = match *desc.entries { From 16cc4eb572a4fcd8b074ffa0229067877dc48c06 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Wed, 1 Mar 2023 07:23:10 +0100 Subject: [PATCH 21/33] fixed RAY_TRACING feature collision + seperated RAY_QUERY --- wgpu-hal/src/vulkan/adapter.rs | 13 +++++++++++-- wgpu-types/src/lib.rs | 21 ++++++++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index b024e1388d..2e5e40c2ba 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -573,8 +573,12 @@ impl PhysicalDeviceFeatures { F::RAY_TRACING, caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) - && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()) - && caps.supports_extension(vk::KhrRayQueryFn::name()), + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()), + ); + + features.set( + F::RAY_QUERY, + caps.supports_extension(vk::KhrRayQueryFn::name()), ); (features, dl_flags) @@ -735,10 +739,15 @@ impl PhysicalDeviceCapabilities { extensions.push(vk::KhrDrawIndirectCountFn::name()); } + // Require `VK_KHR_deferred_host_operations`, `VK_KHR_acceleration_structure` and `VK_KHR_buffer_device_address` if the feature `RAY_TRACING` was requested if requested_features.contains(wgt::Features::RAY_TRACING) { extensions.push(vk::KhrDeferredHostOperationsFn::name()); extensions.push(vk::KhrAccelerationStructureFn::name()); extensions.push(vk::KhrBufferDeviceAddressFn::name()); + } + + // 
Require `VK_KHR_ray_query` if the associated feature was requested + if requested_features.contains(wgt::Features::RAY_QUERY) { extensions.push(vk::KhrRayQueryFn::name()); } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index c57e8b5d61..9692dfde6d 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -687,13 +687,6 @@ bitflags::bitflags! { /// This is currently unimplemented on Metal. /// When implemented, it will be supported on Metal on AMD and Intel GPUs, but not Apple GPUs. const WRITE_TIMESTAMP_INSIDE_PASSES = 1 << 41; - /// Allows for the creation of ray-tracing acceleration structures and ray queries within shaders. - /// - /// Supported platforms: - /// - Vulkan - /// - /// This is a native-only feature. - const RAY_TRACING = 1 << 42; /// Allows shaders to use i16. Not currently supported in naga, only available through `spirv-passthrough`. const SHADER_INT16 = 1 << 42; /// Allows shaders to use the `early_depth_test` attribute. @@ -703,6 +696,20 @@ bitflags::bitflags! { /// /// This is a native-only feature. const SHADER_EARLY_DEPTH_TEST = 1 << 43; + /// Allows for the creation of ray-tracing acceleration structures. + /// + /// Supported platforms: + /// - Vulkan + /// + /// This is a native-only feature. + const RAY_TRACING = 1 << 44; + /// Allows for the creation of ray-tracing queries within shaders. + /// + /// Supported platforms: + /// - Vulkan + /// + /// This is a native-only feature. 
+ const RAY_QUERY = 1 << 45; } } From 5b8423892810754ac85bdde1b8fcfeca97656931 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Wed, 1 Mar 2023 07:27:27 +0100 Subject: [PATCH 22/33] renamed to build_acceleration_structure --- wgpu-hal/examples/ray-traced-triangle/main.rs | 6 +++--- wgpu-hal/src/dx11/command.rs | 2 +- wgpu-hal/src/dx12/command.rs | 2 +- wgpu-hal/src/empty.rs | 2 +- wgpu-hal/src/gles/command.rs | 2 +- wgpu-hal/src/lib.rs | 2 +- wgpu-hal/src/metal/command.rs | 2 +- wgpu-hal/src/vulkan/command.rs | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 4eabf3e433..bfb148cb8d 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -704,7 +704,7 @@ impl Example { unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; unsafe { - cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + cmd_encoder.build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Build, flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, destination_acceleration_structure: &blas, @@ -720,7 +720,7 @@ impl Example { }; cmd_encoder.transition_buffers(iter::once(as_barrier)); - cmd_encoder.build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + cmd_encoder.build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Build, flags: tlas_flags, destination_acceleration_structure: &tlas, @@ -833,7 +833,7 @@ impl Example { offset: 0, }; ctx.encoder - .build_acceleration_structures(&hal::BuildAccelerationStructureDescriptor { + .build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Update, flags: tlas_flags, destination_acceleration_structure: &self.tlas, diff --git 
a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 20bf0cc9d0..5949b7ec8a 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -266,7 +266,7 @@ impl crate::CommandEncoder for super::CommandEncoder { todo!() } - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, desc: &crate::BuildAccelerationStructureDescriptor, ) { diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index eeabe19587..9062ffc142 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1144,7 +1144,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }; } - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, _desc: &crate::BuildAccelerationStructureDescriptor, ) { diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 670705c834..2124c52f84 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -431,7 +431,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn dispatch(&mut self, count: [u32; 3]) {} unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, _desc: &crate::BuildAccelerationStructureDescriptor, ) { diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index b529eea801..6a931756f6 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1061,7 +1061,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }); } - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, _desc: &crate::BuildAccelerationStructureDescriptor, ) { diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index cbd8dc2d99..cd45d9474b 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -582,7 +582,7 @@ pub trait CommandEncoder: Send + Sync + fmt::Debug { unsafe fn dispatch(&mut self, count: [u32; 
3]); unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, desc: &BuildAccelerationStructureDescriptor, ); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 16e8abbf4b..31ef925c7f 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -970,7 +970,7 @@ impl crate::CommandEncoder for super::CommandEncoder { encoder.dispatch_thread_groups_indirect(&buffer.raw, offset, self.state.raw_wg_size); } - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, _desc: &crate::BuildAccelerationStructureDescriptor, ) { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index c1f7b8152a..a1b349f36c 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -366,7 +366,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }; } - unsafe fn build_acceleration_structures( + unsafe fn build_acceleration_structure( &mut self, desc: &crate::BuildAccelerationStructureDescriptor, ) { From caa4ae7ab89ceb1192281435ff3aea3eeb1ac24a Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Wed, 1 Mar 2023 19:48:03 +0100 Subject: [PATCH 23/33] fixed missing type parameter for metal --- wgpu-hal/src/metal/device.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 55104f2e7c..f9259bec97 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1158,7 +1158,7 @@ impl crate::Device for super::Device { unsafe fn get_acceleration_structure_build_sizes( &self, - _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, + _desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { unimplemented!() } From bee8a36c5b71008d6abee5b7deed6800dd26548a Mon Sep 17 00:00:00 2001 From: 
Daniel Keitel Date: Sun, 19 Mar 2023 14:32:55 +0100 Subject: [PATCH 24/33] multiple AS builds at once (better allocation strategy required) --- wgpu-hal/examples/ray-traced-triangle/main.rs | 43 +-- wgpu-hal/src/dx11/command.rs | 4 +- wgpu-hal/src/dx12/command.rs | 4 +- wgpu-hal/src/empty.rs | 4 +- wgpu-hal/src/gles/command.rs | 4 +- wgpu-hal/src/lib.rs | 4 +- wgpu-hal/src/vulkan/command.rs | 313 +++++++++++------- 7 files changed, 220 insertions(+), 156 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index bfb148cb8d..d7d372c84a 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -704,14 +704,16 @@ impl Example { unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; unsafe { - cmd_encoder.build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { - mode: hal::AccelerationStructureBuildMode::Build, - flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, - destination_acceleration_structure: &blas, - scratch_buffer: &scratch_buffer, - entries: &blas_entries, - source_acceleration_structure: None, - }); + cmd_encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + destination_acceleration_structure: &blas, + scratch_buffer: &scratch_buffer, + entries: &blas_entries, + source_acceleration_structure: None, + }, + ]); let as_barrier = hal::BufferBarrier { buffer: &scratch_buffer, @@ -720,14 +722,16 @@ impl Example { }; cmd_encoder.transition_buffers(iter::once(as_barrier)); - cmd_encoder.build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { - mode: hal::AccelerationStructureBuildMode::Build, - flags: tlas_flags, - destination_acceleration_structure: &tlas, - scratch_buffer: &scratch_buffer, - entries: &tlas_entries, - 
source_acceleration_structure: None, - }); + cmd_encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + entries: &tlas_entries, + source_acceleration_structure: None, + }, + ]); let texture_barrier = hal::TextureBarrier { texture: &texture, @@ -832,15 +836,16 @@ impl Example { count: self.instances.len() as u32, offset: 0, }; - ctx.encoder - .build_acceleration_structure(&hal::BuildAccelerationStructureDescriptor { + ctx.encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { mode: hal::AccelerationStructureBuildMode::Update, flags: tlas_flags, destination_acceleration_structure: &self.tlas, scratch_buffer: &self.scratch_buffer, entries: &hal::AccelerationStructureEntries::Instances(instances), source_acceleration_structure: Some(&self.tlas), - }); + }, + ]); let as_barrier = hal::BufferBarrier { buffer: &self.scratch_buffer, diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 5949b7ec8a..113a14e179 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -266,9 +266,9 @@ impl crate::CommandEncoder for super::CommandEncoder { todo!() } - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - desc: &crate::BuildAccelerationStructureDescriptor, + desc: &[&crate::BuildAccelerationStructureDescriptor], ) { unimplemented!() } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 2521db7a7d..0b2068323d 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1141,9 +1141,9 @@ impl crate::CommandEncoder for super::CommandEncoder { }; } - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - _desc: &crate::BuildAccelerationStructureDescriptor, + _desc: 
&[&crate::BuildAccelerationStructureDescriptor], ) { // Implement using `BuildRaytracingAccelerationStructure`: // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 2124c52f84..b86d2f90a1 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -431,9 +431,9 @@ impl crate::CommandEncoder for Encoder { unsafe fn dispatch(&mut self, count: [u32; 3]) {} unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - _desc: &crate::BuildAccelerationStructureDescriptor, + _desc: &[&crate::BuildAccelerationStructureDescriptor], ) { } } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 6a931756f6..afff9c08b4 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1061,9 +1061,9 @@ impl crate::CommandEncoder for super::CommandEncoder { }); } - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - _desc: &crate::BuildAccelerationStructureDescriptor, + _desc: &[&crate::BuildAccelerationStructureDescriptor], ) { unimplemented!() } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 1c1f71068c..3d721d81f8 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -568,9 +568,9 @@ pub trait CommandEncoder: Send + Sync + fmt::Debug { unsafe fn dispatch(&mut self, count: [u32; 3]); unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - desc: &BuildAccelerationStructureDescriptor, + descriptors: &[&BuildAccelerationStructureDescriptor], ); } diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 136d1319c7..1c67ca4e9e 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ 
b/wgpu-hal/src/vulkan/command.rs @@ -366,9 +366,9 @@ impl crate::CommandEncoder for super::CommandEncoder { }; } - unsafe fn build_acceleration_structure( + unsafe fn build_acceleration_structures( &mut self, - desc: &crate::BuildAccelerationStructureDescriptor, + descriptors: &[&crate::BuildAccelerationStructureDescriptor], ) { let ray_tracing_functions = match self.device.extension_fns.ray_tracing { Some(ref functions) => functions, @@ -386,157 +386,216 @@ impl crate::CommandEncoder for super::CommandEncoder { } }; - let (geometries, ranges) = match *desc.entries { - crate::AccelerationStructureEntries::Instances(ref instances) => { - let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder() + // storage to all the data required for cmd_build_acceleration_structures + let mut ranges_storage = + Vec::>::with_capacity( + descriptors.len(), + ); + let mut geometries_storage = + Vec::>::with_capacity(descriptors.len()); + + // pointers to all the data required for cmd_build_acceleration_structures + + let mut geometry_infos = + Vec::::with_capacity(descriptors.len()); + + let mut ranges_ptrs = + Vec::<&[vk::AccelerationStructureBuildRangeInfoKHR]>::with_capacity(descriptors.len()); + + for desc in descriptors { + let (geometries, ranges) = match *desc.entries { + crate::AccelerationStructureEntries::Instances(ref instances) => { + let instance_data = vk::AccelerationStructureGeometryInstancesDataKHR::builder( + ) .data(vk::DeviceOrHostAddressConstKHR { device_address: get_device_address(instances.buffer), }); - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::INSTANCES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - instances: *instance_data, - }); + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::INSTANCES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + instances: *instance_data, + }); - let range = 
vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(instances.count) - .primitive_offset(instances.offset); + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(instances.count) + .primitive_offset(instances.offset); - (vec![*geometry], vec![*range]) - } - crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut ranges = Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = - Vec::::with_capacity(in_geometries.len()); - for triangles in in_geometries { - let mut triangle_data = - vk::AccelerationStructureGeometryTrianglesDataKHR::builder() - .vertex_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(triangles.vertex_buffer), + (vec![*geometry], vec![*range]) + } + crate::AccelerationStructureEntries::Triangles(in_geometries) => { + let mut ranges = + Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = Vec::::with_capacity( + in_geometries.len(), + ); + for triangles in in_geometries { + let mut triangle_data = + vk::AccelerationStructureGeometryTrianglesDataKHR::builder() + .vertex_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(triangles.vertex_buffer), + }) + .vertex_format(conv::map_vertex_format(triangles.vertex_format)) + .max_vertex(triangles.vertex_count) + .vertex_stride(triangles.vertex_stride); + + let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + + if let Some(ref indices) = triangles.indices { + triangle_data = triangle_data + .index_data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(indices.buffer), + }) + .index_type(conv::map_index_format(indices.format)); + + range = range + .primitive_count(indices.count / 3) + .primitive_offset(indices.offset) + .first_vertex(triangles.first_vertex); + } else { + range = range + .primitive_count(triangles.vertex_count) + .first_vertex(triangles.first_vertex); + } + + if let Some(ref transform) = 
triangles.transforms { + let transform_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder() + .buffer(transform.buffer.raw), + ) + }; + triangle_data = + triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { + device_address: transform_device_address, + }); + + range = range.transform_offset(transform.offset); + } + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::TRIANGLES) + .geometry(vk::AccelerationStructureGeometryDataKHR { + triangles: *triangle_data, }) - .vertex_format(conv::map_vertex_format(triangles.vertex_format)) - .max_vertex(triangles.vertex_count) - .vertex_stride(triangles.vertex_stride); + .flags(conv::map_acceleration_structure_geomety_flags( + triangles.flags, + )); - let mut range = vk::AccelerationStructureBuildRangeInfoKHR::builder(); + geometries.push(*geometry); + ranges.push(*range); + } + (geometries, ranges) + } + crate::AccelerationStructureEntries::AABBs(in_geometries) => { + let mut ranges = + Vec::::with_capacity( + in_geometries.len(), + ); + let mut geometries = Vec::::with_capacity( + in_geometries.len(), + ); + for aabb in in_geometries { + let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() + .data(vk::DeviceOrHostAddressConstKHR { + device_address: get_device_address(aabb.buffer), + }) + .stride(aabb.stride); - if let Some(ref indices) = triangles.indices { - triangle_data = triangle_data - .index_data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(indices.buffer), + let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() + .primitive_count(aabb.count) + .primitive_offset(aabb.offset); + + let geometry = vk::AccelerationStructureGeometryKHR::builder() + .geometry_type(vk::GeometryTypeKHR::AABBS) + .geometry(vk::AccelerationStructureGeometryDataKHR { + aabbs: *aabbs_data, }) - 
.index_type(conv::map_index_format(indices.format)); - - range = range - .primitive_count(indices.count / 3) - .primitive_offset(indices.offset) - .first_vertex(triangles.first_vertex); - } else { - range = range - .primitive_count(triangles.vertex_count) - .first_vertex(triangles.first_vertex); - } + .flags(conv::map_acceleration_structure_geomety_flags(aabb.flags)); - if let Some(ref transform) = triangles.transforms { - let transform_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder() - .buffer(transform.buffer.raw), - ) - }; - triangle_data = - triangle_data.transform_data(vk::DeviceOrHostAddressConstKHR { - device_address: transform_device_address, - }); - - range = range.transform_offset(transform.offset); + geometries.push(*geometry); + ranges.push(*range); } - - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::TRIANGLES) - .geometry(vk::AccelerationStructureGeometryDataKHR { - triangles: *triangle_data, - }) - .flags(conv::map_acceleration_structure_geomety_flags( - triangles.flags, - )); - - geometries.push(*geometry); - ranges.push(*range); + (geometries, ranges) } - (geometries, ranges) - } - crate::AccelerationStructureEntries::AABBs(in_geometries) => { - let mut ranges = Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = - Vec::::with_capacity(in_geometries.len()); - for aabb in in_geometries { - let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() - .data(vk::DeviceOrHostAddressConstKHR { - device_address: get_device_address(aabb.buffer), - }) - .stride(aabb.stride); - - let range = vk::AccelerationStructureBuildRangeInfoKHR::builder() - .primitive_count(aabb.count) - .primitive_offset(aabb.offset); + }; - let geometry = vk::AccelerationStructureGeometryKHR::builder() - .geometry_type(vk::GeometryTypeKHR::AABBS) - .geometry(vk::AccelerationStructureGeometryDataKHR 
{ aabbs: *aabbs_data }) - .flags(conv::map_acceleration_structure_geomety_flags(aabb.flags)); + ranges_storage.push(ranges); + geometries_storage.push(geometries); + } - geometries.push(*geometry); - ranges.push(*range); + for (i, desc) in descriptors.iter().enumerate() { + let scratch_device_address = unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), + ) + }; + let ty = match *desc.entries { + crate::AccelerationStructureEntries::Instances(_) => { + vk::AccelerationStructureTypeKHR::TOP_LEVEL } - (geometries, ranges) - } - }; + _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, + }; + let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() + .ty(ty) + .mode(conv::map_acceleration_structure_build_mode(desc.mode)) + .flags(conv::map_acceleration_structure_flags(desc.flags)) + .geometries(&geometries_storage[i]) // pointer must live + .dst_acceleration_structure(desc.destination_acceleration_structure.raw) + .scratch_data(vk::DeviceOrHostAddressKHR { + device_address: scratch_device_address, + }); - let scratch_device_address = unsafe { - ray_tracing_functions - .buffer_device_address - .get_buffer_device_address( - &vk::BufferDeviceAddressInfo::builder().buffer(desc.scratch_buffer.raw), - ) - }; - let ty = match *desc.entries { - crate::AccelerationStructureEntries::Instances(_) => { - vk::AccelerationStructureTypeKHR::TOP_LEVEL + if desc.mode == crate::AccelerationStructureBuildMode::Update { + geometry_info.src_acceleration_structure = desc + .source_acceleration_structure + .expect("Acceleration tructure update: source structure required") + .raw; } - _ => vk::AccelerationStructureTypeKHR::BOTTOM_LEVEL, - }; - let mut geometry_info = vk::AccelerationStructureBuildGeometryInfoKHR::builder() - .ty(ty) - .mode(conv::map_acceleration_structure_build_mode(desc.mode)) - .flags(conv::map_acceleration_structure_flags(desc.flags)) - 
.geometries(&geometries) - .dst_acceleration_structure(desc.destination_acceleration_structure.raw) - .scratch_data(vk::DeviceOrHostAddressKHR { - device_address: scratch_device_address, - }); - if desc.mode == crate::AccelerationStructureBuildMode::Update { - geometry_info.src_acceleration_structure = desc - .source_acceleration_structure - .expect("Acceleration tructure update: source structure required") - .raw; + geometry_infos.push(*geometry_info); + ranges_ptrs.push(&ranges_storage[i]); } - let geometry_info = geometry_info.build(); + // let mut geometry_infos = + // Vec::::with_capacity(descriptors.len()); + + // let mut ranges_vec = + // Vec::<&[vk::AccelerationStructureBuildRangeInfoKHR]>::with_capacity(descriptors.len()); + + // let mut ranges_storage = + // Vec::>::with_capacity(descriptors.len()); + + // for desc in descriptors { + // let (ranges, geometry_info) = prepare_geometry_info_and_ranges(desc); + // geometry_infos.push(geometry_info); + // ranges_storage.push(ranges); + + // } + + // for i in 0..descriptors.len() { + // ranges_vec.push(&ranges_storage[i]); + // } + + // let (ranges, geometry_info) = prepare_geometry_info_and_ranges(descriptors[0]); unsafe { ray_tracing_functions .acceleration_structure - .cmd_build_acceleration_structures(self.active, &[geometry_info], &[&ranges]); + .cmd_build_acceleration_structures(self.active, &geometry_infos, &ranges_ptrs); } + + // unsafe { + // ray_tracing_functions + // .acceleration_structure + // .cmd_build_acceleration_structures(self.active, &geometry_infos, &ranges_vec); + // } } // render From a420b87d56af29282eaf7d4b786ad8bf06736698 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Sun, 19 Mar 2023 15:49:38 +0100 Subject: [PATCH 25/33] switched to using smallvec --- wgpu-hal/src/vulkan/command.rs | 57 ++++++++++++++++++---------------- wgpu-hal/src/vulkan/device.rs | 12 ++++--- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/wgpu-hal/src/vulkan/command.rs 
b/wgpu-hal/src/vulkan/command.rs index 1c67ca4e9e..f269f5b0c0 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -370,6 +370,9 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, descriptors: &[&crate::BuildAccelerationStructureDescriptor], ) { + const CAPACITY_OUTER: usize = 8; + const CAPACITY_INNER: usize = 1; + let ray_tracing_functions = match self.device.extension_fns.ray_tracing { Some(ref functions) => functions, None => panic!("Feature `RAY_TRACING` not enabled"), @@ -387,20 +390,22 @@ impl crate::CommandEncoder for super::CommandEncoder { }; // storage to all the data required for cmd_build_acceleration_structures - let mut ranges_storage = - Vec::>::with_capacity( - descriptors.len(), - ); - let mut geometries_storage = - Vec::>::with_capacity(descriptors.len()); + let mut ranges_storage = smallvec::SmallVec::< + [smallvec::SmallVec<[vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER]>; + CAPACITY_OUTER], + >::with_capacity(descriptors.len()); + let mut geometries_storage = smallvec::SmallVec::< + [smallvec::SmallVec<[vk::AccelerationStructureGeometryKHR; CAPACITY_INNER]>; + CAPACITY_OUTER], + >::with_capacity(descriptors.len()); // pointers to all the data required for cmd_build_acceleration_structures - - let mut geometry_infos = - Vec::::with_capacity(descriptors.len()); - - let mut ranges_ptrs = - Vec::<&[vk::AccelerationStructureBuildRangeInfoKHR]>::with_capacity(descriptors.len()); + let mut geometry_infos = smallvec::SmallVec::< + [vk::AccelerationStructureBuildGeometryInfoKHR; CAPACITY_OUTER], + >::with_capacity(descriptors.len()); + let mut ranges_ptrs = smallvec::SmallVec::< + [&[vk::AccelerationStructureBuildRangeInfoKHR]; CAPACITY_OUTER], + >::with_capacity(descriptors.len()); for desc in descriptors { let (geometries, ranges) = match *desc.entries { @@ -421,16 +426,15 @@ impl crate::CommandEncoder for super::CommandEncoder { .primitive_count(instances.count) 
.primitive_offset(instances.offset); - (vec![*geometry], vec![*range]) + (smallvec::smallvec![*geometry], smallvec::smallvec![*range]) } crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut ranges = - Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = Vec::::with_capacity( - in_geometries.len(), - ); + let mut ranges = smallvec::SmallVec::< + [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); for triangles in in_geometries { let mut triangle_data = vk::AccelerationStructureGeometryTrianglesDataKHR::builder() @@ -492,13 +496,12 @@ impl crate::CommandEncoder for super::CommandEncoder { (geometries, ranges) } crate::AccelerationStructureEntries::AABBs(in_geometries) => { - let mut ranges = - Vec::::with_capacity( - in_geometries.len(), - ); - let mut geometries = Vec::::with_capacity( - in_geometries.len(), - ); + let mut ranges = smallvec::SmallVec::< + [vk::AccelerationStructureBuildRangeInfoKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY_INNER], + >::with_capacity(in_geometries.len()); for aabb in in_geometries { let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() .data(vk::DeviceOrHostAddressConstKHR { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index ddc5e4a1d1..6bd79e2f33 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2063,6 +2063,8 @@ impl crate::Device for super::Device { &self, desc: &crate::GetAccelerationStructureBuildSizesDescriptor, ) -> crate::AccelerationStructureBuildSizes { + const CAPACITY: usize = 8; + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { Some(ref functions) => functions, None => 
panic!("Feature `RAY_TRACING` not enabled"), @@ -2078,12 +2080,12 @@ impl crate::Device for super::Device { instances: instance_data, }); - (vec![*geometry], vec![instances.count]) + (smallvec::smallvec![*geometry], smallvec::smallvec![instances.count]) } crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut primitive_counts = Vec::::with_capacity(in_geometries.len()); + let mut primitive_counts = smallvec::SmallVec::<[u32;CAPACITY]>::with_capacity(in_geometries.len()); let mut geometries = - Vec::::with_capacity(in_geometries.len()); + smallvec::SmallVec::<[vk::AccelerationStructureGeometryKHR;CAPACITY]>::with_capacity(in_geometries.len()); for triangles in in_geometries { let mut triangle_data = @@ -2115,9 +2117,9 @@ impl crate::Device for super::Device { (geometries, primitive_counts) } crate::AccelerationStructureEntries::AABBs(in_geometries) => { - let mut primitive_counts = Vec::::with_capacity(in_geometries.len()); + let mut primitive_counts = smallvec::SmallVec::<[u32;CAPACITY]>::with_capacity(in_geometries.len()); let mut geometries = - Vec::::with_capacity(in_geometries.len()); + smallvec::SmallVec::<[vk::AccelerationStructureGeometryKHR;CAPACITY]>::with_capacity(in_geometries.len()); for aabb in in_geometries { let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() .stride(aabb.stride); From e48f7bd3cb088376d69fcf47426b6bb92ecf3c6d Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Sun, 19 Mar 2023 17:25:21 +0100 Subject: [PATCH 26/33] small api change + documentation --- wgpu-hal/examples/ray-traced-triangle/main.rs | 2 +- wgpu-hal/src/lib.rs | 56 ++++++++++++++----- wgpu-hal/src/vulkan/command.rs | 4 +- wgpu-hal/src/vulkan/device.rs | 21 ++++--- 4 files changed, 60 insertions(+), 23 deletions(-) diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index d7d372c84a..88c66c4bba 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ 
b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -457,7 +457,7 @@ impl Example { offset: 0, count: indices.len() as u32, }), - transforms: None, + transform: None, flags: hal::AccelerationStructureGeometryFlags::OPAQUE, }]; let blas_entries = hal::AccelerationStructureEntries::Triangles(&blas_triangles); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 3d721d81f8..785544b557 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -568,6 +568,13 @@ pub trait CommandEncoder: Send + Sync + fmt::Debug { unsafe fn dispatch(&mut self, count: [u32; 3]); unsafe fn dispatch_indirect(&mut self, buffer: &A::Buffer, offset: wgt::BufferAddress); + /// To get the required sizes for the buffer allocations use `get_acceleration_structure_build_sizes` per descriptor + /// All buffers must be synchronized externally + /// All buffer regions, which are written to may only be passed once per function call, + /// with the exertion of updates in the same descriptor. + /// Consequences of this limitation: + /// - scratch buffers need to be unique + /// - a tlas can't be build in the same call with a blas it contains unsafe fn build_acceleration_structures( &mut self, descriptors: &[&BuildAccelerationStructureDescriptor], @@ -1352,13 +1359,17 @@ pub enum AccelerationStructureBuildMode { Update, } -#[derive(Clone, Debug, Default)] +/// Information of the required size for a corresponding entries struct (+ flags) +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct AccelerationStructureBuildSizes { pub acceleration_structure_size: wgt::BufferAddress, pub update_scratch_size: wgt::BufferAddress, pub build_scratch_size: wgt::BufferAddress, } +/// Updates use source_acceleration_structure if present, else the update will be performed in place. +/// For updates, only the data is allowed to change (not the meta data or sizes). 
+#[derive(Clone, Debug)] pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { pub entries: &'a AccelerationStructureEntries<'a, A>, pub mode: AccelerationStructureBuildMode, @@ -1368,25 +1379,32 @@ pub struct BuildAccelerationStructureDescriptor<'a, A: Api> { pub scratch_buffer: &'a A::Buffer, } +/// - All buffers, buffer addresses and offsets will be ignored. +/// - The build mode will be ignored. +/// - Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Triangles/AABBs in corresponding groups), +/// may result in reduced size requirements. +/// - Any other change may result in a bigger or smaller size requirement. +#[derive(Clone, Debug)] pub struct GetAccelerationStructureBuildSizesDescriptor<'a, A: Api> { pub entries: &'a AccelerationStructureEntries<'a, A>, pub flags: AccelerationStructureBuildFlags, } -/// -/// Usage for buffer size requirements: -/// All Buffers, BufferAdresses and offsets will be ignored. -/// The build mode will be ignored. -/// Reducing the amount of Instances, Triangle groups or AABB groups (or the number of Trinagles/AABBs in coresponding groups), -/// may result in reduced size requirements. -/// Any other change may result in a bigger or smaller size requirement. 
+/// Entries for a single descriptor +/// * `Instances` - Multiple instances for a top level acceleration structure +/// * `Triangles` - Multiple triangle meshes for a bottom level acceleration structure +/// * `AABBs` - List of list of axis aligned bounding boxes for a bottom level acceleration structure +#[derive(Debug)] pub enum AccelerationStructureEntries<'a, A: Api> { Instances(AccelerationStructureInstances<'a, A>), Triangles(&'a [AccelerationStructureTriangles<'a, A>]), AABBs(&'a [AccelerationStructureAABBs<'a, A>]), } -// TODO: flags +/// * `first_vertex` - offset in the vertex buffer (as number of vertices) +/// * `indices` - optional index buffer with attributes +/// * `transform` - optional transform +#[derive(Clone, Debug)] pub struct AccelerationStructureTriangles<'a, A: Api> { pub vertex_buffer: Option<&'a A::Buffer>, pub vertex_format: wgt::VertexFormat, @@ -1394,11 +1412,12 @@ pub struct AccelerationStructureTriangles<'a, A: Api> { pub vertex_count: u32, pub vertex_stride: wgt::BufferAddress, pub indices: Option>, - pub transforms: Option>, + pub transform: Option>, pub flags: AccelerationStructureGeometryFlags, } -// TODO: * +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] pub struct AccelerationStructureAABBs<'a, A: Api> { pub buffer: Option<&'a A::Buffer>, pub offset: u32, @@ -1407,13 +1426,16 @@ pub struct AccelerationStructureAABBs<'a, A: Api> { pub flags: AccelerationStructureGeometryFlags, } -// TODO: offset +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] pub struct AccelerationStructureInstances<'a, A: Api> { pub buffer: Option<&'a A::Buffer>, pub offset: u32, pub count: u32, } +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] pub struct AccelerationStructureTriangleIndices<'a, A: Api> { pub format: wgt::IndexFormat, pub buffer: Option<&'a A::Buffer>, @@ -1421,17 +1443,25 @@ pub struct AccelerationStructureTriangleIndices<'a, A: Api> { pub count: u32, } -pub struct AccelerationStructureTriangleTransforms<'a, A: 
Api> { +/// * `offset` - offset in bytes +#[derive(Clone, Debug)] +pub struct AccelerationStructureTriangleTransform<'a, A: Api> { pub buffer: &'a A::Buffer, pub offset: u32, } bitflags!( + /// Flags for acceleration structures pub struct AccelerationStructureBuildFlags: u32 { + /// Allow for incremental updates (no change in size) const ALLOW_UPDATE = 1 << 0; + /// Allow the acceleration structure to be compacted in a copy operation const ALLOW_COMPACTION = 1 << 1; + /// Optimize for fast ray tracing performance const PREFER_FAST_TRACE = 1 << 2; + /// Optimize for fast build time const PREFER_FAST_BUILD = 1 << 3; + /// Optimize for low memory footprint (scratch and output) const LOW_MEMORY = 1 << 4; } ); diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index f269f5b0c0..0c7833cb5a 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -464,7 +464,7 @@ impl crate::CommandEncoder for super::CommandEncoder { .first_vertex(triangles.first_vertex); } - if let Some(ref transform) = triangles.transforms { + if let Some(ref transform) = triangles.transform { let transform_device_address = unsafe { ray_tracing_functions .buffer_device_address @@ -558,7 +558,7 @@ impl crate::CommandEncoder for super::CommandEncoder { if desc.mode == crate::AccelerationStructureBuildMode::Update { geometry_info.src_acceleration_structure = desc .source_acceleration_structure - .expect("Acceleration tructure update: source structure required") + .unwrap_or(desc.destination_acceleration_structure) .raw; } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 6bd79e2f33..a54ac12f3e 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2080,12 +2080,17 @@ impl crate::Device for super::Device { instances: instance_data, }); - (smallvec::smallvec![*geometry], smallvec::smallvec![instances.count]) + ( + smallvec::smallvec![*geometry], + smallvec::smallvec![instances.count], + ) } 
crate::AccelerationStructureEntries::Triangles(in_geometries) => { - let mut primitive_counts = smallvec::SmallVec::<[u32;CAPACITY]>::with_capacity(in_geometries.len()); - let mut geometries = - smallvec::SmallVec::<[vk::AccelerationStructureGeometryKHR;CAPACITY]>::with_capacity(in_geometries.len()); + let mut primitive_counts = + smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY], + >::with_capacity(in_geometries.len()); for triangles in in_geometries { let mut triangle_data = @@ -2117,9 +2122,11 @@ impl crate::Device for super::Device { (geometries, primitive_counts) } crate::AccelerationStructureEntries::AABBs(in_geometries) => { - let mut primitive_counts = smallvec::SmallVec::<[u32;CAPACITY]>::with_capacity(in_geometries.len()); - let mut geometries = - smallvec::SmallVec::<[vk::AccelerationStructureGeometryKHR;CAPACITY]>::with_capacity(in_geometries.len()); + let mut primitive_counts = + smallvec::SmallVec::<[u32; CAPACITY]>::with_capacity(in_geometries.len()); + let mut geometries = smallvec::SmallVec::< + [vk::AccelerationStructureGeometryKHR; CAPACITY], + >::with_capacity(in_geometries.len()); for aabb in in_geometries { let aabbs_data = vk::AccelerationStructureGeometryAabbsDataKHR::builder() .stride(aabb.stride); From c1dc803e29855a12d898d6dc4ec2c0758e4aa08a Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Sun, 19 Mar 2023 17:53:21 +0100 Subject: [PATCH 27/33] added to changelog --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e209fc8541..45ba0ca539 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,20 @@ By @teoxoy in [#3534](https://github.com/gfx-rs/wgpu/pull/3534) - All `fxhash` dependencies have been replaced with `rustc-hash`. 
By @james7132 in [#3502](https://github.com/gfx-rs/wgpu/pull/3502) - Change type of `bytes_per_row` and `rows_per_image` (members of `ImageDataLayout`) from `Option` to `Option`. By @teoxoy in [#3529](https://github.com/gfx-rs/wgpu/pull/3529) +### Added/New Features + +#### General +- Added feature flags for ray-tracing (currently only hal): `RAY_QUERY` and `RAY_TRACING` @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) + +#### Vulkan + +- Implemented basic ray-tracing api for acceleration structures, and ray-queries @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) + +#### Hal + +- Added basic ray-tracing api for acceleration structures, and ray-queries @daniel-keitel (started by @expenses) in [#3507](https://github.com/gfx-rs/wgpu/pull/3507) + + ### Changes #### General From b6524af4429f90ce90fcb11bc2ded9e568d6485c Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Mon, 20 Mar 2023 16:46:32 +0100 Subject: [PATCH 28/33] Added ray tracing pipeline creation to hal (implemented in vulkan, untested) --- wgpu-hal/src/dx11/device.rs | 11 +++ wgpu-hal/src/dx11/mod.rs | 21 +++++ wgpu-hal/src/dx12/device.rs | 11 +++ wgpu-hal/src/dx12/mod.rs | 21 +++++ wgpu-hal/src/empty.rs | 28 ++++++ wgpu-hal/src/gles/device.rs | 11 +++ wgpu-hal/src/gles/mod.rs | 21 +++++ wgpu-hal/src/lib.rs | 44 ++++++++++ wgpu-hal/src/vulkan/adapter.rs | 35 +++++++- wgpu-hal/src/vulkan/conv.rs | 18 ++++ wgpu-hal/src/vulkan/device.rs | 156 +++++++++++++++++++++++++++++++++ wgpu-hal/src/vulkan/mod.rs | 36 ++++++++ wgpu-types/src/lib.rs | 12 +++ 13 files changed, 424 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/src/dx11/device.rs b/wgpu-hal/src/dx11/device.rs index 373ae7d0b0..b912ffdfa1 100644 --- a/wgpu-hal/src/dx11/device.rs +++ b/wgpu-hal/src/dx11/device.rs @@ -225,6 +225,17 @@ impl crate::Device for super::Device { ) { unimplemented!() } + + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: 
&crate::RayTracingPipelineDescriptor, + ) -> Result { + unimplemented!() + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + unimplemented!() + } } impl crate::Queue for super::Queue { diff --git a/wgpu-hal/src/dx11/mod.rs b/wgpu-hal/src/dx11/mod.rs index 8c4f223c41..dab7e27279 100644 --- a/wgpu-hal/src/dx11/mod.rs +++ b/wgpu-hal/src/dx11/mod.rs @@ -36,6 +36,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; type AccelerationStructure = AccelerationStructure; } @@ -139,3 +140,23 @@ impl crate::Surface for Surface { todo!() } } + +pub struct RayTracingPipeline {} + +impl crate::RayTracingPipeline for RayTracingPipeline { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index fdff7d613b..7c0b303a40 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1622,4 +1622,15 @@ impl crate::Device for super::Device { // Destroy a D3D12 resource as per-usual. 
todo!() } + + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: &crate::RayTracingPipelineDescriptor, + ) -> Result { + todo!() + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + todo!() + } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index eaf1871060..3978a3d87e 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -82,6 +82,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; type AccelerationStructure = AccelerationStructure; } @@ -904,3 +905,23 @@ impl crate::Queue for Queue { (1_000_000_000.0 / frequency as f64) as f32 } } + +pub struct RayTracingPipeline {} + +impl crate::RayTracingPipeline for RayTracingPipeline { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]> { + todo!() + } + + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]> { + todo!() + } + + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]> { + todo!() + } + + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]> { + todo!() + } +} diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index b86d2f90a1..97c9d7ee4d 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -37,6 +37,7 @@ impl crate::Api for Api { type ShaderModule = Resource; type RenderPipeline = Resource; type ComputePipeline = Resource; + type RayTracingPipeline = Resource; } impl crate::Instance for Context { @@ -256,6 +257,15 @@ impl crate::Device for Context { Default::default() } unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: Resource) {} + + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &crate::RayTracingPipelineDescriptor, + ) -> Result { + Ok(Resource) + } + + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: Resource) {} } impl crate::CommandEncoder for Encoder { @@ -437,3 +447,21 @@ impl crate::CommandEncoder for Encoder { ) { } } + +impl 
crate::RayTracingPipeline for Resource { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]> { + vec![] + } + + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]> { + vec![] + } + + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]> { + vec![] + } + + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]> { + vec![] + } +} diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index ff3fca58c7..217fe8e772 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1337,6 +1337,17 @@ impl crate::Device for super::Device { unimplemented!() } unsafe fn destroy_acceleration_structure(&self, _acceleration_structure: ()) {} + + unsafe fn create_ray_tracing_pipeline( + &self, + _desc: &crate::RayTracingPipelineDescriptor, + ) -> Result { + unimplemented!() + } + + unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { + unimplemented!() + } } // SAFE: WASM doesn't have threads diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 95143cca68..b2b9d27389 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -127,6 +127,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; } bitflags::bitflags! 
{ @@ -858,3 +859,23 @@ impl fmt::Debug for CommandEncoder { .finish() } } + +pub struct RayTracingPipeline {} + +impl crate::RayTracingPipeline for RayTracingPipeline { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } + + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]> { + unimplemented!() + } +} diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 785544b557..7566f3dd0c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -177,6 +177,7 @@ pub trait Api: Clone + Sized { type ShaderModule: fmt::Debug + Send + Sync; type RenderPipeline: Send + Sync; type ComputePipeline: Send + Sync; + type RayTracingPipeline: RayTracingPipeline + Send + Sync; type AccelerationStructure: fmt::Debug + Send + Sync + 'static; } @@ -353,6 +354,12 @@ pub trait Device: Send + Sync { &self, acceleration_structure: A::AccelerationStructure, ); + + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &RayTracingPipelineDescriptor, + ) -> Result; + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: A::RayTracingPipeline); } pub trait Queue: Send + Sync { @@ -1472,3 +1479,40 @@ bitflags!( const NO_DUPLICATE_ANY_HIT_INVOCATION = 1 << 1; } ); + +#[derive(Clone, Debug)] +pub struct RayTracingGeneralShaderGroup<'a, A: Api> { + pub stage: ProgrammableStage<'a, A>, +} + +#[derive(Clone, Debug)] +pub struct RayTracingHitShaderGroup<'a, A: Api> { + pub closest_hit: Option>, + pub any_hit: Option>, + pub intersection: Option>, + pub hit_group_type: RayTracingHitGroupType, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum RayTracingHitGroupType { + Triangles, + Procedural, +} + +#[derive(Clone, Debug)] +pub struct RayTracingPipelineDescriptor<'a, A: Api> { + pub label: Label<'a>, + pub layout: &'a A::PipelineLayout, + pub max_recursion_depth: u32, + pub gen_groups: &'a 
[RayTracingGeneralShaderGroup<'a, A>], + pub miss_groups: &'a [RayTracingGeneralShaderGroup<'a, A>], + pub call_groups: &'a [RayTracingGeneralShaderGroup<'a, A>], + pub hit_groups: &'a [RayTracingHitShaderGroup<'a, A>], +} + +pub trait RayTracingPipeline { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]>; + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]>; + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]>; + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]>; +} diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 87b8578481..01d171c9fa 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -34,6 +34,7 @@ pub struct PhysicalDeviceFeatures { acceleration_structure: Option, buffer_device_address: Option, ray_query: Option, + ray_tracing_pipeline: Option, zero_initialize_workgroup_memory: Option, } @@ -86,6 +87,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.ray_tracing_pipeline { + info = info.push_next(feature); + } info } @@ -334,6 +338,17 @@ impl PhysicalDeviceFeatures { } else { None }, + ray_tracing_pipeline: if enabled_extensions + .contains(&vk::KhrRayTracingPipelineFn::name()) + { + Some( + vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::builder() + .ray_tracing_pipeline(true) + .build(), + ) + } else { + None + }, zero_initialize_workgroup_memory: if effective_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&vk::KhrZeroInitializeWorkgroupMemoryFn::name()) { @@ -573,7 +588,8 @@ impl PhysicalDeviceFeatures { F::RAY_TRACING, caps.supports_extension(vk::KhrDeferredHostOperationsFn::name()) && caps.supports_extension(vk::KhrAccelerationStructureFn::name()) - && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()), + && caps.supports_extension(vk::KhrBufferDeviceAddressFn::name()) + && caps.supports_extension(vk::KhrRayTracingPipelineFn::name()), ); features.set( @@ -602,6 +618,7 @@ 
pub struct PhysicalDeviceCapabilities { maintenance_3: Option, descriptor_indexing: Option, acceleration_structure: Option, + ray_tracing_pipeline: Option, driver: Option, /// The effective driver api version supported by the physical device. /// @@ -745,6 +762,7 @@ impl PhysicalDeviceCapabilities { extensions.push(vk::KhrDeferredHostOperationsFn::name()); extensions.push(vk::KhrAccelerationStructureFn::name()); extensions.push(vk::KhrBufferDeviceAddressFn::name()); + extensions.push(vk::KhrRayTracingPipelineFn::name()) } // Require `VK_KHR_ray_query` if the associated feature was requested @@ -869,6 +887,9 @@ impl super::InstanceShared { let supports_acceleration_structure = capabilities.supports_extension(vk::KhrAccelerationStructureFn::name()); + let supports_ray_tracing_pipeline = + capabilities.supports_extension(vk::KhrRayTracingPipelineFn::name()); + let mut builder = vk::PhysicalDeviceProperties2KHR::builder(); if self.driver_api_version >= vk::API_VERSION_1_1 || capabilities.supports_extension(vk::KhrMaintenance3Fn::name()) @@ -892,6 +913,13 @@ impl super::InstanceShared { builder = builder.push_next(next); } + if supports_ray_tracing_pipeline { + let next = capabilities + .ray_tracing_pipeline + .insert(vk::PhysicalDeviceRayTracingPipelinePropertiesKHR::default()); + builder = builder.push_next(next); + } + if supports_driver_properties { let next = capabilities .driver @@ -1169,6 +1197,9 @@ impl super::Instance { .map_or(false, |ext| { ext.shader_zero_initialize_workgroup_memory == vk::TRUE }), + ray_tracing_pipeline_shader_group_size: phd_capabilities + .ray_tracing_pipeline + .map(|x| x.shader_group_handle_size), }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -1303,6 +1334,7 @@ impl super::Adapter { }; let ray_tracing_fns = if enabled_extensions.contains(&khr::AccelerationStructure::name()) && enabled_extensions.contains(&khr::BufferDeviceAddress::name()) + && 
enabled_extensions.contains(&khr::RayTracingPipeline::name()) { Some(super::RayTracingDeviceExtensionFunctions { acceleration_structure: khr::AccelerationStructure::new( @@ -1313,6 +1345,7 @@ impl super::Adapter { &self.instance.raw, &raw_device, ), + rt_pipeline: khr::RayTracingPipeline::new(&self.instance.raw, &raw_device), }) } else { None diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 2269eed978..aa27723e40 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -693,6 +693,24 @@ pub fn map_shader_stage(stage: wgt::ShaderStages) -> vk::ShaderStageFlags { if stage.contains(wgt::ShaderStages::COMPUTE) { flags |= vk::ShaderStageFlags::COMPUTE; } + if stage.contains(wgt::ShaderStages::RAYGEN) { + flags |= vk::ShaderStageFlags::RAYGEN_KHR; + } + if stage.contains(wgt::ShaderStages::MISS) { + flags |= vk::ShaderStageFlags::MISS_KHR; + } + if stage.contains(wgt::ShaderStages::CALLABLE) { + flags |= vk::ShaderStageFlags::CALLABLE_KHR; + } + if stage.contains(wgt::ShaderStages::CLOSEST_HIT) { + flags |= vk::ShaderStageFlags::CLOSEST_HIT_KHR; + } + if stage.contains(wgt::ShaderStages::ANY_HIT) { + flags |= vk::ShaderStageFlags::ANY_HIT_KHR; + } + if stage.contains(wgt::ShaderStages::INTERSECTION) { + flags |= vk::ShaderStageFlags::INTERSECTION_KHR; + } flags } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a54ac12f3e..5048ace495 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -762,6 +762,34 @@ impl super::Device { }) } + fn compile_stage_temp_ray_tracing( + &self, + stage: &crate::ProgrammableStage, + stage_flags: wgt::ShaderStages, + _binding_map: &naga::back::spv::BindingMap, + ) -> Result { + let vk_module = match *stage.module { + super::ShaderModule::Raw(raw) => raw, + _ => unimplemented!("naga support for ray tracing shaders not yet implemented"), + }; + + let entry_point = CString::new(stage.entry_point).unwrap(); + let create_info = 
vk::PipelineShaderStageCreateInfo::builder() + .stage(conv::map_shader_stage(stage_flags)) + .module(vk_module) + .name(&entry_point) + .build(); + + Ok(CompiledStage { + create_info, + _entry_point: entry_point, + temp_raw_module: match *stage.module { + super::ShaderModule::Raw(_) => None, + super::ShaderModule::Intermediate { .. } => Some(vk_module), + }, + }) + } + /// Returns the queue family index of the device's internal queue. /// /// This is useful for constructing memory barriers needed for queue family ownership transfer when @@ -2275,6 +2303,134 @@ impl crate::Device for super::Device { .dealloc(&*self.shared, acceleration_structure.block.into_inner()); } } + + unsafe fn create_ray_tracing_pipeline( + &self, + desc: &crate::RayTracingPipelineDescriptor, + ) -> Result { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + let get_create_info = |stage, stage_flags| -> Result<_, crate::PipelineError> { + Ok(self + .compile_stage_temp_ray_tracing(stage, stage_flags, &desc.layout.binding_arrays)? 
+ .create_info) + }; + + let mut stages = Vec::::new(); + let mut groups = Vec::::new(); + + let mut next_shader_index = 0; + + for (entries, stage_flags) in [ + (desc.gen_groups, wgt::ShaderStages::RAYGEN), + (desc.miss_groups, wgt::ShaderStages::MISS), + (desc.call_groups, wgt::ShaderStages::CALLABLE), + ] { + for entry in entries { + let group = vk::RayTracingShaderGroupCreateInfoKHR::builder() + .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL) + .general_shader(next_shader_index); + next_shader_index += 1; + + stages.push(get_create_info(&entry.stage, stage_flags)?); + groups.push(*group); + } + } + + for entry in desc.hit_groups { + let mut group = + vk::RayTracingShaderGroupCreateInfoKHR::builder().ty(match entry.hit_group_type { + crate::RayTracingHitGroupType::Triangles => { + vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP + } + crate::RayTracingHitGroupType::Procedural => { + vk::RayTracingShaderGroupTypeKHR::PROCEDURAL_HIT_GROUP + } + }); + + if let Some(ref stage) = entry.closest_hit { + stages.push(get_create_info(stage, wgt::ShaderStages::CLOSEST_HIT)?); + group = group.closest_hit_shader(next_shader_index); + next_shader_index += 1; + } + if let Some(ref stage) = entry.any_hit { + stages.push(get_create_info(stage, wgt::ShaderStages::ANY_HIT)?); + group = group.any_hit_shader(next_shader_index); + next_shader_index += 1; + } + if let Some(ref stage) = entry.intersection { + stages.push(get_create_info(stage, wgt::ShaderStages::INTERSECTION)?); + group = group.intersection_shader(next_shader_index); + next_shader_index += 1; + } + + groups.push(*group); + } + + let create_info = vk::RayTracingPipelineCreateInfoKHR::builder() + .stages(&stages) + .groups(&groups) + .max_pipeline_ray_recursion_depth(desc.max_recursion_depth) + .layout(desc.layout.raw); + + let raw = unsafe { + ray_tracing_functions + .rt_pipeline + .create_ray_tracing_pipelines( + vk::DeferredOperationKHR::null(), + vk::PipelineCache::null(), + &[*create_info], + None, + ) + 
.map_err(crate::DeviceError::from)?[0] + }; + + let handle_size = self + .shared + .private_caps + .ray_tracing_pipeline_shader_group_size + .unwrap() as usize; + + let handle_data = unsafe { + ray_tracing_functions + .rt_pipeline + .get_ray_tracing_shader_group_handles( + raw, + 0, + groups.len() as u32, + handle_size * groups.len(), + ) + } + .map_err(crate::DeviceError::from)?; + + let mut range_acc = 0; + + let ranges = [ + 0, + desc.gen_groups.len(), + desc.miss_groups.len(), + desc.call_groups.len(), + desc.hit_groups.len(), + ] + .map(|x| { + range_acc += x * handle_size; + range_acc + }); + + Ok(super::RayTracingPipeline { + raw, + handle_data, + handle_size, + ranges, + }) + } + + unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: super::RayTracingPipeline) { + unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; + } } impl From for crate::DeviceError { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index b4cbda4c94..42d2fd9fc8 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -71,6 +71,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type RayTracingPipeline = RayTracingPipeline; } struct DebugUtils { @@ -154,6 +155,7 @@ struct DeviceExtensionFunctions { struct RayTracingDeviceExtensionFunctions { acceleration_structure: khr::AccelerationStructure, buffer_device_address: khr::BufferDeviceAddress, + rt_pipeline: khr::RayTracingPipeline, } /// Set of internal capabilities, which don't show up in the exposed @@ -176,6 +178,7 @@ struct PrivateCapabilities { robust_buffer_access: bool, robust_image_access: bool, zero_initialize_workgroup_memory: bool, + ray_tracing_pipeline_shader_group_size: Option, } bitflags::bitflags!( @@ -301,6 +304,39 @@ pub struct AccelerationStructure { block: Mutex>, } +#[derive(Debug)] +pub struct RayTracingPipeline { + raw: vk::Pipeline, + handle_data: Vec, + handle_size: 
usize, + ranges: [usize; 5], +} + +fn get_handle_slices<'a>(pipeline: &'a RayTracingPipeline, range_index: usize) -> Vec<&'a [u8]> { + let range = pipeline.ranges[range_index]..pipeline.ranges[range_index + 1]; + pipeline.handle_data[range] + .chunks(pipeline.handle_size) + .collect() +} + +impl crate::RayTracingPipeline for RayTracingPipeline { + fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]> { + get_handle_slices(self, 0) + } + + fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]> { + get_handle_slices(self, 1) + } + + fn call_handles<'a>(&'a self) -> Vec<&'a [u8]> { + get_handle_slices(self, 2) + } + + fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]> { + get_handle_slices(self, 3) + } +} + #[derive(Debug)] pub struct Texture { raw: vk::Image, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index adfc8f23be..cb0eb324f9 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1371,6 +1371,18 @@ bitflags::bitflags! { const FRAGMENT = 1 << 1; /// Binding is visible from the compute shader of a compute pipeline. const COMPUTE = 1 << 2; + /// Binding is visible from a ray generation shader of a ray-tracing pipeline. + const RAYGEN = 1 << 3; + /// Binding is visible from a miss shader of a ray-tracing pipeline. + const MISS = 1 << 4; + /// Binding is visible from a callable shader of a ray-tracing pipeline. + const CALLABLE = 1 << 5; + /// Binding is visible from a closest hit shader of a ray-tracing pipeline. + const CLOSEST_HIT = 1 << 6; + /// Binding is visible from an any hit shader of a ray-tracing pipeline. + const ANY_HIT = 1 << 7; + /// Binding is visible from an intersection shader of a ray-tracing pipeline. + const INTERSECTION = 1 << 8; /// Binding is visible from the vertex and fragment shaders of a render pipeline. 
const VERTEX_FRAGMENT = Self::VERTEX.bits | Self::FRAGMENT.bits; } From 5b7cd939eb8d4b43d9ca54af4518c859b44ee1d7 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Mon, 20 Mar 2023 18:56:15 +0100 Subject: [PATCH 29/33] added encoder functions --- wgpu-hal/src/dx11/command.rs | 26 ++++++++++++++ wgpu-hal/src/dx12/command.rs | 26 ++++++++++++++ wgpu-hal/src/empty.rs | 19 ++++++++++ wgpu-hal/src/gles/command.rs | 26 ++++++++++++++ wgpu-hal/src/lib.rs | 29 +++++++++++++++ wgpu-hal/src/vulkan/command.rs | 66 ++++++++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+) diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 113a14e179..97bc85ea73 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -272,4 +272,30 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + unimplemented!() + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unimplemented!() + } + + unsafe fn set_ray_tracing_pipeline( + &mut self, + pipeline: &super::RayTracingPipeline, + ) { + unimplemented!() + } + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &crate::ShaderBindingTableReference, + miss_sbt: &crate::ShaderBindingTableReference, + callable_sbt: &crate::ShaderBindingTableReference, + hit_sbt: &crate::ShaderBindingTableReference, + dimensions: [u32; 3], + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 0b2068323d..19a7b551d4 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1149,4 +1149,30 @@ impl crate::CommandEncoder for super::CommandEncoder { // https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#buildraytracingaccelerationstructure todo!() } + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + todo!() + } + + unsafe fn end_ray_tracing_pass(&mut self) { + todo!() + } + + 
unsafe fn set_ray_tracing_pipeline( + &mut self, + pipeline: &super::RayTracingPipeline, + ) { + todo!() + } + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &crate::ShaderBindingTableReference, + miss_sbt: &crate::ShaderBindingTableReference, + callable_sbt: &crate::ShaderBindingTableReference, + hit_sbt: &crate::ShaderBindingTableReference, + dimensions: [u32; 3], + ) { + todo!() + } } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 97c9d7ee4d..eeadfa97d9 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -446,6 +446,25 @@ impl crate::CommandEncoder for Encoder { _desc: &[&crate::BuildAccelerationStructureDescriptor], ) { } + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + } + + unsafe fn end_ray_tracing_pass(&mut self) { + } + + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &Resource) { + } + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &crate::ShaderBindingTableReference, + miss_sbt: &crate::ShaderBindingTableReference, + callable_sbt: &crate::ShaderBindingTableReference, + hit_sbt: &crate::ShaderBindingTableReference, + dimensions: [u32; 3], + ) { + } } impl crate::RayTracingPipeline for Resource { diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index afff9c08b4..51737a16d5 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1067,4 +1067,30 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { unimplemented!() } + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + unimplemented!() + } + + unsafe fn end_ray_tracing_pass(&mut self) { + unimplemented!() + } + + unsafe fn set_ray_tracing_pipeline( + &mut self, + pipeline: &super::RayTracingPipeline, + ) { + unimplemented!() + } + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &crate::ShaderBindingTableReference, + miss_sbt: &crate::ShaderBindingTableReference, + callable_sbt: &crate::ShaderBindingTableReference, + 
hit_sbt: &crate::ShaderBindingTableReference, + dimensions: [u32; 3], + ) { + unimplemented!() + } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 7566f3dd0c..44a03c6dd2 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -586,6 +586,23 @@ pub trait CommandEncoder: Send + Sync + fmt::Debug { &mut self, descriptors: &[&BuildAccelerationStructureDescriptor], ); + + // ray-tracing passes + + // Begins a ray-tracing pass, clears all active bindings. + unsafe fn begin_ray_tracing_pass(&mut self, desc: &RayTracingPassDescriptor); + unsafe fn end_ray_tracing_pass(&mut self); + + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &A::RayTracingPipeline); + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &ShaderBindingTableReference, + miss_sbt: &ShaderBindingTableReference, + callable_sbt: &ShaderBindingTableReference, + hit_sbt: &ShaderBindingTableReference, + dimensions: [u32; 3], + ); } bitflags!( @@ -1516,3 +1533,15 @@ pub trait RayTracingPipeline { fn call_handles<'a>(&'a self) -> Vec<&'a [u8]>; fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]>; } + +#[derive(Clone, Debug)] +pub struct ShaderBindingTableReference { + pub address: wgt::BufferAddress, + pub stride: wgt::BufferAddress, + pub size: wgt::BufferAddress, +} + +#[derive(Clone, Debug)] +pub struct RayTracingPassDescriptor<'a> { + pub label: Label<'a>, +} \ No newline at end of file diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 0c7833cb5a..d40ad9586b 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -1050,6 +1050,72 @@ impl crate::CommandEncoder for super::CommandEncoder { .cmd_dispatch_indirect(self.active, buffer.raw, offset) } } + + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + self.bind_point = vk::PipelineBindPoint::RAY_TRACING_KHR; + if let Some(label) = desc.label { + unsafe { self.begin_debug_marker(label) }; + self.rpass_debug_marker_active = true; + } + } + + 
unsafe fn end_ray_tracing_pass(&mut self) { + if self.rpass_debug_marker_active { + unsafe { self.end_debug_marker() }; + self.rpass_debug_marker_active = false + } + } + + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &super::RayTracingPipeline) { + unsafe { + self.device.raw.cmd_bind_pipeline( + self.active, + vk::PipelineBindPoint::RAY_TRACING_KHR, + pipeline.raw, + ) + }; + } + + unsafe fn trace_rays( + &mut self, + ray_gen_sbt: &crate::ShaderBindingTableReference, + miss_sbt: &crate::ShaderBindingTableReference, + callable_sbt: &crate::ShaderBindingTableReference, + hit_sbt: &crate::ShaderBindingTableReference, + dimensions: [u32; 3], + ) { + let ray_tracing_functions = match self.device.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + unsafe {ray_tracing_functions.rt_pipeline.cmd_trace_rays( + self.active, + &vk::StridedDeviceAddressRegionKHR { + device_address: ray_gen_sbt.address, + stride: ray_gen_sbt.stride, + size: ray_gen_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: miss_sbt.address, + stride: miss_sbt.stride, + size: miss_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: callable_sbt.address, + stride: callable_sbt.stride, + size: callable_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: hit_sbt.address, + stride: hit_sbt.stride, + size: hit_sbt.size, + }, + dimensions[0], + dimensions[1], + dimensions[2], + )}; + } } #[test] From 23d3b2d8b0cd97ed4ddb4f5a134c821ef69bf3df Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Mon, 20 Mar 2023 18:58:31 +0100 Subject: [PATCH 30/33] clippy --- wgpu-hal/src/dx11/command.rs | 5 +--- wgpu-hal/src/dx12/command.rs | 17 +++++------ wgpu-hal/src/empty.rs | 9 ++---- wgpu-hal/src/gles/command.rs | 17 +++++------ wgpu-hal/src/lib.rs | 2 +- wgpu-hal/src/vulkan/command.rs | 54 ++++++++++++++++++---------------- 6 files changed, 47 insertions(+), 57 deletions(-) 
diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 97bc85ea73..45c3eab4f6 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -281,10 +281,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unimplemented!() } - unsafe fn set_ray_tracing_pipeline( - &mut self, - pipeline: &super::RayTracingPipeline, - ) { + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &super::RayTracingPipeline) { unimplemented!() } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 19a7b551d4..e6b7699c03 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1150,7 +1150,7 @@ impl crate::CommandEncoder for super::CommandEncoder { todo!() } - unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) { todo!() } @@ -1158,20 +1158,17 @@ impl crate::CommandEncoder for super::CommandEncoder { todo!() } - unsafe fn set_ray_tracing_pipeline( - &mut self, - pipeline: &super::RayTracingPipeline, - ) { + unsafe fn set_ray_tracing_pipeline(&mut self, _pipeline: &super::RayTracingPipeline) { todo!() } unsafe fn trace_rays( &mut self, - ray_gen_sbt: &crate::ShaderBindingTableReference, - miss_sbt: &crate::ShaderBindingTableReference, - callable_sbt: &crate::ShaderBindingTableReference, - hit_sbt: &crate::ShaderBindingTableReference, - dimensions: [u32; 3], + _ray_gen_sbt: &crate::ShaderBindingTableReference, + _miss_sbt: &crate::ShaderBindingTableReference, + _callable_sbt: &crate::ShaderBindingTableReference, + _hit_sbt: &crate::ShaderBindingTableReference, + _dimensions: [u32; 3], ) { todo!() } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index eeadfa97d9..5d62f7d725 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -447,14 +447,11 @@ impl crate::CommandEncoder for Encoder { ) { } - unsafe fn begin_ray_tracing_pass(&mut self, desc: 
&crate::RayTracingPassDescriptor) { - } + unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) {} - unsafe fn end_ray_tracing_pass(&mut self) { - } + unsafe fn end_ray_tracing_pass(&mut self) {} - unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &Resource) { - } + unsafe fn set_ray_tracing_pipeline(&mut self, pipeline: &Resource) {} unsafe fn trace_rays( &mut self, diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 51737a16d5..bedc0080b9 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -1068,7 +1068,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unimplemented!() } - unsafe fn begin_ray_tracing_pass(&mut self, desc: &crate::RayTracingPassDescriptor) { + unsafe fn begin_ray_tracing_pass(&mut self, _desc: &crate::RayTracingPassDescriptor) { unimplemented!() } @@ -1076,20 +1076,17 @@ impl crate::CommandEncoder for super::CommandEncoder { unimplemented!() } - unsafe fn set_ray_tracing_pipeline( - &mut self, - pipeline: &super::RayTracingPipeline, - ) { + unsafe fn set_ray_tracing_pipeline(&mut self, _pipeline: &super::RayTracingPipeline) { unimplemented!() } unsafe fn trace_rays( &mut self, - ray_gen_sbt: &crate::ShaderBindingTableReference, - miss_sbt: &crate::ShaderBindingTableReference, - callable_sbt: &crate::ShaderBindingTableReference, - hit_sbt: &crate::ShaderBindingTableReference, - dimensions: [u32; 3], + _ray_gen_sbt: &crate::ShaderBindingTableReference, + _miss_sbt: &crate::ShaderBindingTableReference, + _callable_sbt: &crate::ShaderBindingTableReference, + _hit_sbt: &crate::ShaderBindingTableReference, + _dimensions: [u32; 3], ) { unimplemented!() } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 44a03c6dd2..7d62deb42b 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1544,4 +1544,4 @@ pub struct ShaderBindingTableReference { #[derive(Clone, Debug)] pub struct RayTracingPassDescriptor<'a> { pub label: Label<'a>, -} \ No 
newline at end of file +} diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index d40ad9586b..3a51e1038d 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -1089,32 +1089,34 @@ impl crate::CommandEncoder for super::CommandEncoder { None => panic!("Feature `RAY_TRACING` not enabled"), }; - unsafe {ray_tracing_functions.rt_pipeline.cmd_trace_rays( - self.active, - &vk::StridedDeviceAddressRegionKHR { - device_address: ray_gen_sbt.address, - stride: ray_gen_sbt.stride, - size: ray_gen_sbt.size, - }, - &vk::StridedDeviceAddressRegionKHR { - device_address: miss_sbt.address, - stride: miss_sbt.stride, - size: miss_sbt.size, - }, - &vk::StridedDeviceAddressRegionKHR { - device_address: callable_sbt.address, - stride: callable_sbt.stride, - size: callable_sbt.size, - }, - &vk::StridedDeviceAddressRegionKHR { - device_address: hit_sbt.address, - stride: hit_sbt.stride, - size: hit_sbt.size, - }, - dimensions[0], - dimensions[1], - dimensions[2], - )}; + unsafe { + ray_tracing_functions.rt_pipeline.cmd_trace_rays( + self.active, + &vk::StridedDeviceAddressRegionKHR { + device_address: ray_gen_sbt.address, + stride: ray_gen_sbt.stride, + size: ray_gen_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: miss_sbt.address, + stride: miss_sbt.stride, + size: miss_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: callable_sbt.address, + stride: callable_sbt.stride, + size: callable_sbt.size, + }, + &vk::StridedDeviceAddressRegionKHR { + device_address: hit_sbt.address, + stride: hit_sbt.stride, + size: hit_sbt.size, + }, + dimensions[0], + dimensions[1], + dimensions[2], + ) + }; } } From 31d97a8a472985668d0bb99fde0c0a9af580026d Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 21 Mar 2023 05:13:36 +0100 Subject: [PATCH 31/33] working example --- .../ray-tracing-pipeline-triangle/main.rs | 1262 +++++++++++++++++ .../shader.rchit | 39 + .../shader.rchit.spv | Bin 0 -> 2344 
bytes .../ray-tracing-pipeline-triangle/shader.rgen | 55 + .../shader.rgen.spv | Bin 0 -> 3088 bytes .../shader.rmiss | 25 + .../shader.rmiss.spv | Bin 0 -> 1496 bytes wgpu-hal/src/dx11/device.rs | 12 + wgpu-hal/src/dx12/device.rs | 12 + wgpu-hal/src/empty.rs | 12 + wgpu-hal/src/gles/device.rs | 12 + wgpu-hal/src/lib.rs | 22 +- wgpu-hal/src/vulkan/adapter.rs | 20 +- wgpu-hal/src/vulkan/command.rs | 10 +- wgpu-hal/src/vulkan/conv.rs | 4 + wgpu-hal/src/vulkan/device.rs | 136 +- wgpu-hal/src/vulkan/mod.rs | 14 +- 17 files changed, 1607 insertions(+), 28 deletions(-) create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit.spv create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rgen create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rgen.spv create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss.spv diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs new file mode 100644 index 0000000000..2c48ee9624 --- /dev/null +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs @@ -0,0 +1,1262 @@ +extern crate wgpu_hal as hal; + +use hal::{ + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, + RayTracingGeneralShaderGroup, RayTracingHitGroupType, RayTracingHitShaderGroup, + RayTracingPipeline, ShaderBindingTableReference, Surface as _, +}; +use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; + +use glam::{Affine3A, Mat4, Vec3}; +use std::{ + borrow::{Borrow, Cow}, + iter, mem, + mem::{align_of, size_of}, + ptr::{self, copy_nonoverlapping}, + time::Instant, +}; + +const COMMAND_BUFFER_PER_CONTEXT: usize = 100; +const DESIRED_FRAMES: u32 = 
3; + +/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc) +/// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html) +#[derive(Clone)] +#[repr(C)] +struct AccelerationStructureInstance { + transform: [f32; 12], + custom_index_and_mask: u32, + shader_binding_table_record_offset_and_flags: u32, + acceleration_structure_reference: u64, +} + +impl std::fmt::Debug for AccelerationStructureInstance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instance") + .field("transform", &self.transform) + .field("custom_index()", &self.custom_index()) + .field("mask()", &self.mask()) + .field( + "shader_binding_table_record_offset()", + &self.shader_binding_table_record_offset(), + ) + .field("flags()", &self.flags()) + .field( + "acceleration_structure_reference", + &self.acceleration_structure_reference, + ) + .finish() + } +} + +#[allow(dead_code)] +impl AccelerationStructureInstance { + const LOW_24_MASK: u32 = 0x00ff_ffff; + const MAX_U24: u32 = (1u32 << 24u32) - 1u32; + + #[inline] + fn affine_to_rows(mat: &Affine3A) -> [f32; 12] { + let row_0 = mat.matrix3.row(0); + let row_1 = mat.matrix3.row(1); + let row_2 = mat.matrix3.row(2); + let translation = mat.translation; + [ + row_0.x, + row_0.y, + row_0.z, + translation.x, + row_1.x, + row_1.y, + row_1.z, + translation.y, + row_2.x, + row_2.y, + row_2.z, + translation.z, + ] + } + + #[inline] + fn rows_to_affine(rows: &[f32; 12]) -> Affine3A { + Affine3A::from_cols_array(&[ + rows[0], rows[3], rows[6], rows[9], rows[1], rows[4], rows[7], rows[10], rows[2], + rows[5], rows[8], rows[11], + ]) + } + + pub fn transform_as_affine(&self) -> Affine3A { + Self::rows_to_affine(&self.transform) + } + pub fn set_transform(&mut self, transform: &Affine3A) { + self.transform = Self::affine_to_rows(&transform); + } + + pub fn 
custom_index(&self) -> u32 { + self.custom_index_and_mask & Self::LOW_24_MASK + } + + pub fn mask(&self) -> u8 { + (self.custom_index_and_mask >> 24) as u8 + } + + pub fn shader_binding_table_record_offset(&self) -> u32 { + self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK + } + + pub fn flags(&self) -> u8 { + (self.shader_binding_table_record_offset_and_flags >> 24) as u8 + } + + pub fn set_custom_index(&mut self, custom_index: u32) { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! {custom_index} > {}", + Self::MAX_U24 + ); + self.custom_index_and_mask = + (custom_index & Self::LOW_24_MASK) | (self.custom_index_and_mask & !Self::LOW_24_MASK) + } + + pub fn set_mask(&mut self, mask: u8) { + self.custom_index_and_mask = + (self.custom_index_and_mask & Self::LOW_24_MASK) | (u32::from(mask) << 24) + } + + pub fn set_shader_binding_table_record_offset( + &mut self, + shader_binding_table_record_offset: u32, + ) { + debug_assert!(shader_binding_table_record_offset <= Self::MAX_U24, "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24); + self.shader_binding_table_record_offset_and_flags = (shader_binding_table_record_offset + & Self::LOW_24_MASK) + | (self.shader_binding_table_record_offset_and_flags & !Self::LOW_24_MASK) + } + + pub fn set_flags(&mut self, flags: u8) { + self.shader_binding_table_record_offset_and_flags = + (self.shader_binding_table_record_offset_and_flags & Self::LOW_24_MASK) + | (u32::from(flags) << 24) + } + + pub fn new( + transform: &Affine3A, + custom_index: u32, + mask: u8, + shader_binding_table_record_offset: u32, + flags: u8, + acceleration_structure_reference: u64, + ) -> Self { + debug_assert!( + custom_index <= Self::MAX_U24, + "custom_index uses more than 24 bits! 
{custom_index} > {}", + Self::MAX_U24 + ); + debug_assert!( + shader_binding_table_record_offset <= Self::MAX_U24, + "shader_binding_table_record_offset uses more than 24 bits! {shader_binding_table_record_offset} > {}", Self::MAX_U24 + ); + AccelerationStructureInstance { + transform: Self::affine_to_rows(transform), + custom_index_and_mask: (custom_index & Self::MAX_U24) | (u32::from(mask) << 24), + shader_binding_table_record_offset_and_flags: (shader_binding_table_record_offset + & Self::MAX_U24) + | (u32::from(flags) << 24), + acceleration_structure_reference, + } + } +} + +struct ExecutionContext { + encoder: A::CommandEncoder, + fence: A::Fence, + fence_value: hal::FenceValue, + used_views: Vec, + used_cmd_bufs: Vec, + frames_recorded: usize, +} + +impl ExecutionContext { + unsafe fn wait_and_clear(&mut self, device: &A::Device) { + device.wait(&self.fence, self.fence_value, !0).unwrap(); + self.encoder.reset_all(self.used_cmd_bufs.drain(..)); + for view in self.used_views.drain(..) 
{ + device.destroy_texture_view(view); + } + self.frames_recorded = 0; + } +} + +#[allow(dead_code)] +struct Example { + instance: A::Instance, + adapter: A::Adapter, + surface: A::Surface, + surface_format: wgt::TextureFormat, + device: A::Device, + queue: A::Queue, + + contexts: Vec>, + context_index: usize, + extent: [u32; 2], + start: Instant, + pipeline: A::RayTracingPipeline, + bind_group: A::BindGroup, + bgl: A::BindGroupLayout, + gen_shader_module: A::ShaderModule, + miss_shader_module: A::ShaderModule, + hit_shader_module: A::ShaderModule, + texture_view: A::TextureView, + uniform_buffer: A::Buffer, + pipeline_layout: A::PipelineLayout, + vertices_buffer: A::Buffer, + indices_buffer: A::Buffer, + texture: A::Texture, + instances: [AccelerationStructureInstance; 3], + instances_buffer: A::Buffer, + blas: A::AccelerationStructure, + tlas: A::AccelerationStructure, + scratch_buffer: A::Buffer, + sbt_buffer: A::Buffer, + gen_sbt_ref: ShaderBindingTableReference, + miss_sbt_ref: ShaderBindingTableReference, + hit_sbt_ref: ShaderBindingTableReference, + time: f32, +} + +impl Example { + fn init(window: &winit::window::Window) -> Result { + let instance_desc = hal::InstanceDescriptor { + name: "example", + flags: if cfg!(debug_assertions) { + hal::InstanceFlags::all() + } else { + hal::InstanceFlags::empty() + }, + dx12_shader_compiler: wgt::Dx12Compiler::Fxc, + }; + let instance = unsafe { A::Instance::init(&instance_desc)? 
}; + let mut surface = unsafe { + instance + .create_surface(window.raw_display_handle(), window.raw_window_handle()) + .unwrap() + }; + + let (adapter, features) = unsafe { + let mut adapters = instance.enumerate_adapters(); + if adapters.is_empty() { + return Err(hal::InstanceError); + } + let exposed = adapters.swap_remove(0); + dbg!(exposed.features); + (exposed.adapter, exposed.features) + }; + let surface_caps = + unsafe { adapter.surface_capabilities(&surface) }.ok_or(hal::InstanceError)?; + log::info!("Surface caps: {:#?}", surface_caps); + + let hal::OpenDevice { device, mut queue } = + unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + + let window_size: (u32, u32) = window.inner_size().into(); + dbg!(&surface_caps.formats); + let surface_format = if surface_caps + .formats + .contains(&wgt::TextureFormat::Rgba8Snorm) + { + wgt::TextureFormat::Rgba8Unorm + } else { + *surface_caps.formats.first().unwrap() + }; + let surface_config = hal::SurfaceConfiguration { + swap_chain_size: DESIRED_FRAMES + .max(*surface_caps.swap_chain_sizes.start()) + .min(*surface_caps.swap_chain_sizes.end()), + present_mode: wgt::PresentMode::Fifo, + composite_alpha_mode: wgt::CompositeAlphaMode::Opaque, + format: surface_format, + extent: wgt::Extent3d { + width: window_size.0, + height: window_size.1, + depth_or_array_layers: 1, + }, + usage: hal::TextureUses::COLOR_TARGET | hal::TextureUses::COPY_DST, + view_formats: vec![surface_format], + }; + unsafe { + surface.configure(&device, &surface_config).unwrap(); + }; + + #[allow(dead_code)] + struct Uniforms { + view_inverse: glam::Mat4, + proj_inverse: glam::Mat4, + } + + let bgl_desc = hal::BindGroupLayoutDescriptor { + label: None, + flags: hal::BindGroupLayoutFlags::empty(), + entries: &[ + wgt::BindGroupLayoutEntry { + binding: 0, + visibility: wgt::ShaderStages::RAYGEN | wgt::ShaderStages::CLOSEST_HIT, + ty: wgt::BindingType::Buffer { + ty: wgt::BufferBindingType::Uniform, + has_dynamic_offset: false, + 
min_binding_size: wgt::BufferSize::new(mem::size_of::() as _), + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 1, + visibility: wgt::ShaderStages::RAYGEN | wgt::ShaderStages::CLOSEST_HIT, + ty: wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::WriteOnly, + format: wgt::TextureFormat::Rgba8Unorm, + view_dimension: wgt::TextureViewDimension::D2, + }, + count: None, + }, + wgt::BindGroupLayoutEntry { + binding: 2, + visibility: wgt::ShaderStages::RAYGEN | wgt::ShaderStages::CLOSEST_HIT, + ty: wgt::BindingType::AccelerationStructure, + count: None, + }, + ], + }; + + let bgl = unsafe { device.create_bind_group_layout(&bgl_desc).unwrap() }; + + pub fn make_spirv_raw(data: &[u8]) -> Cow<[u32]> { + const MAGIC_NUMBER: u32 = 0x0723_0203; + assert_eq!( + data.len() % size_of::(), + 0, + "data size is not a multiple of 4" + ); + + // If the data happens to be aligned, directly use the byte array, + // otherwise copy the byte array into an owned vector and use that instead. + let words = if data.as_ptr().align_offset(align_of::()) == 0 { + let (pre, words, post) = unsafe { data.align_to::() }; + debug_assert!(pre.is_empty()); + debug_assert!(post.is_empty()); + Cow::from(words) + } else { + let mut words = vec![0u32; data.len() / size_of::()]; + unsafe { + copy_nonoverlapping(data.as_ptr(), words.as_mut_ptr() as *mut u8, data.len()); + } + Cow::from(words) + }; + + assert_eq!( + words[0], MAGIC_NUMBER, + "wrong magic word {:x}. 
Make sure you are using a binary SPIRV file.", + words[0] + ); + + words + } + + let gen_shader_module = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.rgen.spv"))), + ) + .unwrap() + }; + + let miss_shader_module = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.rmiss.spv"))), + ) + .unwrap() + }; + + let hit_shader_module = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.rchit.spv"))), + ) + .unwrap() + }; + + let pipeline_layout_desc = hal::PipelineLayoutDescriptor { + label: None, + flags: hal::PipelineLayoutFlags::empty(), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }; + let pipeline_layout = unsafe { + device + .create_pipeline_layout(&pipeline_layout_desc) + .unwrap() + }; + + let gen_group = RayTracingGeneralShaderGroup { + stage: hal::ProgrammableStage { + module: &gen_shader_module, + entry_point: "main", + }, + }; + + let miss_group = RayTracingGeneralShaderGroup { + stage: hal::ProgrammableStage { + module: &miss_shader_module, + entry_point: "main", + }, + }; + + let hit_group = RayTracingHitShaderGroup { + closest_hit: Some(hal::ProgrammableStage { + module: &hit_shader_module, + entry_point: "main", + }), + any_hit: None, + intersection: None, + hit_group_type: RayTracingHitGroupType::Triangles, + }; + + let pipeline = unsafe { + device.create_ray_tracing_pipeline(&hal::RayTracingPipelineDescriptor { + label: Some("pipeline"), + layout: &pipeline_layout, + max_recursion_depth: 1, + gen_groups: &[gen_group], + miss_groups: &[miss_group], + call_groups: &[], + hit_groups: &[hit_group], + }) + } + .unwrap(); + + //SBT + + let 
(sbt_buffer, gen_sbt_ref, miss_sbt_ref, hit_sbt_ref) = { + let col_a = glam::vec4(1.0, 1.0, 1.0, 0.5); + let col_b = glam::vec4(0.0, 0.0, 0.0, 0.0); + + let mut col_a_mem = [0u8; 16]; + let mut col_b_mem = [0u8; 16]; + + unsafe { + ptr::copy_nonoverlapping( + &col_a as *const glam::Vec4 as *const u8, + col_a_mem.as_mut_ptr(), + 16, + ); + ptr::copy_nonoverlapping( + &col_b as *const glam::Vec4 as *const u8, + col_b_mem.as_mut_ptr(), + 16, + ); + } + + let gen_records: [&[u8]; 1] = [&[]]; + let miss_records: [&[u8]; 1] = [&[]]; + let hit_records: [&[u8]; 2] = [&col_a_mem, &col_b_mem]; + + let gen_handles = pipeline.gen_handles(); + let miss_handles = pipeline.miss_handles(); + let hit_handles = pipeline.hit_handles().repeat(2); + + let gen_sbt_data = device.assemble_sbt_data(&gen_handles, &gen_records); + let miss_sbt_data = device.assemble_sbt_data(&miss_handles, &miss_records); + let hit_sbt_data = device.assemble_sbt_data(&hit_handles, &hit_records); + + let combined_iterator = gen_sbt_data + .data + .chain(miss_sbt_data.data) + .chain(hit_sbt_data.data); + + let sbt_size = + gen_sbt_data.padded_size + miss_sbt_data.padded_size + hit_sbt_data.padded_size; + + let sbt_buffer = unsafe { + let sbt_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("sbt buffer"), + size: sbt_size, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::SHADER_BINDING_TABLE, + memory_flags: hal::MemoryFlags::TRANSIENT + | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device.map_buffer(&sbt_buffer, 0..sbt_size).unwrap(); + let slice = ptr::slice_from_raw_parts_mut(mapping.ptr.as_ptr(), sbt_size as usize); + + for (i, src) in combined_iterator.enumerate() { + (*slice)[i] = src; + } + device.unmap_buffer(&sbt_buffer).unwrap(); + assert!(mapping.is_coherent); + + sbt_buffer + }; + + let sbt_address = unsafe { device.get_buffer_device_address(&sbt_buffer) }; + + let mut offset = 0; + + let gen_sbt_ref = ShaderBindingTableReference { + address: 
sbt_address + offset, + stride: gen_sbt_data.stride, + size: gen_sbt_data.size, + }; + offset += gen_sbt_data.padded_size; + + let miss_sbt_ref = ShaderBindingTableReference { + address: sbt_address + offset, + stride: miss_sbt_data.stride, + size: miss_sbt_data.size, + }; + offset += miss_sbt_data.padded_size; + + let hit_sbt_ref = ShaderBindingTableReference { + address: sbt_address + offset, + stride: hit_sbt_data.stride as u64, + size: hit_sbt_data.size, + }; + (sbt_buffer, gen_sbt_ref, miss_sbt_ref, hit_sbt_ref) + }; + + // t[0] = &[1u8; 8]; + + // std::vector table_data; + // for (size_t i = 0; i < count; i++) { + // group_strides[i] = align_up < VkDeviceSize > (handle_size + max_record_sizes[i], properties.shaderGroupHandleAlignment); + // sizes[i] = align_up(group_counts[i] * group_strides[i], properties.shaderGroupBaseAlignment); + // size_t offset = table_data.size(); + // table_data.insert(table_data.end(), sizes[i], 0); + // record_offsets[i] = offset + handle_size; + // for (size_t c = 0; c < group_counts[i]; c++) { + // memcpy(&table_data[offset], &handles[cur_group * handle_size], handle_size); + // offset += group_strides[i]; + // cur_group++; + // } + // } + + let vertices: [f32; 9] = [1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 0.0]; + + let vertices_size_in_bytes = vertices.len() * 4; + + let indices: [u32; 3] = [0, 1, 2]; + + let indices_size_in_bytes = indices.len() * 4; + + let vertices_buffer = unsafe { + let vertices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("vertices buffer"), + size: vertices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&vertices_buffer, 0..vertices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + vertices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + 
vertices_size_in_bytes, + ); + device.unmap_buffer(&vertices_buffer).unwrap(); + assert!(mapping.is_coherent); + + vertices_buffer + }; + + let indices_buffer = unsafe { + let indices_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("indices buffer"), + size: indices_size_in_bytes as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&indices_buffer, 0..indices_size_in_bytes as u64) + .unwrap(); + ptr::copy_nonoverlapping( + indices.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + indices_size_in_bytes, + ); + device.unmap_buffer(&indices_buffer).unwrap(); + assert!(mapping.is_coherent); + + indices_buffer + }; + + let blas_triangles = vec![hal::AccelerationStructureTriangles { + vertex_buffer: Some(&vertices_buffer), + first_vertex: 0, + vertex_format: wgt::VertexFormat::Float32x3, + vertex_count: vertices.len() as u32, + vertex_stride: 3 * 4, + indices: Some(hal::AccelerationStructureTriangleIndices { + buffer: Some(&indices_buffer), + format: wgt::IndexFormat::Uint32, + offset: 0, + count: indices.len() as u32, + }), + transform: None, + flags: hal::AccelerationStructureGeometryFlags::OPAQUE, + }]; + let blas_entries = hal::AccelerationStructureEntries::Triangles(&blas_triangles); + + let mut tlas_entries = + hal::AccelerationStructureEntries::Instances(hal::AccelerationStructureInstances { + buffer: None, + count: 3, + offset: 0, + }); + + let blas_sizes = unsafe { + device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &blas_entries, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + }, + ) + }; + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + let tlas_sizes = unsafe { + 
device.get_acceleration_structure_build_sizes( + &hal::GetAccelerationStructureBuildSizesDescriptor { + entries: &tlas_entries, + flags: tlas_flags, + }, + ) + }; + + let blas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("blas"), + size: blas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::BottomLevel, + }) + } + .unwrap(); + + let tlas = unsafe { + device.create_acceleration_structure(&hal::AccelerationStructureDescriptor { + label: Some("tlas"), + size: tlas_sizes.acceleration_structure_size, + format: hal::AccelerationStructureFormat::TopLevel, + }) + } + .unwrap(); + + let uniforms = { + let view = Mat4::look_at_rh(Vec3::new(0.0, 0.0, 2.5), Vec3::ZERO, Vec3::Y); + let proj = Mat4::perspective_rh(59.0_f32.to_radians(), 1.0, 0.001, 1000.0); + + Uniforms { + view_inverse: view.inverse(), + proj_inverse: proj.inverse(), + } + }; + + let uniforms_size = std::mem::size_of::(); + + let uniform_buffer = unsafe { + let uniform_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("uniform buffer"), + size: uniforms_size as u64, + usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM, + memory_flags: hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&uniform_buffer, 0..uniforms_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + &uniforms as *const Uniforms as *const u8, + mapping.ptr.as_ptr(), + uniforms_size, + ); + device.unmap_buffer(&uniform_buffer).unwrap(); + assert!(mapping.is_coherent); + uniform_buffer + }; + + let texture_desc = hal::TextureDescriptor { + label: None, + size: wgt::Extent3d { + width: 512, + height: 512, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgt::TextureDimension::D2, + format: wgt::TextureFormat::Rgba8Unorm, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + memory_flags: hal::MemoryFlags::empty(), + 
view_formats: vec![wgt::TextureFormat::Rgba8Unorm], + }; + let texture = unsafe { device.create_texture(&texture_desc).unwrap() }; + + let view_desc = hal::TextureViewDescriptor { + label: None, + format: texture_desc.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::STORAGE_READ_WRITE | hal::TextureUses::COPY_SRC, + range: wgt::ImageSubresourceRange::default(), + }; + let texture_view = unsafe { device.create_texture_view(&texture, &view_desc).unwrap() }; + + let bind_group = { + let buffer_binding = hal::BufferBinding { + buffer: &uniform_buffer, + offset: 0, + size: None, + }; + let texture_binding = hal::TextureBinding { + view: &texture_view, + usage: hal::TextureUses::STORAGE_READ_WRITE, + }; + let group_desc = hal::BindGroupDescriptor { + label: Some("bind group"), + layout: &bgl, + buffers: &[buffer_binding], + samplers: &[], + textures: &[texture_binding], + acceleration_structures: &[&tlas], + entries: &[ + hal::BindGroupEntry { + binding: 0, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 1, + resource_index: 0, + count: 1, + }, + hal::BindGroupEntry { + binding: 2, + resource_index: 0, + count: 1, + }, + ], + }; + unsafe { device.create_bind_group(&group_desc).unwrap() } + }; + + let scratch_buffer = unsafe { + device + .create_buffer(&hal::BufferDescriptor { + label: Some("scratch buffer"), + size: blas_sizes + .build_scratch_size + .max(tlas_sizes.build_scratch_size), + usage: hal::BufferUses::ACCELERATION_STRUCTURE_SCRATCH, + memory_flags: hal::MemoryFlags::empty(), + }) + .unwrap() + }; + + let instances = [ + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 0.0, + y: 0.0, + z: 0.0, + }), + 0, + 0xff, + 1, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: -1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { 
device.get_acceleration_structure_device_address(&blas) }, + ), + AccelerationStructureInstance::new( + &Affine3A::from_translation(Vec3 { + x: 1.0, + y: -1.0, + z: -2.0, + }), + 0, + 0xff, + 0, + 0, + unsafe { device.get_acceleration_structure_device_address(&blas) }, + ), + ]; + + let instances_buffer_size = + instances.len() * std::mem::size_of::(); + + let instances_buffer = unsafe { + let instances_buffer = device + .create_buffer(&hal::BufferDescriptor { + label: Some("instances_buffer"), + size: instances_buffer_size as u64, + usage: hal::BufferUses::MAP_WRITE + | hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + memory_flags: hal::MemoryFlags::TRANSIENT | hal::MemoryFlags::PREFER_COHERENT, + }) + .unwrap(); + + let mapping = device + .map_buffer(&instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + device.unmap_buffer(&instances_buffer).unwrap(); + assert!(mapping.is_coherent); + + instances_buffer + }; + + if let hal::AccelerationStructureEntries::Instances(ref mut i) = tlas_entries { + i.buffer = Some(&instances_buffer); + assert!( + instances.len() <= i.count as usize, + "Tlas allocation to small" + ); + } + + let cmd_encoder_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &queue, + }; + let mut cmd_encoder = unsafe { device.create_command_encoder(&cmd_encoder_desc).unwrap() }; + + unsafe { cmd_encoder.begin_encoding(Some("init")).unwrap() }; + + unsafe { + cmd_encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE, + destination_acceleration_structure: &blas, + scratch_buffer: &scratch_buffer, + entries: &blas_entries, + source_acceleration_structure: None, + }, + ]); + + let as_barrier = hal::BufferBarrier { + buffer: &scratch_buffer, + usage: 
hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + cmd_encoder.transition_buffers(iter::once(as_barrier)); + + cmd_encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Build, + flags: tlas_flags, + destination_acceleration_structure: &tlas, + scratch_buffer: &scratch_buffer, + entries: &tlas_entries, + source_acceleration_structure: None, + }, + ]); + + let texture_barrier = hal::TextureBarrier { + texture: &texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::STORAGE_READ_WRITE, + }; + + cmd_encoder.transition_textures(iter::once(texture_barrier)); + } + + let init_fence_value = 1; + let fence = unsafe { + let mut fence = device.create_fence().unwrap(); + let init_cmd = cmd_encoder.end_encoding().unwrap(); + queue + .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .unwrap(); + device.wait(&fence, init_fence_value, !0).unwrap(); + cmd_encoder.reset_all(iter::once(init_cmd)); + fence + }; + + Ok(Self { + instance, + adapter, + surface, + surface_format: surface_config.format, + device, + queue, + pipeline, + contexts: vec![ExecutionContext { + encoder: cmd_encoder, + fence, + fence_value: init_fence_value + 1, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + }], + context_index: 0, + extent: [window_size.0, window_size.1], + start: Instant::now(), + pipeline_layout, + bind_group, + texture, + instances, + instances_buffer, + blas, + tlas, + scratch_buffer, + sbt_buffer, + time: 0.0, + indices_buffer, + vertices_buffer, + uniform_buffer, + texture_view, + bgl, + gen_shader_module, + miss_shader_module, + hit_shader_module, + gen_sbt_ref, + miss_sbt_ref, + hit_sbt_ref, + }) + } + + fn update(&mut self, _event: winit::event::WindowEvent) {} + + fn render(&mut self) { + let ctx = &mut 
self.contexts[self.context_index]; + + let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COPY_DST, + }; + + let instances_buffer_size = + self.instances.len() * std::mem::size_of::(); + + let tlas_flags = hal::AccelerationStructureBuildFlags::PREFER_FAST_TRACE + | hal::AccelerationStructureBuildFlags::ALLOW_UPDATE; + + self.time += 1.0 / 60.0; + + self.instances[0].set_transform(&Affine3A::from_rotation_y(self.time)); + + unsafe { + let mapping = self + .device + .map_buffer(&self.instances_buffer, 0..instances_buffer_size as u64) + .unwrap(); + ptr::copy_nonoverlapping( + self.instances.as_ptr() as *const u8, + mapping.ptr.as_ptr(), + instances_buffer_size, + ); + self.device.unmap_buffer(&self.instances_buffer).unwrap(); + assert!(mapping.is_coherent); + } + + unsafe { + ctx.encoder.begin_encoding(Some("frame")).unwrap(); + + let instances = hal::AccelerationStructureInstances { + buffer: Some(&self.instances_buffer), + count: self.instances.len() as u32, + offset: 0, + }; + ctx.encoder.build_acceleration_structures(&[ + &hal::BuildAccelerationStructureDescriptor { + mode: hal::AccelerationStructureBuildMode::Update, + flags: tlas_flags, + destination_acceleration_structure: &self.tlas, + scratch_buffer: &self.scratch_buffer, + entries: &hal::AccelerationStructureEntries::Instances(instances), + source_acceleration_structure: Some(&self.tlas), + }, + ]); + + let as_barrier = hal::BufferBarrier { + buffer: &self.scratch_buffer, + usage: hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT + ..hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT, + }; + ctx.encoder.transition_buffers(iter::once(as_barrier)); + + ctx.encoder.transition_textures(iter::once(target_barrier0)); + } + + let surface_view_desc = hal::TextureViewDescriptor 
{ + label: None, + format: self.surface_format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COPY_DST, + range: wgt::ImageSubresourceRange::default(), + }; + let surface_tex_view = unsafe { + self.device + .create_texture_view(surface_tex.borrow(), &surface_view_desc) + .unwrap() + }; + unsafe { + ctx.encoder + .begin_ray_tracing_pass(&hal::RayTracingPassDescriptor { label: None }); + ctx.encoder.set_ray_tracing_pipeline(&self.pipeline); + ctx.encoder + .set_bind_group(&self.pipeline_layout, 0, &self.bind_group, &[]); + + ctx.encoder.trace_rays( + &self.gen_sbt_ref, + &self.miss_sbt_ref, + &ShaderBindingTableReference::default(), + &self.hit_sbt_ref, + [512, 512, 1], + ) + } + + ctx.frames_recorded += 1; + let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_DST..hal::TextureUses::PRESENT, + }; + let target_barrier2 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::STORAGE_READ_WRITE..hal::TextureUses::COPY_SRC, + }; + let target_barrier3 = hal::TextureBarrier { + texture: &self.texture, + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COPY_SRC..hal::TextureUses::STORAGE_READ_WRITE, + }; + unsafe { + ctx.encoder.end_ray_tracing_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier2)); + ctx.encoder.copy_texture_to_texture( + &self.texture, + hal::TextureUses::COPY_SRC, + &surface_tex.borrow(), + std::iter::once(hal::TextureCopy { + src_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + dst_base: hal::TextureCopyBase { + mip_level: 0, + array_layer: 0, + origin: wgt::Origin3d::ZERO, + aspect: hal::FormatAspects::COLOR, + }, + size: hal::CopyExtent { + width: 512, + height: 
512, + depth: 1, + }, + }), + ); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + ctx.encoder.transition_textures(iter::once(target_barrier3)); + } + + unsafe { + let cmd_buf = ctx.encoder.end_encoding().unwrap(); + let fence_param = if do_fence { + Some((&mut ctx.fence, ctx.fence_value)) + } else { + None + }; + self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue.present(&mut self.surface, surface_tex).unwrap(); + ctx.used_cmd_bufs.push(cmd_buf); + ctx.used_views.push(surface_tex_view); + }; + + if do_fence { + log::info!("Context switch from {}", self.context_index); + let old_fence_value = ctx.fence_value; + if self.contexts.len() == 1 { + let hal_desc = hal::CommandEncoderDescriptor { + label: None, + queue: &self.queue, + }; + self.contexts.push(unsafe { + ExecutionContext { + encoder: self.device.create_command_encoder(&hal_desc).unwrap(), + fence: self.device.create_fence().unwrap(), + fence_value: 0, + used_views: Vec::new(), + used_cmd_bufs: Vec::new(), + frames_recorded: 0, + } + }); + } + self.context_index = (self.context_index + 1) % self.contexts.len(); + let next = &mut self.contexts[self.context_index]; + unsafe { + next.wait_and_clear(&self.device); + } + next.fence_value = old_fence_value + 1; + } + } + + fn exit(mut self) { + unsafe { + { + let ctx = &mut self.contexts[self.context_index]; + self.queue + .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .unwrap(); + } + + for mut ctx in self.contexts { + ctx.wait_and_clear(&self.device); + self.device.destroy_command_encoder(ctx.encoder); + self.device.destroy_fence(ctx.fence); + } + + self.device.destroy_bind_group(self.bind_group); + self.device.destroy_buffer(self.scratch_buffer); + self.device.destroy_buffer(self.sbt_buffer); + self.device.destroy_buffer(self.instances_buffer); + self.device.destroy_buffer(self.indices_buffer); + self.device.destroy_buffer(self.vertices_buffer); + self.device.destroy_buffer(self.uniform_buffer); + 
self.device.destroy_acceleration_structure(self.tlas); + self.device.destroy_acceleration_structure(self.blas); + self.device.destroy_texture_view(self.texture_view); + self.device.destroy_texture(self.texture); + self.device.destroy_ray_tracing_pipeline(self.pipeline); + self.device.destroy_pipeline_layout(self.pipeline_layout); + self.device.destroy_bind_group_layout(self.bgl); + self.device.destroy_shader_module(self.gen_shader_module); + self.device.destroy_shader_module(self.miss_shader_module); + self.device.destroy_shader_module(self.hit_shader_module); + + self.surface.unconfigure(&self.device); + self.device.exit(self.queue); + self.instance.destroy_surface(self.surface); + drop(self.adapter); + } + } +} + +#[cfg(all(feature = "metal"))] +type Api = hal::api::Metal; +#[cfg(all(feature = "vulkan", not(feature = "metal")))] +type Api = hal::api::Vulkan; +#[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] +type Api = hal::api::Gles; +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] +type Api = hal::api::Empty; + +fn main() { + env_logger::init(); + + let event_loop = winit::event_loop::EventLoop::new(); + let window = winit::window::WindowBuilder::new() + .with_title("hal-ray-tracing-pipeline-example") + .with_inner_size(winit::dpi::PhysicalSize { + width: 512, + height: 512, + }) + .with_resizable(false) + .build(&event_loop) + .unwrap(); + + let example_result = Example::::init(&window); + let mut example = Some(example_result.expect("Selected backend is not supported")); + + event_loop.run(move |event, _, control_flow| { + let _ = &window; // force ownership by the closure + *control_flow = winit::event_loop::ControlFlow::Poll; + match event { + winit::event::Event::RedrawEventsCleared => { + window.request_redraw(); + } + 
winit::event::Event::WindowEvent { event, .. } => match event { + winit::event::WindowEvent::KeyboardInput { + input: + winit::event::KeyboardInput { + virtual_keycode: Some(winit::event::VirtualKeyCode::Escape), + state: winit::event::ElementState::Pressed, + .. + }, + .. + } + | winit::event::WindowEvent::CloseRequested => { + *control_flow = winit::event_loop::ControlFlow::Exit; + } + _ => { + example.as_mut().unwrap().update(event); + } + }, + winit::event::Event::RedrawRequested(_) => { + let ex = example.as_mut().unwrap(); + + ex.render(); + } + winit::event::Event::LoopDestroyed => { + example.take().unwrap().exit(); + } + _ => {} + } + }); +} diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit new file mode 100644 index 0000000000..7d537bb471 --- /dev/null +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit @@ -0,0 +1,39 @@ +// glslc --target-spv=spv1.6 shader.rchit -o shader.rchit.spv +#version 460 core +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_scalar_block_layout : require + +layout(set = 0, binding = 2) uniform accelerationStructureEXT tlas; + +hitAttributeEXT vec2 barycentric_coord; + +layout(shaderRecordEXT, scalar) buffer shader_record { + vec4 col; +}record; + +struct ray_payload { + vec3 pos; + vec3 dir; + vec3 col; +}; + +layout (location = 0) rayPayloadInEXT ray_payload payload; + +vec2 bary_lerp2(vec2 a, vec2 b, vec2 c, vec3 barycentrics) { + return a * barycentrics.x + b * barycentrics.y + c * barycentrics.z; +} + +vec3 bary_lerp3(vec3 a, vec3 b, vec3 c, vec3 barycentrics) { + return a * barycentrics.x + b * barycentrics.y + c * barycentrics.z; +} + +vec4 bary_lerp4(vec4 a, vec4 b, vec4 c, vec3 barycentrics) { + return a * barycentrics.x + b * barycentrics.y + c * barycentrics.z; +} + +void main() { + vec3 barycentrics = vec3(1.0f - barycentric_coord.x - barycentric_coord.y, barycentric_coord.x, barycentric_coord.y); + vec3 col = 
bary_lerp3(vec3(1,0,0),vec3(0,1,0),vec3(0,0,1), barycentrics); + + payload.col = mix(col,record.col.rgb,record.col.w); +} diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit.spv b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rchit.spv new file mode 100644 index 0000000000000000000000000000000000000000..35b2e0db7c739c9e54bb2f09ae09f6edde35a362 GIT binary patch literal 2344 zcmZvcS#wNL5XVn4OJuPoc49)=i6s&twjd@L5@Ppo>$)?Ta?MQL8LH+9<=v<9=o=_} z4KI}c@8-0+QckLKy8qq%KYjc3nOyIlK3D8<18%i@>Xz4f*X!P`cfyKpY4JgDbN+6S zgx#Q%gi*cm!VL=VQE0icR2gq~s^h07kCVB^~BvX-yn>`+Q<1 zE3TmW2NV-Z&dU)xE9~OP*XBOe%D*;u zW;Jl1md`iM%Wl66tF(EzCnF4%6Pu>+39j4@p{O^%sn@%RYWL z4lT3Cq~jmffA%rum8Yz!9ci7t- zFtp$r)^7af-y*C>V)m$XYQ(0FeG=#1U!=DE(!lV;cR>4By(RD&Z}XPG@E`IzImUNd zI_rp-sAXEh8Q_TFXS{7b!_Q`7@r~+EX0^j*!r7N)e^(v&s12K$0Haq|rI#c<5^BSq zlkjbPYgs#W^Y%t{8}r)1;J&W?hj_r?#%6B7jO(Us7V#DB_{hi4JuOP?j_-Is=Mn!v zJ9Y9s-$r`fBac7U&cfzgZ03_Sr9MzQ^|IjpDWssCSZ0NH4X4?>*{sdG&Dy-%tj)X4 z+PqurS)IaNAC_?c%-ngM$@d(QfPYjB_EE_`3Ap8rODB(6w7g@|!7Yy;a&o~f?}YS~ z=baP=ZZQ)+Cd++F82F-}!Aa@xlZ)?yx<{|T(do;y*Xaqf2~IzlM{1@I7bWE3qc_-R z)OmcDB>Zl}1qZpr5X(JWk{Hi5>0o>VvvW(rd@)nRl(pZMfG>Fd1&@KVm}}-?R^P~c z-IW*zvuAx?@;>XuJ=tI;L!SS>FgUpr>f=0OsgZeoDB*q0N%$sga(*dmTEcwN2YSHj zmz-7{b2*UF2Q&ImMqe$Rd0wmC`m|0uxzsSMeMCZBcK$Yc{F6U_lWg+9jc2oT&NZH` z(&5R@%r@EJ#1NLA(*8A~!)oxWrA&>IMP`DjW{GC&|Iho^^KKG^W-|7B$Pft%z&&fdliD5U?=SJKf_m#`XnCo}n zk2!HeZh7f$v^ak&>cm@7uM^kP)~eerzE7@|>T-3u+iOgpIr|PP_qqWe0NXx!{d&;3 zLCNcJ+QP6$I3}DF-WNU>)`S85^=tk^3kMauQjO;B-Tu#(;z!^swOXY*7uDPCsN36W zCQ&nOB~c^oB=ugpnXrCW!7pvqn;VUyLrEURoY0@s-t{c&G`h}}v_E;SHlu31(WY4Y_#YcuI|lWm^*v^&`o z<4Za}bx~|ZE4dSrsdqcxYG!dm>jph1+G+1?oj5xBwoVP}SyK@PeT{8B`rEQ)D=Xck zm!IV{vC?mnW>imFy`&>m>jvC;%^RD@!~Tx8>#&@DoA^p#8+0@B>&2bbJYLByN)OlI z3xf~$+iS+YCg|LCx*o6Unp{OXb>1!fraqaMP8}H~VP3zcQx8Uw8CyL15tjqUJo0 z+|J(P7&Y)mKkl_3u1qNt_AU$f7_gK3q-Ol7!??PFK7cc@-z(Dp)SiRK# zHCH5&3-OABvs>S+1_Qq2gw9)FoYVQu7oGe1OAPNAU;3SIGxbBK2l)n@PHp+F74_3Q 
z^z#Ls^)oy4#h_nSeBQ}Efj3f9RNe>Q7&yS-tl!C(J@qfr+h>$Hd7@uc_Vn^g0{u>p z6PtJOvH<^!eCXeaz?C%`6Ry)|h=t zGIshHJNl_0Q%Cxcy*b-^0%yvk)4OK^haLT_z?vVmH~KjNA9!MO4{+vxK{6aVb-pOj z<5uTOfrDpowszR-1IgHb3VU6aOgwn@;*R00_Op_yJ#U8nsQp!8Ht6JgO<>*6A?A(1 z@vfEvHz)acz|+8eC>>vHH^tHa6#<@M+`MEs&atG5a-RzVzSu0T^}!Puye$v5<&5EVe2#{#~IKn&KeXikN-p!2q# z2)r%Z^GnJ2^Db@Aret`l^}1v@hH)*)aMZ=PEO5m;X^Ue${@Cfgx^g2ga>4$$qFfOE E1u~QAvH$=8 literal 0 HcmV?d00001 diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss new file mode 100644 index 0000000000..4db4bdd261 --- /dev/null +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss @@ -0,0 +1,25 @@ +// glslc --target-spv=spv1.6 shader.rmiss -o shader.rmiss.spv +#version 460 core +#extension GL_EXT_ray_tracing : require + +const float PI = 3.14159265; +const float INV_PI = 1.0 / PI; +const float INV_2PI = 0.5 / PI; + +struct ray_payload { + vec3 pos; + vec3 dir; + vec3 col; +}; +layout (location = 0) rayPayloadInEXT ray_payload payload; + +vec2 dir_to_uv(vec3 direction) +{ + vec2 uv = vec2(atan(direction.z, direction.x), asin(-direction.y)); + uv = vec2(uv.x * INV_2PI, uv.y * INV_PI) + 0.5; + return uv; +} + +void main() { + payload.col = vec3(dir_to_uv(normalize(payload.dir)),1.); +} diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss.spv b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rmiss.spv new file mode 100644 index 0000000000000000000000000000000000000000..712dfcebd17be8dd782cba77e40222e6203f131c GIT binary patch literal 1496 zcmY+D*=|x%6oz-1T3cGIwXI`?)}h)2htAeQy&z~LDOzK^J2?R~C!s)?60WK-zKHMQ zoo`^`bLfS}|J$66xXEIz^{;vDeWH=INt2i`(`MEjnc*MuLOk8>4i^&7mtsNrGi&!wY*MuP_KFQpi%RxLAzGz2K^fGQ)7NXqf+lx#~tGG z7_T2!y`*X@h1YF*z5aUt-R6NYas6h7IH{X78&{tx@m_yu&q`(oEq_pN`c-9$88A+( z*%|4~uKXjNSu4%D^2O}@E*^c4SikL`iZ5s@_-5>ejFp!pCxoR%%krNTM`z1v=~Loh z5cYH8*oS%8qOll$_|J*si{GO3^A^{i&PJ%~+HE}k7bLry&c2AVj?*toW@dD1XGEWV 
zt>&D5%j#c7`nq(T{E9Zh{#^fXYTg(Bp?8DrAk=v`VD$XZ`u(ek{z$~k)I{GD(G&T$ z#EB)p>*=v%V?N}ao|pDL_55f@J&}hu!(q=qg~Y|_g*|d-ENHkJ6V+smJzwQHOa({_jpq}xQn|j8I142^>aru`LH?cu4KLu c=wsOTJ<0fDbA5xc5AJV6MEyUyFJ$iaFZY>uIsgCw literal 0 HcmV?d00001 diff --git a/wgpu-hal/src/dx11/device.rs b/wgpu-hal/src/dx11/device.rs index b912ffdfa1..a655e385de 100644 --- a/wgpu-hal/src/dx11/device.rs +++ b/wgpu-hal/src/dx11/device.rs @@ -236,6 +236,18 @@ impl crate::Device for super::Device { unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { unimplemented!() } + + fn assemble_sbt_data<'a>( + &self, + handles: &'a [&'a [u8]], + records: &'a [&'a [u8]], + ) -> crate::ShaderBindingTableData<'a> { + unimplemented!() + } + + unsafe fn get_buffer_device_address(&self, buffer: &super::Buffer) -> wgt::BufferAddress { + unimplemented!() + } } impl crate::Queue for super::Queue { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 7c0b303a40..0e4e065224 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1633,4 +1633,16 @@ impl crate::Device for super::Device { unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { todo!() } + + fn assemble_sbt_data<'a>( + &self, + _handles: &'a [&'a [u8]], + _records: &'a [&'a [u8]], + ) -> crate::ShaderBindingTableData<'a> { + todo!() + } + + unsafe fn get_buffer_device_address(&self, _buffer: &super::Buffer) -> wgt::BufferAddress { + todo!() + } } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 5d62f7d725..c6784fb4ef 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -266,6 +266,18 @@ impl crate::Device for Context { } unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: Resource) {} + + fn assemble_sbt_data<'a>( + &self, + _handles: &'a [&'a [u8]], + _records: &'a [&'a [u8]], + ) -> crate::ShaderBindingTableData<'a> { + unimplemented!() + } + + unsafe fn get_buffer_device_address(&self, buffer: &Resource) -> 
wgt::BufferAddress { + Default::default() + } } impl crate::CommandEncoder for Encoder { diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 217fe8e772..9f76e4683d 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1348,6 +1348,18 @@ impl crate::Device for super::Device { unsafe fn destroy_ray_tracing_pipeline(&self, _pipeline: super::RayTracingPipeline) { unimplemented!() } + + fn assemble_sbt_data<'a>( + &self, + _handles: &'a [&'a [u8]], + _records: &'a [&'a [u8]], + ) -> crate::ShaderBindingTableData<'a> { + unimplemented!() + } + + unsafe fn get_buffer_device_address(&self, _buffer: &super::Buffer) -> wgt::BufferAddress { + unimplemented!() + } } // SAFE: WASM doesn't have threads diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 7d62deb42b..b0953f71f2 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -360,6 +360,14 @@ pub trait Device: Send + Sync { desc: &RayTracingPipelineDescriptor, ) -> Result; unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: A::RayTracingPipeline); + + unsafe fn get_buffer_device_address(&self, buffer: &A::Buffer) -> wgt::BufferAddress; + + fn assemble_sbt_data<'a>( + &self, + handles: &'a [&'a [u8]], + records: &'a [&'a [u8]], + ) -> ShaderBindingTableData<'a>; } pub trait Queue: Send + Sync { @@ -767,11 +775,13 @@ bitflags::bitflags! { const ACCELERATION_STRUCTURE_SCRATCH = 1 << 10; const BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 11; const TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT = 1 << 12; + const SHADER_BINDING_TABLE = 1 << 13; /// The combination of states that a buffer may be in _at the same time_. 
const INCLUSIVE = Self::MAP_READ.bits | Self::COPY_SRC.bits | Self::INDEX.bits | Self::VERTEX.bits | Self::UNIFORM.bits | Self::STORAGE_READ.bits | Self::INDIRECT.bits | - Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits; + Self::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits | Self::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT.bits | + Self::SHADER_BINDING_TABLE.bits; /// The combination of states that a buffer must exclusively be in. const EXCLUSIVE = Self::MAP_WRITE.bits | Self::COPY_DST.bits | Self::STORAGE_READ_WRITE.bits | Self::ACCELERATION_STRUCTURE_SCRATCH.bits; @@ -1534,7 +1544,7 @@ pub trait RayTracingPipeline { fn hit_handles<'a>(&'a self) -> Vec<&'a [u8]>; } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct ShaderBindingTableReference { pub address: wgt::BufferAddress, pub stride: wgt::BufferAddress, @@ -1545,3 +1555,11 @@ pub struct ShaderBindingTableReference { pub struct RayTracingPassDescriptor<'a> { pub label: Label<'a>, } + +pub struct ShaderBindingTableData<'a> { + pub data: Box + 'a>, + pub stride: wgt::BufferAddress, + pub count: wgt::BufferAddress, + pub size: wgt::BufferAddress, + pub padded_size: wgt::BufferAddress, +} diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 01d171c9fa..18ff6a673c 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1197,9 +1197,19 @@ impl super::Instance { .map_or(false, |ext| { ext.shader_zero_initialize_workgroup_memory == vk::TRUE }), - ray_tracing_pipeline_shader_group_size: phd_capabilities - .ray_tracing_pipeline - .map(|x| x.shader_group_handle_size), + ray_tracing_device_properties: phd_capabilities.ray_tracing_pipeline.map(|x| { + super::RayTracingCapabilities { + shader_group_handle_size: x.shader_group_handle_size, + max_ray_recursion_depth: x.max_ray_recursion_depth, + max_shader_group_stride: x.max_shader_group_stride, + shader_group_base_alignment: 
x.shader_group_base_alignment, + shader_group_handle_capture_replay_size: x + .shader_group_handle_capture_replay_size, + max_ray_dispatch_invocation_count: x.max_ray_dispatch_invocation_count, + shader_group_handle_alignment: x.shader_group_handle_alignment, + max_ray_hit_attribute_size: x.max_ray_hit_attribute_size, + } + }), }; let capabilities = crate::Capabilities { limits: phd_capabilities.to_wgpu_limits(), @@ -1531,6 +1541,10 @@ impl super::Adapter { Ok(crate::OpenDevice { device, queue }) } + + pub fn ray_tracing_capabilities(&self) -> &Option { + &self.private_caps.ray_tracing_device_properties + } } impl crate::Adapter for super::Adapter { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 3a51e1038d..c734a0fbd7 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -1102,16 +1102,16 @@ impl crate::CommandEncoder for super::CommandEncoder { stride: miss_sbt.stride, size: miss_sbt.size, }, - &vk::StridedDeviceAddressRegionKHR { - device_address: callable_sbt.address, - stride: callable_sbt.stride, - size: callable_sbt.size, - }, &vk::StridedDeviceAddressRegionKHR { device_address: hit_sbt.address, stride: hit_sbt.stride, size: hit_sbt.size, }, + &vk::StridedDeviceAddressRegionKHR { + device_address: callable_sbt.address, + stride: callable_sbt.stride, + size: callable_sbt.size, + }, dimensions[0], dimensions[1], dimensions[2], diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index aa27723e40..2ad53475ce 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -518,6 +518,10 @@ pub fn map_buffer_usage(usage: crate::BufferUses) -> vk::BufferUsageFlags { flags |= vk::BufferUsageFlags::ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_KHR | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; } + if usage.contains(crate::BufferUses::SHADER_BINDING_TABLE) { + flags |= vk::BufferUsageFlags::SHADER_BINDING_TABLE_KHR + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS; + } flags 
} diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 5048ace495..84afa62be6 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -869,21 +869,30 @@ impl crate::Device for super::Device { desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), ); - let alignment_mask = if desc + let mut alignment = req.alignment; + if desc .usage .contains(crate::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT) { - 16 - } else { - req.alignment - } - 1; - + alignment = std::cmp::max(alignment, 16) + } + if desc.usage.contains(crate::BufferUses::SHADER_BINDING_TABLE) { + alignment = std::cmp::max( + alignment, + self.shared + .private_caps + .ray_tracing_device_properties + .as_ref() + .expect("Feature `RAY_TRACING` not enabled") + .shader_group_base_alignment as u64, + ) + } let block = unsafe { self.mem_allocator.lock().alloc( &*self.shared, gpu_alloc::Request { size: req.size, - align_mask: alignment_mask, + align_mask: alignment - 1, usage: alloc_usage, memory_types: req.memory_type_bits & self.valid_ash_memory_types, }, @@ -2313,10 +2322,15 @@ impl crate::Device for super::Device { None => panic!("Feature `RAY_TRACING` not enabled"), }; - let get_create_info = |stage, stage_flags| -> Result<_, crate::PipelineError> { - Ok(self - .compile_stage_temp_ray_tracing(stage, stage_flags, &desc.layout.binding_arrays)? 
- .create_info) + let mut compiled_storage = Vec::::new(); + let mut get_create_info = |stage, stage_flags| -> Result<_, crate::PipelineError> { + let t = self.compile_stage_temp_ray_tracing( + stage, + stage_flags, + &desc.layout.binding_arrays, + )?; + compiled_storage.push(t); + Ok(compiled_storage.last().unwrap().create_info) }; let mut stages = Vec::::new(); @@ -2332,7 +2346,10 @@ impl crate::Device for super::Device { for entry in entries { let group = vk::RayTracingShaderGroupCreateInfoKHR::builder() .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL) - .general_shader(next_shader_index); + .general_shader(next_shader_index) + .any_hit_shader(vk::SHADER_UNUSED_KHR) + .closest_hit_shader(vk::SHADER_UNUSED_KHR) + .intersection_shader(vk::SHADER_UNUSED_KHR); next_shader_index += 1; stages.push(get_create_info(&entry.stage, stage_flags)?); @@ -2341,30 +2358,37 @@ impl crate::Device for super::Device { } for entry in desc.hit_groups { - let mut group = - vk::RayTracingShaderGroupCreateInfoKHR::builder().ty(match entry.hit_group_type { + let mut group = vk::RayTracingShaderGroupCreateInfoKHR::builder() + .ty(match entry.hit_group_type { crate::RayTracingHitGroupType::Triangles => { vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP } crate::RayTracingHitGroupType::Procedural => { vk::RayTracingShaderGroupTypeKHR::PROCEDURAL_HIT_GROUP } - }); + }) + .general_shader(vk::SHADER_UNUSED_KHR); if let Some(ref stage) = entry.closest_hit { stages.push(get_create_info(stage, wgt::ShaderStages::CLOSEST_HIT)?); group = group.closest_hit_shader(next_shader_index); next_shader_index += 1; + } else { + group = group.closest_hit_shader(vk::SHADER_UNUSED_KHR); } if let Some(ref stage) = entry.any_hit { stages.push(get_create_info(stage, wgt::ShaderStages::ANY_HIT)?); group = group.any_hit_shader(next_shader_index); next_shader_index += 1; + } else { + group = group.any_hit_shader(vk::SHADER_UNUSED_KHR); } if let Some(ref stage) = entry.intersection { 
stages.push(get_create_info(stage, wgt::ShaderStages::INTERSECTION)?); group = group.intersection_shader(next_shader_index); next_shader_index += 1; + } else { + group = group.intersection_shader(vk::SHADER_UNUSED_KHR); } groups.push(*group); @@ -2391,8 +2415,19 @@ impl crate::Device for super::Device { let handle_size = self .shared .private_caps - .ray_tracing_pipeline_shader_group_size - .unwrap() as usize; + .ray_tracing_device_properties + .as_ref() + .unwrap() + .shader_group_handle_size as usize; + + println!( + "{:?}", + self.shared + .private_caps + .ray_tracing_device_properties + .as_ref() + .unwrap() + ); let handle_data = unsafe { ray_tracing_functions @@ -2431,6 +2466,73 @@ impl crate::Device for super::Device { unsafe fn destroy_ray_tracing_pipeline(&self, pipeline: super::RayTracingPipeline) { unsafe { self.shared.raw.destroy_pipeline(pipeline.raw, None) }; } + + fn assemble_sbt_data<'a>( + &self, + handles: &'a [&'a [u8]], + records: &'a [&'a [u8]], + ) -> crate::ShaderBindingTableData<'a> { + assert!( + handles.len() == records.len(), + "the number of handles and record must match" + ); + + let cap = self + .shared + .private_caps + .ray_tracing_device_properties + .as_ref() + .expect("Feature `RAY_TRACING` not enabled"); + + let shader_group_handle_alignment = cap.shader_group_handle_alignment; + let shader_group_base_alignment = cap.shader_group_base_alignment; + let shader_group_handle_size = cap.shader_group_handle_size; + + let max_record_size = records.iter().map(|e| e.len()).max().unwrap_or(0) as u32; + + let stride = crate::auxil::align_to( + max_record_size + shader_group_handle_size, + shader_group_handle_alignment, + ) as u64; + let count = handles.len() as u64; + let size = stride * count; + let padded_size = crate::auxil::align_to(size as u32, shader_group_base_alignment) as u64; + let outer_padding = padded_size - size; + + let ret = std::iter::zip(handles, records) + .flat_map(move |(handle, record)| { + let inner_padding = stride - 
(handle.len() + record.len()) as u64; + handle + .iter() + .chain(record.iter()) + .copied() + .chain((0..inner_padding).map(|_| 0)) + }) + .chain((0..outer_padding).map(|_| 0)); + + crate::ShaderBindingTableData { + data: Box::new(ret), + stride, + count, + size, + padded_size, + } + } + + unsafe fn get_buffer_device_address(&self, buffer: &super::Buffer) -> wgt::BufferAddress { + let ray_tracing_functions = match self.shared.extension_fns.ray_tracing { + Some(ref functions) => functions, + None => panic!("Feature `RAY_TRACING` not enabled"), + }; + + unsafe { + ray_tracing_functions + .buffer_device_address + .get_buffer_device_address( + &vk::BufferDeviceAddressInfo::builder().buffer(buffer.raw), + ) + } + } } impl From for crate::DeviceError { diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 42d2fd9fc8..af6dce2a7f 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -178,7 +178,19 @@ struct PrivateCapabilities { robust_buffer_access: bool, robust_image_access: bool, zero_initialize_workgroup_memory: bool, - ray_tracing_pipeline_shader_group_size: Option, + ray_tracing_device_properties: Option, +} + +#[derive(Clone, Debug)] +pub struct RayTracingCapabilities { + pub shader_group_handle_size: u32, + pub max_ray_recursion_depth: u32, + pub max_shader_group_stride: u32, + pub shader_group_base_alignment: u32, + pub shader_group_handle_capture_replay_size: u32, + pub max_ray_dispatch_invocation_count: u32, + pub shader_group_handle_alignment: u32, + pub max_ray_hit_attribute_size: u32, } bitflags::bitflags!( From f3fe194abcd5eba28b4d0fae07b5edb3addd08b6 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 21 Mar 2023 19:02:58 +0100 Subject: [PATCH 32/33] Api changes --- .../ray-tracing-pipeline-triangle/main.rs | 22 ++++++++----------- wgpu-hal/src/lib.rs | 20 ++++++++--------- wgpu-hal/src/vulkan/command.rs | 2 +- wgpu-hal/src/vulkan/device.rs | 15 +++++-------- 4 files changed, 25 insertions(+), 34 deletions(-) 
diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs index 2c48ee9624..9cd861e2f8 100644 --- a/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs @@ -2,8 +2,8 @@ extern crate wgpu_hal as hal; use hal::{ Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, - RayTracingGeneralShaderGroup, RayTracingHitGroupType, RayTracingHitShaderGroup, - RayTracingPipeline, ShaderBindingTableReference, Surface as _, + RayTracingHitShaderGroup, RayTracingPipeline, ShaderBindingTableReference, SkipHitType, + Surface as _, }; use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; @@ -407,18 +407,14 @@ impl Example { .unwrap() }; - let gen_group = RayTracingGeneralShaderGroup { - stage: hal::ProgrammableStage { - module: &gen_shader_module, - entry_point: "main", - }, + let gen_group = hal::ProgrammableStage { + module: &gen_shader_module, + entry_point: "main", }; - let miss_group = RayTracingGeneralShaderGroup { - stage: hal::ProgrammableStage { - module: &miss_shader_module, - entry_point: "main", - }, + let miss_group = hal::ProgrammableStage { + module: &miss_shader_module, + entry_point: "main", }; let hit_group = RayTracingHitShaderGroup { @@ -428,7 +424,6 @@ impl Example { }), any_hit: None, intersection: None, - hit_group_type: RayTracingHitGroupType::Triangles, }; let pipeline = unsafe { @@ -436,6 +431,7 @@ impl Example { label: Some("pipeline"), layout: &pipeline_layout, max_recursion_depth: 1, + skip_hit_type: SkipHitType::None, gen_groups: &[gen_group], miss_groups: &[miss_group], call_groups: &[], diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b0953f71f2..f1e53757a0 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1507,21 +1507,17 @@ bitflags!( } ); -#[derive(Clone, Debug)] -pub struct RayTracingGeneralShaderGroup<'a, A: Api> { - pub stage: ProgrammableStage<'a, A>, -} - 
#[derive(Clone, Debug)] pub struct RayTracingHitShaderGroup<'a, A: Api> { pub closest_hit: Option>, pub any_hit: Option>, pub intersection: Option>, - pub hit_group_type: RayTracingHitGroupType, } -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum RayTracingHitGroupType { +#[derive(Copy, Clone, Debug, Default)] +pub enum SkipHitType { + #[default] + None, Triangles, Procedural, } @@ -1531,12 +1527,14 @@ pub struct RayTracingPipelineDescriptor<'a, A: Api> { pub label: Label<'a>, pub layout: &'a A::PipelineLayout, pub max_recursion_depth: u32, - pub gen_groups: &'a [RayTracingGeneralShaderGroup<'a, A>], - pub miss_groups: &'a [RayTracingGeneralShaderGroup<'a, A>], - pub call_groups: &'a [RayTracingGeneralShaderGroup<'a, A>], + pub skip_hit_type: SkipHitType, + pub gen_groups: &'a [ProgrammableStage<'a, A>], + pub miss_groups: &'a [ProgrammableStage<'a, A>], + pub call_groups: &'a [ProgrammableStage<'a, A>], pub hit_groups: &'a [RayTracingHitShaderGroup<'a, A>], } +/// unstable may change for dx12 implementation pub trait RayTracingPipeline { fn gen_handles<'a>(&'a self) -> Vec<&'a [u8]>; fn miss_handles<'a>(&'a self) -> Vec<&'a [u8]>; diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index c734a0fbd7..8bc339e267 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -1095,7 +1095,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &vk::StridedDeviceAddressRegionKHR { device_address: ray_gen_sbt.address, stride: ray_gen_sbt.stride, - size: ray_gen_sbt.size, + size: ray_gen_sbt.stride, // intentional }, &vk::StridedDeviceAddressRegionKHR { device_address: miss_sbt.address, diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 84afa62be6..b11d67448f 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2333,6 +2333,7 @@ impl crate::Device for super::Device { Ok(compiled_storage.last().unwrap().create_info) }; + // Future work: don't add the same 
shader to stages multiple times let mut stages = Vec::::new(); let mut groups = Vec::::new(); @@ -2343,7 +2344,7 @@ impl crate::Device for super::Device { (desc.miss_groups, wgt::ShaderStages::MISS), (desc.call_groups, wgt::ShaderStages::CALLABLE), ] { - for entry in entries { + for programmable_stage in entries { let group = vk::RayTracingShaderGroupCreateInfoKHR::builder() .ty(vk::RayTracingShaderGroupTypeKHR::GENERAL) .general_shader(next_shader_index) @@ -2352,20 +2353,16 @@ impl crate::Device for super::Device { .intersection_shader(vk::SHADER_UNUSED_KHR); next_shader_index += 1; - stages.push(get_create_info(&entry.stage, stage_flags)?); + stages.push(get_create_info(programmable_stage, stage_flags)?); groups.push(*group); } } for entry in desc.hit_groups { let mut group = vk::RayTracingShaderGroupCreateInfoKHR::builder() - .ty(match entry.hit_group_type { - crate::RayTracingHitGroupType::Triangles => { - vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP - } - crate::RayTracingHitGroupType::Procedural => { - vk::RayTracingShaderGroupTypeKHR::PROCEDURAL_HIT_GROUP - } + .ty(match entry.intersection { + None => vk::RayTracingShaderGroupTypeKHR::TRIANGLES_HIT_GROUP, + Some(_) => vk::RayTracingShaderGroupTypeKHR::PROCEDURAL_HIT_GROUP, }) .general_shader(vk::SHADER_UNUSED_KHR); From fe51557cdeae2321105b1f1faddda04b0888fe27 Mon Sep 17 00:00:00 2001 From: Daniel Keitel Date: Tue, 21 Mar 2023 20:10:37 +0100 Subject: [PATCH 33/33] added callable shader to example --- .../ray-tracing-pipeline-triangle/main.rs | 46 +++++++++++++++--- .../shader.rcall | 13 +++++ .../shader.rcall.spv | Bin 0 -> 580 bytes .../shader.rchit | 24 +++++++-- .../shader.rchit.spv | Bin 2344 -> 2768 bytes 5 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall create mode 100644 wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall.spv diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs 
b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs index 9cd861e2f8..84c175c802 100644 --- a/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/main.rs @@ -204,6 +204,7 @@ struct Example { bgl: A::BindGroupLayout, gen_shader_module: A::ShaderModule, miss_shader_module: A::ShaderModule, + call_shader_module: A::ShaderModule, hit_shader_module: A::ShaderModule, texture_view: A::TextureView, uniform_buffer: A::Buffer, @@ -219,6 +220,7 @@ struct Example { sbt_buffer: A::Buffer, gen_sbt_ref: ShaderBindingTableReference, miss_sbt_ref: ShaderBindingTableReference, + call_sbt_ref: ShaderBindingTableReference, hit_sbt_ref: ShaderBindingTableReference, time: f32, } @@ -383,6 +385,18 @@ impl Example { .unwrap() }; + let call_shader_module = unsafe { + device + .create_shader_module( + &hal::ShaderModuleDescriptor { + label: None, + runtime_checks: false, + }, + hal::ShaderInput::SpirV(&make_spirv_raw(include_bytes!("shader.rcall.spv"))), + ) + .unwrap() + }; + let hit_shader_module = unsafe { device .create_shader_module( @@ -417,6 +431,11 @@ impl Example { entry_point: "main", }; + let call_group = hal::ProgrammableStage { + module: &call_shader_module, + entry_point: "main", + }; + let hit_group = RayTracingHitShaderGroup { closest_hit: Some(hal::ProgrammableStage { module: &hit_shader_module, @@ -434,15 +453,14 @@ impl Example { skip_hit_type: SkipHitType::None, gen_groups: &[gen_group], miss_groups: &[miss_group], - call_groups: &[], + call_groups: &[call_group], hit_groups: &[hit_group], }) } .unwrap(); //SBT - - let (sbt_buffer, gen_sbt_ref, miss_sbt_ref, hit_sbt_ref) = { + let (sbt_buffer, gen_sbt_ref, miss_sbt_ref, call_sbt_ref, hit_sbt_ref) = { let col_a = glam::vec4(1.0, 1.0, 1.0, 0.5); let col_b = glam::vec4(0.0, 0.0, 0.0, 0.0); @@ -464,23 +482,27 @@ impl Example { let gen_records: [&[u8]; 1] = [&[]]; let miss_records: [&[u8]; 1] = [&[]]; + let call_records: [&[u8]; 1] = [&[]]; let hit_records: 
[&[u8]; 2] = [&col_a_mem, &col_b_mem]; let gen_handles = pipeline.gen_handles(); let miss_handles = pipeline.miss_handles(); + let call_handles = pipeline.call_handles(); let hit_handles = pipeline.hit_handles().repeat(2); let gen_sbt_data = device.assemble_sbt_data(&gen_handles, &gen_records); let miss_sbt_data = device.assemble_sbt_data(&miss_handles, &miss_records); + let call_sbt_data = device.assemble_sbt_data(&call_handles, &call_records); let hit_sbt_data = device.assemble_sbt_data(&hit_handles, &hit_records); let combined_iterator = gen_sbt_data .data .chain(miss_sbt_data.data) + .chain(call_sbt_data.data) .chain(hit_sbt_data.data); let sbt_size = - gen_sbt_data.padded_size + miss_sbt_data.padded_size + hit_sbt_data.padded_size; + gen_sbt_data.padded_size + miss_sbt_data.padded_size + call_sbt_data.padded_size + hit_sbt_data.padded_size; let sbt_buffer = unsafe { let sbt_buffer = device @@ -523,12 +545,19 @@ impl Example { }; offset += miss_sbt_data.padded_size; + let call_sbt_ref = ShaderBindingTableReference { + address: sbt_address + offset, + stride: call_sbt_data.stride, + size: call_sbt_data.size, + }; + offset += call_sbt_data.padded_size; + let hit_sbt_ref = ShaderBindingTableReference { address: sbt_address + offset, stride: hit_sbt_data.stride as u64, size: hit_sbt_data.size, }; - (sbt_buffer, gen_sbt_ref, miss_sbt_ref, hit_sbt_ref) + (sbt_buffer, gen_sbt_ref, miss_sbt_ref, call_sbt_ref, hit_sbt_ref) }; // t[0] = &[1u8; 8]; @@ -799,7 +828,7 @@ impl Example { y: -1.0, z: -2.0, }), - 0, + 1, 0xff, 0, 0, @@ -950,9 +979,11 @@ impl Example { bgl, gen_shader_module, miss_shader_module, + call_shader_module, hit_shader_module, gen_sbt_ref, miss_sbt_ref, + call_sbt_ref, hit_sbt_ref, }) } @@ -1045,7 +1076,7 @@ impl Example { ctx.encoder.trace_rays( &self.gen_sbt_ref, &self.miss_sbt_ref, - &ShaderBindingTableReference::default(), + &self.call_sbt_ref, &self.hit_sbt_ref, [512, 512, 1], ) @@ -1172,6 +1203,7 @@ impl Example { 
self.device.destroy_bind_group_layout(self.bgl); self.device.destroy_shader_module(self.gen_shader_module); self.device.destroy_shader_module(self.miss_shader_module); + self.device.destroy_shader_module(self.call_shader_module); self.device.destroy_shader_module(self.hit_shader_module); self.surface.unconfigure(&self.device); diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall new file mode 100644 index 0000000000..eba02e7b4a --- /dev/null +++ b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall @@ -0,0 +1,13 @@ +// glslc --target-spv=spv1.6 shader.rcall -o shader.rcall.spv +#version 460 core +#extension GL_EXT_ray_tracing : require + +struct call_payload_struct { + vec3 col; +}; + +layout(location = 1) callableDataInEXT call_payload_struct call_payload; + +void main() { + call_payload.col = call_payload.col.grb; +} \ No newline at end of file diff --git a/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall.spv b/wgpu-hal/examples/ray-tracing-pipeline-triangle/shader.rcall.spv new file mode 100644 index 0000000000000000000000000000000000000000..967777c1ca598c84a6e744166258923a5a4c625e GIT binary patch literal 580 zcmY*XO-sW-5S=znqp5AJpEs#^6pDu`f+%<>5e!7Eh_@wdf&|vIq^XdT|IU*a|BV;H z_tGeF!tCt4dGmI%iPb!Eq}>o#y0RrJ8%R^$1_G=t)4NA?dvmX{uuypx>Lh)Z4)6x3 zoKwa5z-E0d(LijpkWdGZ>wf33z#Qf-z|onV7eKi?Jy9U?k3M$N;*%)$uL*;412 zgla3(Gb1(-&RgfQx{c{M|6%ziQttP#Zvz{`_o3q;GUsg^V5KgxA!_T0pSTxd7gf%4 zi0vZplsqZ#_qfV8fYI3BMI@JcXPA<+@EvJ9?+b{Hc#W0Z9-R8rxCE1ZeK_#|Q!wAG aFyitf?(&hs3TXJ_6w- zj62f@u{&4DWB4aA@%)CFM*WhL`|o`B-tWwOoE)CdXWUuea*mz0yuCfhuQ=leTNlPQ z%WQ+YS2vj#&;Z9z>C|*XfO7M{&K`9mTHVlSQ(| zBmQa@FIPEScc-F?Dx4Fh7eBSzsCBw^R`sl@TG$GDaEgjqeO&lbFk2KOy*~>7Upz$m6r}Y4&q(=%ic&zxFdQ+npF%l{ljhdB};1yFMJd@1{ zo-my#K#pdcvd3zMQZysxhayLPEAip8=!9%l$OzPTWZ}|(#%_4lki&Ltd{l64H!gVg zMwUa&AmZlWYWCQPigFhPWLyyLsyp!m;f||#vds9YTw0Tj^ewac{AN!Q0!uL1mwlS#;ULBf0fABu!n{oKt9B~Bvz_F~ 
za~y$>F(;XJWCJ1XQPuo-7z(~hifbY=yq4t1gEPs1x_p{m7g&&ia9NTM@LJ%PC<~}b V%$?u5bxXJ@9{tY>HaPN^oIm{Cf%5%DZm zo*#H^aV4fT$kGYAbFRSKZ{eI<;H|(H2mTX!O}zt0ar9+nydqS-$aRVD@U<0$ZP8T? z@OR(q(o=*I`Lh)83Sw)GlM0wZ{}vN?nvZL?r}q9&h(nC93G*@dG%fm+oZ(z^I-0~b z`fQnjT0nGx7_e@}QDcV-#2GqpEp{O13DkDLPw{;P@X>#b$Q