From 27d1812a56784724085056104ba9791c9815de13 Mon Sep 17 00:00:00 2001 From: Felix Kaaman Date: Tue, 12 Jun 2018 11:28:29 +0300 Subject: [PATCH 1/4] [dx11] add new memory/buffer management code Makes `Memory` keep track of ranges that buffers are bound to and allows for multiple buffers to be bound to the same `memory` now. This gets us around 50% green in `dEQP-VK.api.buffer` --- src/backend/dx11/src/device.rs | 221 ++++++++++++++++--------------- src/backend/dx11/src/internal.rs | 4 +- src/backend/dx11/src/lib.rs | 92 ++++++------- 3 files changed, 161 insertions(+), 156 deletions(-) diff --git a/src/backend/dx11/src/device.rs b/src/backend/dx11/src/device.rs index f547f030c7e..1d4e043387f 100644 --- a/src/backend/dx11/src/device.rs +++ b/src/backend/dx11/src/device.rs @@ -320,11 +320,40 @@ impl hal::Device for Device { mem_type: hal::MemoryTypeId, size: u64, ) -> Result { - // TODO: + let host_buffer = if mem_type.0 == 1 { + let desc = d3d11::D3D11_BUFFER_DESC { + ByteWidth: size as _, + Usage: d3d11::D3D11_USAGE_STAGING, + BindFlags: 0, + CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_WRITE, + MiscFlags:0, + StructureByteStride: 0, + + }; + let mut host_buffer = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateBuffer( + &desc, + ptr::null_mut(), + &mut host_buffer as *mut *mut _ as *mut *mut _ + ) + }; + + if !winerror::SUCCEEDED(hr) { + return Err(device::OutOfMemory); + } + + Some(unsafe { ComPtr::from_raw(host_buffer) }) + } else { + None + }; + Ok(Memory { properties: self.memory_properties.memory_types[mem_type.0].properties, - buffer: RefCell::new(None), size, + flushes: RefCell::new(Vec::new()), + local_buffers: RefCell::new(Vec::new()), + host_buffer }) } @@ -492,23 +521,17 @@ impl hal::Device for Device { offset: u64, unbound_buffer: UnboundBuffer, ) -> Result { - // TODO: offset - assert_eq!(0, offset); - // TODO: structured buffers - // assert_eq!(0, unbound_buffer.bind & d3d11::D3D11_BIND_SHADER_RESOURCE); - // TODO: change memory to be capable of more than one buffer? - // assert_eq!(None, memory.buffer); - use memory::Properties; debug!("usage={:?}, props={:b}", unbound_buffer.usage, memory.properties); + let MiscFlags = if unbound_buffer.usage.contains(buffer::Usage::TRANSFER_SRC) { d3d11::D3D11_RESOURCE_MISC_BUFFER_STRUCTURED } else { 0 }; - let buffer = if memory.properties == Properties::DEVICE_LOCAL { + let raw = if memory.properties == Properties::DEVICE_LOCAL { // device local memory let desc = d3d11::D3D11_BUFFER_DESC { ByteWidth: unbound_buffer.size as _, @@ -516,7 +539,7 @@ impl hal::Device for Device { BindFlags: unbound_buffer.bind, CPUAccessFlags: 0, MiscFlags, - StructureByteStride: 0, + StructureByteStride: if unbound_buffer.usage.contains(buffer::Usage::TRANSFER_SRC) { 4 } else { 0 }, }; let mut buffer: *mut d3d11::ID3D11Buffer = ptr::null_mut(); @@ -530,18 +553,18 @@ impl hal::Device for Device { if !winerror::SUCCEEDED(hr) { return Err(device::BindError::WrongMemory); - } else { - InternalBuffer::Coherent(unsafe { ComPtr::from_raw(buffer) }) } - } else if memory.properties == (Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE | Properties::CPU_CACHED) { - // coherent device local and cpu-visible memory + + unsafe { ComPtr::from_raw(buffer) } + } else if memory.properties == (Properties::CPU_VISIBLE) { let desc = d3d11::D3D11_BUFFER_DESC { ByteWidth: unbound_buffer.size as _, - Usage: d3d11::D3D11_USAGE_DYNAMIC, + // TODO: dynamic? + Usage: d3d11::D3D11_USAGE_DEFAULT, BindFlags: unbound_buffer.bind, - CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_WRITE, + CPUAccessFlags: 0, MiscFlags, - StructureByteStride: 0, + StructureByteStride: if unbound_buffer.usage.contains(buffer::Usage::TRANSFER_SRC) { 4 } else { 0 }, }; let mut buffer: *mut d3d11::ID3D11Buffer = ptr::null_mut(); @@ -555,69 +578,9 @@ impl hal::Device for Device { if !winerror::SUCCEEDED(hr) { return Err(device::BindError::WrongMemory); - } else { - InternalBuffer::Coherent(unsafe { ComPtr::from_raw(buffer) }) } - } else if memory.properties == (Properties::CPU_VISIBLE | Properties::CPU_CACHED) { - // non-coherent cpu-visible memory, need to create two buffers to - // allow gpu-read beyond copying - let staging = { - let desc = d3d11::D3D11_BUFFER_DESC { - ByteWidth: unbound_buffer.size as _, - Usage: d3d11::D3D11_USAGE_STAGING, - BindFlags: 0, - CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_READ | d3d11::D3D11_CPU_ACCESS_WRITE, - MiscFlags: 0, - StructureByteStride: 0, - }; - - let mut buffer: *mut d3d11::ID3D11Buffer = ptr::null_mut(); - let hr = unsafe { - self.raw.CreateBuffer( - &desc, - ptr::null_mut(), - &mut buffer as *mut *mut _ as *mut *mut _ - ) - }; - if !winerror::SUCCEEDED(hr) { - return Err(device::BindError::WrongMemory); - } else { - unsafe { ComPtr::from_raw(buffer) } - } - }; - - let device = { - let desc = d3d11::D3D11_BUFFER_DESC { - ByteWidth: unbound_buffer.size as _, - // TODO: dynamic? - Usage: d3d11::D3D11_USAGE_DEFAULT, - BindFlags: unbound_buffer.bind, - CPUAccessFlags: 0, - MiscFlags, - StructureByteStride: if unbound_buffer.usage.contains(buffer::Usage::TRANSFER_SRC) { 4 } else { 0 }, - }; - - let mut buffer: *mut d3d11::ID3D11Buffer = ptr::null_mut(); - let hr = unsafe { - self.raw.CreateBuffer( - &desc, - ptr::null_mut(), - &mut buffer as *mut *mut _ as *mut *mut _ - ) - }; - - if !winerror::SUCCEEDED(hr) { - return Err(device::BindError::WrongMemory); - } else { - unsafe { ComPtr::from_raw(buffer) } - } - }; - - InternalBuffer::NonCoherent { - device, - staging - } + unsafe { ComPtr::from_raw(buffer) } } else { unimplemented!() }; @@ -635,7 +598,7 @@ impl hal::Device for Device { let mut srv = ptr::null_mut(); let hr = unsafe { self.raw.CreateShaderResourceView( - buffer.device_local_buffer().as_raw() as *mut _, + raw.as_raw() as *mut _, &desc, &mut srv as *mut *mut _ as *mut *mut _ ) @@ -644,19 +607,23 @@ impl hal::Device for Device { if !winerror::SUCCEEDED(hr) { // TODO: better errors return Err(device::BindError::WrongMemory); - } else { - Some(unsafe { ComPtr::from_raw(srv) }) } + + Some(srv) } else { None }; - // TODO: - memory.buffer.replace(Some(buffer.clone())); + let buffer = InternalBuffer { + raw: raw.into_raw(), + srv + }; + let range = offset..unbound_buffer.size; + + memory.bind_buffer(range, buffer.clone()); Ok(Buffer { - buffer, - srv, + internal: buffer, size: unbound_buffer.size }) } @@ -982,7 +949,7 @@ impl hal::Device for Device { debug!("offset={}, target_binding={}", offset, target_binding); match *descriptor.borrow() { pso::Descriptor::Buffer(buffer, ref range) => { - write.set.cbv_handles.borrow_mut().push((target_binding as _, buffer.device_local_buffer())); + write.set.cbv_handles.borrow_mut().push((target_binding as _, buffer.internal.raw)); debug!("buffer={:#?}, range={:#?}", buffer, range); } pso::Descriptor::Image(image, _layout) => { @@ -1017,10 +984,9 @@ impl hal::Device for Device { where R: RangeArg, { - let buffer = match memory.buffer.borrow().clone().unwrap() { - InternalBuffer::Coherent(buf) => buf, - InternalBuffer::NonCoherent { device, staging } => staging - }; + assert_eq!(memory.host_buffer.is_some(), true); + + let buffer = memory.host_buffer.clone().unwrap(); let mut mapped = unsafe { mem::zeroed::() }; let hr = unsafe { self.context.Map( @@ -1034,7 +1000,7 @@ impl hal::Device for Device { }; if winerror::SUCCEEDED(hr) { - Ok(mapped.pData as _) + Ok(unsafe { mapped.pData.offset(*range.start().unwrap_or(&0) as isize) as _ }) } else { // TODO: better error Err(mapping::Error::InvalidAccess) @@ -1042,25 +1008,59 @@ impl hal::Device for Device { } fn unmap_memory(&self, memory: &Memory) { - let (buffer, device_buffer) = match memory.buffer.borrow().clone().unwrap() { - InternalBuffer::Coherent(buf) => (buf, None), - InternalBuffer::NonCoherent { device, staging } => (staging, Some(device)) - }; + assert_eq!(memory.host_buffer.is_some(), true); + let buffer = memory.host_buffer.clone().unwrap(); unsafe { self.context.Unmap( buffer.as_raw() as _, 0, ); + } + + fn intersection(a: &Range, b: &Range) -> Option> { + let min = if a.start < b.start { a } else { b }; + let max = if min == a { b } else { a }; + + if min.end < max.start { + None + } else { + let end = if min.end < max.end { min.end } else { max.end }; + Some(max.start..end) + } + } - // coherency! - if let Some(device_buffer) = device_buffer { - self.context.CopyResource( - device_buffer.as_raw() as _, - buffer.as_raw() as _, - ); + // go through every range we wrote to + for range in memory.flushes.borrow().iter() { + // and for every resource whose "virtual address" in our imaginary + // heap intersects; we copy from our host visible buffer to the + // corresponding dx11 resource + for &(ref buffer_range, ref buffer) in memory.local_buffers.borrow().iter() { + if let Some(range) = intersection(&range, &buffer_range) { + unsafe { + self.context.CopySubresourceRegion( + buffer.raw as _, + 0, + 0, + 0, + 0, + memory.host_buffer.clone().unwrap().as_raw() as _, + 0, + &d3d11::D3D11_BOX { + left: range.start as _, + top: 0, + front: 0, + right: buffer_range.end as _, + bottom: 1, + back: 1, + } + ); + } + } } } + + memory.flushes.borrow_mut().clear(); } fn flush_mapped_memory_ranges<'a, I, R>(&self, ranges: I) @@ -1069,7 +1069,13 @@ impl hal::Device for Device { I::Item: Borrow<(&'a Memory, R)>, R: RangeArg, { - // TODO: flush? + // we can't copy while mapped, so store the ranges for later + for range in ranges.into_iter() { + let &(memory, ref range) = range.borrow(); + let range = *range.start().unwrap_or(&0)..*range.end().unwrap_or(&memory.size); + + memory.flush(range); + } } fn invalidate_mapped_memory_ranges<'a, I, R>(&self, ranges: I) @@ -1109,7 +1115,14 @@ impl hal::Device for Device { } fn free_memory(&self, memory: Memory) { - unimplemented!() + for (_range, internal) in memory.local_buffers.borrow_mut().iter() { + unsafe { + (*internal.raw).Release(); + if let Some(srv) = internal.srv { + (*srv).Release(); + } + } + } } fn create_query_pool(&self, query_ty: query::QueryType, count: u32) -> QueryPool { diff --git a/src/backend/dx11/src/internal.rs b/src/backend/dx11/src/internal.rs index 19cb3cce261..0b97f34ad98 100644 --- a/src/backend/dx11/src/internal.rs +++ b/src/backend/dx11/src/internal.rs @@ -102,7 +102,7 @@ impl BufferImageCopy { pub fn copy_2d(&mut self, context: ComPtr, - buffer: ComPtr, + buffer: *mut d3d11::ID3D11ShaderResourceView, image: ComPtr, info: command::BufferImageCopy) { self.update_buffer(context.clone(), info.clone()); @@ -110,7 +110,7 @@ impl BufferImageCopy { unsafe { context.CSSetShader(self.cs.as_raw(), ptr::null_mut(), 0); context.CSSetConstantBuffers(0, 1, &self.copy_info.as_raw()); - context.CSSetShaderResources(0, 1, &buffer.as_raw()); + context.CSSetShaderResources(0, 1, &buffer); context.CSSetUnorderedAccessViews(0, 1, &image.as_raw(), ptr::null_mut()); context.Dispatch( diff --git a/src/backend/dx11/src/lib.rs b/src/backend/dx11/src/lib.rs index 3e77328c2c7..5efb469db80 100644 --- a/src/backend/dx11/src/lib.rs +++ b/src/backend/dx11/src/lib.rs @@ -44,6 +44,8 @@ mod shader; mod internal; mod device; + + #[derive(Clone, Debug)] pub(crate) struct ViewInfo { resource: *mut d3d11::ID3D11Resource, @@ -251,18 +253,6 @@ impl hal::Instance for Instance { (unsafe { ComPtr::::from_raw(device) }, feature_level) }; - // TODO: we should improve the way memory is managed. we should - // give access to DEFAULT, DYNAMIC and STAGING; - // - // roughly this should translate to: - // - // DEFAULT => DEVICE_LOCAL - // - // NOTE: DYNAMIC only offers cpu write, potentially add - // a HOST_WRITE_ONLY flag.. - // DYNAMIC => DEVICE_LOCAL | CPU_VISIBLE - // - // STAGING => CPU_VISIBLE | CPU_CACHED let memory_properties = hal::MemoryProperties { memory_types: vec![ hal::MemoryType { @@ -270,11 +260,7 @@ impl hal::Instance for Instance { heap_index: 0, }, hal::MemoryType { - properties: Properties::DEVICE_LOCAL, //| Properties::CPU_VISIBLE | Properties::CPU_CACHED, - heap_index: 0, - }, - hal::MemoryType { - properties: Properties::CPU_VISIBLE | Properties::CPU_CACHED, + properties: Properties::CPU_VISIBLE, heap_index: 1, }, ], @@ -864,7 +850,7 @@ impl hal::command::RawCommandBuffer for CommandBuffer { fn bind_index_buffer(&mut self, ibv: buffer::IndexBufferView) { unsafe { self.context.IASetIndexBuffer( - ibv.buffer.device_local_buffer().as_raw(), + ibv.buffer.internal.raw, conv::map_index_type(ibv.index_type), ibv.offset as u32 ); @@ -873,7 +859,7 @@ impl hal::command::RawCommandBuffer for CommandBuffer { fn bind_vertex_buffers(&mut self, first_binding: u32, vbs: pso::VertexBufferSet) { let (buffers, offsets): (Vec<*mut d3d11::ID3D11Buffer>, Vec) = vbs.0.iter() - .map(|(buf, offset)| (buf.device_local_buffer().as_raw(), *offset as u32)) + .map(|(buf, offset)| (buf.internal.raw, *offset as u32)) .unzip(); // TODO: strides @@ -976,8 +962,7 @@ impl hal::command::RawCommandBuffer for CommandBuffer { let set = set.borrow(); for (binding, cbv) in set.cbv_handles.borrow().iter() { - let cbv = cbv.as_raw(); - unsafe { self.context.VSSetConstantBuffers(*binding, 1, &cbv); } + unsafe { self.context.VSSetConstantBuffers(*binding, 1, cbv); } } for (binding, srv) in set.srv_handles.borrow().iter() { @@ -1047,10 +1032,12 @@ impl hal::command::RawCommandBuffer for CommandBuffer { T: IntoIterator, T::Item: Borrow, { + assert_eq!(buffer.internal.srv.is_some(), true); + for copy in regions.into_iter() { self.internal.copy_2d( self.context.clone(), - buffer.srv.clone().unwrap(), + buffer.internal.srv.unwrap(), image.uav.clone().unwrap(), copy.borrow().clone() ); @@ -1129,19 +1116,44 @@ impl hal::command::RawCommandBuffer for CommandBuffer { } } +// Since we dont have any heaps to work with directly, everytime we bind a +// buffer/image to memory we allocate a dx11 resource and assign it a range. +// +// `HOST_VISIBLE` memory gets a staging buffer which covers the entire memory +// range. This forces us to only expose non-coherent memory, as this +// abstraction acts as a "cache" since the staging buffer is disjoint from all +// the dx11 resources we store in the struct. #[derive(Derivative)] #[derivative(Debug)] pub struct Memory { properties: memory::Properties, - #[derivative(Debug="ignore")] - // TODO: :-( - buffer: RefCell>, size: u64, + + // stores flushed ranges inbetween mappings + flushes: RefCell>>, + + // list of all buffers bound to this memory + #[derivative(Debug="ignore")] + local_buffers: RefCell, InternalBuffer)>>, + + // staging buffer covering the whole memory region, if it's HOST_VISIBLE + #[derivative(Debug="ignore")] + host_buffer: Option>, } unsafe impl Send for Memory {} unsafe impl Sync for Memory {} +impl Memory { + pub fn flush(&self, range: Range) { + self.flushes.borrow_mut().push(range); + } + + pub fn bind_buffer(&self, range: Range, buffer: InternalBuffer) { + self.local_buffers.borrow_mut().push((range, buffer)); + } +} + pub struct CommandPool { device: ComPtr, internal: internal::BufferImageCopy, @@ -1202,39 +1214,19 @@ pub struct UnboundBuffer { } #[derive(Clone)] -pub enum InternalBuffer { - Coherent(ComPtr), - NonCoherent { - device: ComPtr, - staging: ComPtr - } -} - -impl InternalBuffer { - pub fn device_local_buffer(&self) -> ComPtr { - match self { - InternalBuffer::Coherent(ref buf) => buf.clone(), - InternalBuffer::NonCoherent { ref device, ref staging } => device.clone() - } - } +pub struct InternalBuffer { + raw: *mut d3d11::ID3D11Buffer, + srv: Option<*mut d3d11::ID3D11ShaderResourceView> } #[derive(Derivative)] #[derivative(Debug)] pub struct Buffer { #[derivative(Debug="ignore")] - buffer: InternalBuffer, - #[derivative(Debug="ignore")] - srv: Option>, + internal: InternalBuffer, size: u64, } -impl Buffer { - pub fn device_local_buffer(&self) -> ComPtr { - self.buffer.device_local_buffer() - } -} - unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} @@ -1363,7 +1355,7 @@ pub struct DescriptorSet { #[derivative(Debug="ignore")] srv_handles: RefCell)>>, #[derivative(Debug="ignore")] - cbv_handles: RefCell)>>, + cbv_handles: RefCell>, #[derivative(Debug="ignore")] sampler_handles: RefCell)>>, } From 8c8281c2ea8ed1865f978a2cc1cc3e2d29cca316 Mon Sep 17 00:00:00 2001 From: msiglreith Date: Sat, 16 Jun 2018 16:36:21 +0200 Subject: [PATCH 2/4] hal: Improve buffer documentation and cleanup error handling --- src/backend/dx11/src/device.rs | 2 +- src/backend/dx12/src/device.rs | 4 +- src/backend/empty/src/lib.rs | 2 +- src/backend/gl/src/device.rs | 7 +- src/backend/metal/src/device.rs | 10 +-- src/backend/vulkan/src/device.rs | 2 +- src/hal/src/buffer.rs | 127 ++++++++++++++----------------- src/hal/src/device.rs | 4 +- 8 files changed, 75 insertions(+), 83 deletions(-) diff --git a/src/backend/dx11/src/device.rs b/src/backend/dx11/src/device.rs index f547f030c7e..eb1061d0102 100644 --- a/src/backend/dx11/src/device.rs +++ b/src/backend/dx11/src/device.rs @@ -666,7 +666,7 @@ impl hal::Device for Device { buffer: &Buffer, format: Option, range: R, - ) -> Result { + ) -> Result { unimplemented!() } diff --git a/src/backend/dx12/src/device.rs b/src/backend/dx12/src/device.rs index 38ba29e51ce..76d18f27c0a 100644 --- a/src/backend/dx12/src/device.rs +++ b/src/backend/dx12/src/device.rs @@ -1910,14 +1910,14 @@ impl d::Device for Device { buffer: &n::Buffer, format: Option, range: R, - ) -> Result { + ) -> Result { let buffer_features = { let idx = format.map(|fmt| fmt as usize).unwrap_or(0); self.format_properties[idx].buffer_features }; let (format, format_desc) = match format.and_then(conv::map_format) { Some(fmt) => (fmt, format.unwrap().surface_desc()), - None => return Err(buffer::ViewError::Unsupported), + None => return Err(buffer::ViewCreationError::UnsupportedFormat { format }), }; let start = *range.start().unwrap_or(&0); diff --git a/src/backend/empty/src/lib.rs b/src/backend/empty/src/lib.rs index 7f2d9d39f9f..93775ab818e 100644 --- a/src/backend/empty/src/lib.rs +++ b/src/backend/empty/src/lib.rs @@ -177,7 +177,7 @@ impl hal::Device for Device { unimplemented!() } - fn create_buffer_view>(&self, _: &(), _: Option, _: R) -> Result<(), buffer::ViewError> { + fn create_buffer_view>(&self, _: &(), _: Option, _: R) -> Result<(), buffer::ViewCreationError> { unimplemented!() } diff --git a/src/backend/gl/src/device.rs b/src/backend/gl/src/device.rs index f5961d34c1e..25d54d7f84c 100644 --- a/src/backend/gl/src/device.rs +++ b/src/backend/gl/src/device.rs @@ -672,8 +672,7 @@ impl d::Device for Device { ) -> Result { if !self.share.legacy_features.contains(LegacyFeatures::CONSTANT_BUFFER) && usage.contains(buffer::Usage::UNIFORM) { - error!("Constant buffers are not supported by this GL version"); - return Err(buffer::CreationError::Other); + return Err(buffer::CreationError::UnsupportedUsage { usage }); } let target = if self.share.private_caps.buffer_role_change { @@ -681,7 +680,7 @@ impl d::Device for Device { } else { match conv::buffer_usage_to_gl_target(usage) { Some(target) => target, - None => return Err(buffer::CreationError::Usage(usage)), + None => return Err(buffer::CreationError::UnsupportedUsage { usage }), } }; @@ -838,7 +837,7 @@ impl d::Device for Device { fn create_buffer_view>( &self, _: &n::Buffer, _: Option, _: R - ) -> Result { + ) -> Result { unimplemented!() } diff --git a/src/backend/metal/src/device.rs b/src/backend/metal/src/device.rs index e8a3470776f..0baad0c8b95 100644 --- a/src/backend/metal/src/device.rs +++ b/src/backend/metal/src/device.rs @@ -166,7 +166,7 @@ fn create_depth_stencil_state( raw.set_front_face_stencil(Some(&front_desc)); let back_desc = metal::StencilDescriptor::new(); - back_desc.set_stencil_compare_function(conv::map_compare_function(back.fun)); + back_desc.set_stencil_compare_function(conv::map_compare_function(back.fun)); dss.stencil.back_read_mask = back.mask_read; match back.mask_read { @@ -1667,22 +1667,22 @@ impl hal::Device for Device { fn create_buffer_view>( &self, buffer: &n::Buffer, format_maybe: Option, range: R - ) -> Result { + ) -> Result { let start = buffer.range.start + *range.start().unwrap_or(&0); let end_rough = *range.end().unwrap_or(&buffer.raw.length()); let format = match format_maybe { Some(fmt) => fmt, - None => return Err(buffer::ViewError::Unsupported), + None => return Err(buffer::ViewCreationError::UnsupportedFormat { format: format_maybe }), }; let format_desc = format.surface_desc(); if format_desc.aspects != format::Aspects::COLOR { // no depth/stencil support for buffer views here - return Err(buffer::ViewError::Unsupported) + return Err(buffer::ViewCreationError::UnsupportedFormat { format: format_maybe }) } let block_count = (end_rough - start) * 8 / format_desc.bits as u64; let mtl_format = self.private_caps .map_format(format) - .ok_or(buffer::ViewError::Unsupported)?; + .ok_or(buffer::ViewCreationError::UnsupportedFormat { format: format_maybe })?; let descriptor = metal::TextureDescriptor::new(); descriptor.set_texture_type(MTLTextureType::D2); diff --git a/src/backend/vulkan/src/device.rs b/src/backend/vulkan/src/device.rs index 2da5c7aaa37..6512c370510 100644 --- a/src/backend/vulkan/src/device.rs +++ b/src/backend/vulkan/src/device.rs @@ -930,7 +930,7 @@ impl d::Device for Device { fn create_buffer_view>( &self, buffer: &n::Buffer, format: Option, range: R - ) -> Result { + ) -> Result { let (offset, size) = conv::map_range_arg(&range); let info = vk::BufferViewCreateInfo { s_type: vk::StructureType::BufferViewCreateInfo, diff --git a/src/hal/src/buffer.rs b/src/hal/src/buffer.rs index 9abbf7097dc..e965a7793bf 100644 --- a/src/hal/src/buffer.rs +++ b/src/hal/src/buffer.rs @@ -1,72 +1,58 @@ -//! Memory buffers +//! Memory buffers. +//! +//! # Buffer +//! +//! Buffers interpret memory slices as linear continguous data array. +//! They can be used as shader resources, vertex buffers, index buffers or for +//! specifying the action commands for indirect exection. -use std::error::Error; -use std::fmt; - -use {IndexType, Backend}; +use {format, IndexType, Backend}; /// An offset inside a buffer, in bytes. pub type Offset = u64; +/// Buffer state. +pub type State = Access; + /// Error creating a buffer. -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[derive(Fail, Debug, Clone, PartialEq, Eq)] pub enum CreationError { - /// Required `Usage` is not supported. - Usage(Usage), - /// Some other problem. - Other, -} - -impl fmt::Display for CreationError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let description = self.description(); - match *self { - CreationError::Usage(usage) => write!(f, "{}: {:?}", description, usage), - _ => write!(f, "{}", description) - } - } + /// Memory allocation on the host side failed. + /// This could be caused by a lack of memory. + #[fail(display = "Host memory allocation failed.")] + OutOfHostMemory, + /// Memory allocation on the device side failed. + /// This could be caused by a lack of memory. + #[fail(display = "Device memory allocation failed.")] + OutOfDeviceMemory, + /// Requested buffer usage is not supported. + /// + /// Older GL version don't support constant buffers or multiple usage flags. + #[fail(display = "Buffer usage unsupported ({:?}).", usage)] + UnsupportedUsage { + /// Unsupported usage passed on buffer creation. + usage: Usage, + }, } -impl Error for CreationError { - fn description(&self) -> &str { - match *self { - CreationError::Usage(_) => - "Required `Usage` is not supported", - CreationError::Other => - "Some other problem", - } - } -} - -/// Error creating a `BufferView`. -#[derive(Clone, Debug, PartialEq)] -pub enum ViewError { - /// The required usage flag is not present in the image. - Usage(Usage), - /// The backend refused for some reason. - Unsupported, -} - -impl fmt::Display for ViewError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let description = self.description(); - match *self { - ViewError::Usage(usage) => write!(f, "{}: {:?}", description, usage), - _ => write!(f, "{}", description) - } - } -} - -impl Error for ViewError { - fn description(&self) -> &str { - match *self { - ViewError::Usage(_) => - "The required usage flag is not present in the image", - ViewError::Unsupported => - "The backend refused for some reason", - } - } +/// Error creating a buffer view. +#[derive(Fail, Debug, Clone, PartialEq, Eq)] +pub enum ViewCreationError { + /// Memory allocation on the host side failed. + /// This could be caused by a lack of memory. + #[fail(display = "Host memory allocation failed.")] + OutOfHostMemory, + /// Memory allocation on the device side failed. + /// This could be caused by a lack of memory. + #[fail(display = "Device memory allocation failed.")] + OutOfDeviceMemory, + /// Buffer view format is not supported. + #[fail(display = "Buffer view format unsupported ({:?}).", format)] + UnsupportedFormat { + /// Unsupported format passed on view creation. + format: Option, + }, } bitflags!( @@ -95,21 +81,27 @@ bitflags!( ); impl Usage { - /// Can this buffer be used in transfer operations ? + /// Returns if the buffer can be used in transfer operations. pub fn can_transfer(&self) -> bool { self.intersects(Usage::TRANSFER_SRC | Usage::TRANSFER_DST) } } bitflags!( - /// Buffer state flags. + /// Buffer access flags. + /// + /// Access of buffers by the pipeline or shaders. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Access: u32 { - /// + /// Read commands instruction for indirect execution. const INDIRECT_COMMAND_READ = 0x1; + /// Read index values for indexed draw commands. /// + /// See [`draw_indexed`](../command/trait.RawCommandBuffer.html#tymethod.draw_indexed) + /// and [`draw_indexed_indirect`](../command/trait.RawCommandBuffer.html#tymethod.draw_indexed_indirect). const INDEX_BUFFER_READ = 0x2; - /// + /// Read vertices from vertex buffer for draw commands in the [`VERTEX_INPUT`]( + /// ../pso/struct.PipelineStage.html#associatedconstant.VERTEX_INPUT) stage. const VERTEX_BUFFER_READ = 0x4; /// const CONSTANT_BUFFER_READ = 0x8; @@ -132,11 +124,10 @@ bitflags!( } ); -/// Buffer state -pub type State = Access; - -/// Index buffer view for `bind_index_buffer`, slightly -/// analogous to an index table into an array. +/// Index buffer view for `bind_index_buffer`. +/// +/// Defines a buffer slice used for acquiring the indicies on draw commands. +/// Indices are used to lookup vertex indices in the vertex buffers. pub struct IndexBufferView<'a, B: Backend> { /// The buffer to bind. pub buffer: &'a B::Buffer, diff --git a/src/hal/src/device.rs b/src/hal/src/device.rs index 9214ea44d39..95852519346 100644 --- a/src/hal/src/device.rs +++ b/src/hal/src/device.rs @@ -1,3 +1,5 @@ +//! Logical device +//! //! # Device //! //! This module exposes the `Device` trait, which provides methods for creating @@ -306,7 +308,7 @@ pub trait Device: Any + Send + Sync { /// fn create_buffer_view>( &self, buf: &B::Buffer, fmt: Option, range: R - ) -> Result; + ) -> Result; /// fn destroy_buffer_view(&self, view: B::BufferView); From d7e0676c0fde687d66dc44678bdfc9e8ea8fbdc8 Mon Sep 17 00:00:00 2001 From: Hal Gentz Date: Fri, 1 Jun 2018 14:16:57 -0600 Subject: [PATCH 3/4] Adds basic descriptor set support to the opengl backend Signed-off-by: Hal Gentz --- src/backend/gl/Cargo.toml | 2 +- src/backend/gl/src/command.rs | 53 ++++++- src/backend/gl/src/device.rs | 252 ++++++++++++++++++++++++++++++++-- src/backend/gl/src/info.rs | 7 + src/backend/gl/src/lib.rs | 2 +- src/backend/gl/src/native.rs | 102 +++++++++++++- src/backend/gl/src/queue.rs | 13 ++ 7 files changed, 403 insertions(+), 28 deletions(-) diff --git a/src/backend/gl/Cargo.toml b/src/backend/gl/Cargo.toml index 80509e01d2c..536b7854a6b 100644 --- a/src/backend/gl/Cargo.toml +++ b/src/backend/gl/Cargo.toml @@ -24,4 +24,4 @@ gfx_gl = "0.5" gfx-hal = { path = "../../hal", version = "0.1" } smallvec = "0.6" glutin = { version = "0.16", optional = true } -spirv_cross = "0.8" +spirv_cross = "0.9.2" diff --git a/src/backend/gl/src/command.rs b/src/backend/gl/src/command.rs index ab28143b41f..7a3b30fd40f 100644 --- a/src/backend/gl/src/command.rs +++ b/src/backend/gl/src/command.rs @@ -108,6 +108,10 @@ pub enum Command { CopySurfaceToBuffer(n::Surface, n::RawBuffer, command::BufferImageCopy), CopyImageToTexture(n::ImageKind, n::Texture, command::ImageCopy), CopyImageToSurface(n::ImageKind, n::Surface, command::ImageCopy), + + BindBufferRange(gl::types::GLenum, gl::types::GLuint, n::RawBuffer, gl::types::GLintptr, gl::types::GLsizeiptr), + BindTexture(gl::types::GLenum, n::Texture), + BindSampler(gl::types::GLuint, n::Texture), } pub type FrameBufferTarget = gl::types::GLenum; @@ -862,17 +866,56 @@ impl command::RawCommandBuffer for RawCommandBuffer { fn bind_graphics_descriptor_sets( &mut self, - _layout: &n::PipelineLayout, - _first_set: usize, - _sets: I, - _offsets: J, + layout: &n::PipelineLayout, + first_set: usize, + sets: I, + offsets: J, ) where I: IntoIterator, I::Item: Borrow, J: IntoIterator, J::Item: Borrow, { - // TODO + assert!(offsets.into_iter().next().is_none()); // TODO: offsets unsupported + + let mut set = first_set as _; + let drd = &*layout.desc_remap_data.read().unwrap(); + + for desc_set in sets { + let desc_set = desc_set.borrow(); + for new_binding in &*desc_set.bindings.lock().unwrap() { + match new_binding { + n::DescSetBindings::Buffer {ty: btype, binding, buffer, offset, size} => { + for binding in drd.get_binding(n::BindingTypes::UniformBuffers, set, *binding).unwrap() { + self.push_cmd(Command::BindBufferRange( + gl::UNIFORM_BUFFER, + *binding, + *buffer, + *offset, + *size, + )) + } + } + n::DescSetBindings::Texture(binding, texture) => { + for binding in drd.get_binding(n::BindingTypes::Images, set, *binding).unwrap() { + self.push_cmd(Command::BindTexture( + *binding, + *texture, + )) + } + } + n::DescSetBindings::Sampler(binding, sampler) => { + for binding in drd.get_binding(n::BindingTypes::Images, set, *binding).unwrap() { + self.push_cmd(Command::BindSampler( + *binding, + *sampler, + )) + } + } + } + } + set += 1; + } } fn bind_compute_pipeline(&mut self, pipeline: &n::ComputePipeline) { diff --git a/src/backend/gl/src/device.rs b/src/backend/gl/src/device.rs index e5873f41f77..5f663fdc70e 100644 --- a/src/backend/gl/src/device.rs +++ b/src/backend/gl/src/device.rs @@ -3,7 +3,7 @@ use std::cell::Cell; use std::iter::repeat; use std::ops::Range; use std::{ptr, mem, slice}; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, RwLock}; use gl; use gl::types::{GLint, GLenum, GLfloat}; @@ -284,21 +284,143 @@ impl Device { }) } + fn remap_bindings( + &self, + ast: &mut spirv::Ast, + desc_remap_data: &mut n::DescRemapData, + nb_map: &mut FastHashMap, + ) { + let res = ast.get_shader_resources().unwrap(); + self.remap_binding(ast, desc_remap_data, nb_map, &res.sampled_images, n::BindingTypes::Images); + self.remap_binding(ast, desc_remap_data, nb_map, &res.uniform_buffers, n::BindingTypes::UniformBuffers); + } + + fn remap_binding( + &self, + ast: &mut spirv::Ast, + desc_remap_data: &mut n::DescRemapData, + nb_map: &mut FastHashMap, + all_res: &[spirv::Resource], + btype: n::BindingTypes, + ) { + for res in all_res { + let set = ast.get_decoration(res.id, spirv::Decoration::DescriptorSet).unwrap(); + let binding = ast.get_decoration(res.id, spirv::Decoration::Binding).unwrap(); + let nbs = desc_remap_data.get_binding(btype, set as _, binding).unwrap(); + + for nb in nbs { + ast.set_decoration(res.id, spirv::Decoration::DescriptorSet, 0).unwrap(); + if self.share.legacy_features.contains(LegacyFeatures::EXPLICIT_LAYOUTS_IN_SHADER) { + ast.set_decoration(res.id, spirv::Decoration::Binding, *nb).unwrap() + } else { + ast.set_decoration(res.id, spirv::Decoration::Binding, 0).unwrap(); + assert!(nb_map.insert(res.name.clone(), *nb).is_none()) + } + } + } + } + + fn combine_seperate_images_and_samplers( + &self, + ast: &mut spirv::Ast, + desc_remap_data: &mut n::DescRemapData, + nb_map: &mut FastHashMap, + ) { + let mut id_map = FastHashMap::::default(); + let res = ast.get_shader_resources().unwrap(); + self.populate_id_map(ast, &mut id_map, &res.separate_images); + self.populate_id_map(ast, &mut id_map, &res.separate_samplers); + + let comb_res = ast.get_shader_resources().unwrap().sampled_images; + + for cis in ast.get_combined_image_samplers().unwrap() { + let (set, binding) = id_map.get(&cis.image_id).unwrap(); + let nb = desc_remap_data.reserve_binding(n::BindingTypes::Images); + desc_remap_data.insert_missing_binding( + nb, + n::BindingTypes::Images, + *set, + *binding, + ); + let (set, binding) = id_map.get(&cis.sampler_id).unwrap(); + desc_remap_data.insert_missing_binding( + nb, + n::BindingTypes::Images, + *set, + *binding, + ); + + ast.set_decoration(cis.combined_id, spirv::Decoration::DescriptorSet, 0).unwrap(); + if self.share.legacy_features.contains(LegacyFeatures::EXPLICIT_LAYOUTS_IN_SHADER) { + ast.set_decoration(cis.combined_id, spirv::Decoration::Binding, nb).unwrap() + } else { + ast.set_decoration(cis.combined_id, spirv::Decoration::Binding, 0).unwrap(); + let name = comb_res + .iter() + .filter_map(|t| + if t.id == cis.combined_id { + Some(t.name.clone()) + } else { + None + } + ) + .next() + .unwrap(); + + assert!(nb_map.insert(name, nb).is_none()) + } + } + } + + fn populate_id_map( + &self, + ast: &mut spirv::Ast, + id_map: &mut FastHashMap, + all_res: &[spirv::Resource], + ) { + for res in all_res { + let set = ast.get_decoration(res.id, spirv::Decoration::DescriptorSet).unwrap(); + let binding = ast.get_decoration(res.id, spirv::Decoration::Binding).unwrap(); + assert!(id_map.insert(res.id, (set as _, binding)).is_none()) + } + } + fn compile_shader( - &self, point: &pso::EntryPoint, stage: pso::Stage + &self, point: &pso::EntryPoint, stage: pso::Stage, desc_remap_data: &mut n::DescRemapData ) -> n::Shader { assert_eq!(point.entry, "main"); match *point.module { - n::ShaderModule::Raw(raw) => raw, + n::ShaderModule::Raw(raw) => { + debug!("Can't remap bindings for raw shaders. Assuming they are already rebound."); + raw + } n::ShaderModule::Spirv(ref spirv) => { let mut ast = self.parse_spirv(spirv).unwrap(); + + let mut name_binding_map = FastHashMap::::default(); + self.specialize_ast(&mut ast, point.specialization).unwrap(); + self.remap_bindings(&mut ast, desc_remap_data, &mut name_binding_map); + self.combine_seperate_images_and_samplers(&mut ast, desc_remap_data, &mut name_binding_map); + let glsl = self.translate_spirv(&mut ast).unwrap(); info!("Generated:\n{:?}", glsl); - match self.create_shader_module_from_source(glsl.as_bytes(), stage).unwrap() { + let program = match self.create_shader_module_from_source(glsl.as_bytes(), stage).unwrap() { n::ShaderModule::Raw(raw) => raw, _ => panic!("Unhandled") + }; + + if !self.share.legacy_features.contains(LegacyFeatures::EXPLICIT_LAYOUTS_IN_SHADER) { + let gl = &self.share.context; + for (name, binding) in name_binding_map.iter() { + unsafe { + let index = gl.GetUniformBlockIndex(program, name.as_ptr() as _); + gl.UniformBlockBinding(program, index, *binding) + } + } } + + program } } } @@ -383,14 +505,57 @@ impl d::Device for Device { } } - fn create_pipeline_layout(&self, _: IS, _: IR) -> n::PipelineLayout + fn create_pipeline_layout(&self, layouts: IS, _: IR) -> n::PipelineLayout where IS: IntoIterator, IS::Item: Borrow, IR: IntoIterator, IR::Item: Borrow<(pso::ShaderStageFlags, Range)>, { - n::PipelineLayout + let mut drd = n::DescRemapData::new(); + + layouts + .into_iter() + .enumerate() + .for_each(|(set, layout)| { + layout.borrow().iter().for_each(|binding| { + // DescriptorType -> Descriptor + // + // Sampler -> Sampler + // Image -> SampledImage, StorageImage, InputAttachment + // CombinedImageSampler -> CombinedImageSampler + // Buffer -> UniformBuffer, StorageBuffer + // UniformTexel -> UniformTexel + // StorageTexel -> StorageTexel + + assert!(!binding.immutable_samplers); //TODO: Implement immutable_samplers + use pso::DescriptorType::*; + match binding.ty { + CombinedImageSampler => { + drd.insert_missing_binding_into_spare(n::BindingTypes::Images, set as _, binding.binding); + } + Sampler | SampledImage => { + // We need to figure out combos once we get the shaders, until then we + // do nothing + } + UniformBuffer => { + drd.insert_missing_binding_into_spare(n::BindingTypes::UniformBuffers, set as _, binding.binding); + } + StorageImage + | UniformTexelBuffer + | UniformBufferDynamic + | StorageTexelBuffer + | StorageBufferDynamic + | StorageBuffer + + | InputAttachment => unimplemented!(), // 6 + } + }) + }); + + n::PipelineLayout { + desc_remap_data: Arc::new(RwLock::new(drd)), + } } fn create_graphics_pipeline<'a>( @@ -423,7 +588,7 @@ impl d::Device for Device { .iter() .filter_map(|&(stage, point_maybe)| { point_maybe.map(|point| { - let shader_name = self.compile_shader(point, stage); + let shader_name = self.compile_shader(point, stage, &mut desc.layout.desc_remap_data.write().unwrap()); unsafe { gl.AttachShader(name, shader_name); } shader_name }) @@ -507,10 +672,11 @@ impl d::Device for Device { ) -> Result { let gl = &self.share.context; let share = &self.share; + let program = { let name = unsafe { gl.CreateProgram() }; - let shader = self.compile_shader(&desc.shader, pso::Stage::Compute); + let shader = self.compile_shader(&desc.shader, pso::Stage::Compute, &mut desc.layout.desc_remap_data.write().unwrap()); unsafe { gl.AttachShader(name, shader) }; unsafe { gl.LinkProgram(name) }; @@ -588,7 +754,6 @@ impl d::Device for Device { assert!(pass.attachments.len() <= att_points.len()); gl.DrawBuffers(attachments_len as _, att_points.as_ptr()); let status = gl.CheckFramebufferStatus(target); - assert_eq!(status, gl::FRAMEBUFFER_COMPLETE); gl.BindFramebuffer(target, 0); } if let Err(err) = self.share.check() { @@ -764,6 +929,7 @@ impl d::Device for Device { Ok(n::Buffer { raw: unbound.name, target, + size: unbound.requirements.size, }) } @@ -973,14 +1139,15 @@ impl d::Device for Device { n::DescriptorPool { } } - fn create_descriptor_set_layout(&self, _: I, _: J) -> n::DescriptorSetLayout + fn create_descriptor_set_layout(&self, layout: I, _: J) -> n::DescriptorSetLayout where I: IntoIterator, I::Item: Borrow, J: IntoIterator, J::Item: Borrow, { - n::DescriptorSetLayout + // Just return it + layout.into_iter().map(|l| l.borrow().clone()).collect() } fn write_descriptor_sets<'a, I, J>(&self, writes: I) @@ -989,9 +1156,66 @@ impl d::Device for Device { J: IntoIterator, J::Item: Borrow>, { - for _write in writes { - //unimplemented!() // not panicing because of Warden - warn!("TODO: implement `write_descriptor_sets`"); + for mut write in writes { + let set = &mut write.set; + let mut bindings = set.bindings.lock().unwrap(); + let binding = write.binding; + let mut offset = write.array_offset as _; + + for descriptor in write.descriptors { + match descriptor.borrow() { + pso::Descriptor::Buffer(buffer, ref range) => { + let start = range.start.unwrap_or(0); + let end = range.end.unwrap_or(buffer.size); + let size = (end - start) as _; + + bindings + .push(n::DescSetBindings::Buffer { + ty: n::BindingTypes::UniformBuffers, + binding, + buffer: buffer.raw, + offset, + size, + }); + + offset += size; + }, + pso::Descriptor::CombinedImageSampler(view, _layout, sampler) => { + match view { + n::ImageView::Texture(tex, _) + | n::ImageView::TextureLayer(tex, _, _) => + bindings + .push(n::DescSetBindings::Texture(binding, *tex)), + n::ImageView::Surface(_) => unimplemented!(), + } + match sampler { + n::FatSampler::Sampler(sampler) => + bindings + .push(n::DescSetBindings::Sampler(binding, *sampler)), + n::FatSampler::Info(_) => unimplemented!(), + } + } + pso::Descriptor::Image(view, _layout) => { + match view { + n::ImageView::Texture(tex, _) + | n::ImageView::TextureLayer(tex, _, _) => + bindings + .push(n::DescSetBindings::Texture(binding, *tex)), + n::ImageView::Surface(_) => unimplemented!(), + } + } + pso::Descriptor::Sampler(sampler) => { + match sampler { + n::FatSampler::Sampler(sampler) => + bindings + .push(n::DescSetBindings::Sampler(binding, *sampler)), + n::FatSampler::Info(_) => unimplemented!(), + } + } + pso::Descriptor::UniformTexelBuffer(_view) => unimplemented!(), + pso::Descriptor::StorageTexelBuffer(_view) => unimplemented!(), + } + } } } diff --git a/src/backend/gl/src/info.rs b/src/backend/gl/src/info.rs index fcf61338de0..8195a934c53 100644 --- a/src/backend/gl/src/info.rs +++ b/src/backend/gl/src/info.rs @@ -226,6 +226,8 @@ bitflags! { const SAMPLER_LOD_BIAS = 0x2000; /// Support setting border texel colors. const SAMPLER_BORDER_COLOR = 0x4000; + /// No explicit layouts in shader support + const EXPLICIT_LAYOUTS_IN_SHADER = 0x8000; } } @@ -347,6 +349,11 @@ pub fn query_all(gl: &gl::Gl) -> (Info, Features, LegacyFeatures, Limits, Privat ]) { features |= Features::SAMPLER_ANISOTROPY; } + if info.is_supported(&[ + Core(4, 2), + ]) { + legacy |= LegacyFeatures::EXPLICIT_LAYOUTS_IN_SHADER; + } if info.is_supported(&[ Core(3, 3), Es(3, 0), diff --git a/src/backend/gl/src/lib.rs b/src/backend/gl/src/lib.rs index 15208afc2a6..acc99fb4098 100644 --- a/src/backend/gl/src/lib.rs +++ b/src/backend/gl/src/lib.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use std::ops::Deref; use std::thread::{self, ThreadId}; -use hal::{error, image}; +use hal::{error, image, pso}; use hal::queue::{Queues, QueueFamilyId}; pub use self::device::Device; diff --git a/src/backend/gl/src/native.rs b/src/backend/gl/src/native.rs index 3f60af4100f..c483d96bba7 100644 --- a/src/backend/gl/src/native.rs +++ b/src/backend/gl/src/native.rs @@ -1,13 +1,14 @@ use std::cell::Cell; +use std::sync::{Arc, Mutex, RwLock}; use hal::{format, image as i, pass, pso}; use hal::memory::Properties; +use hal::backend::FastHashMap; use gl; use Backend; use std::borrow::Borrow; - pub type RawBuffer = gl::types::GLuint; pub type Shader = gl::types::GLuint; pub type Program = gl::types::GLuint; @@ -16,12 +17,15 @@ pub type Surface = gl::types::GLuint; pub type Texture = gl::types::GLuint; pub type Sampler = gl::types::GLuint; +pub type DescriptorSetLayout = Vec; + pub const DEFAULT_FRAMEBUFFER: FrameBuffer = 0; #[derive(Debug)] pub struct Buffer { pub(crate) raw: RawBuffer, pub(crate) target: gl::types::GLenum, + pub(crate) size: u64, } #[derive(Debug)] @@ -38,6 +42,72 @@ impl Fence { } } +#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] +pub enum BindingTypes { + Images, + UniformBuffers, +} + +#[derive(Clone, Debug)] +pub struct DescRemapData { + bindings: FastHashMap<(BindingTypes, pso::DescriptorSetIndex, pso::DescriptorBinding), Vec>, + names: FastHashMap, + next_binding: FastHashMap, +} + +/// Stores where the descriptor bindings have been remaped too. +/// +/// OpenGL doesn't support sets, so we have to flatten out the bindings. +impl DescRemapData { + pub fn new() -> Self { + DescRemapData { + bindings: FastHashMap::default(), + names: FastHashMap::default(), + next_binding: FastHashMap::default(), + } + } + + pub fn insert_missing_binding_into_spare( + &mut self, + btype: BindingTypes, + set: pso::DescriptorSetIndex, + binding: pso::DescriptorBinding, + ) -> &[pso::DescriptorBinding] { + let nb = self.next_binding.entry(btype).or_insert(0); + let val = self.bindings.entry((btype, set, binding)).or_insert(Vec::new()); + val.push(*nb); + *nb += 1; + &*val + } + + pub fn reserve_binding(&mut self, btype: BindingTypes) -> pso::DescriptorBinding { + let nb = self.next_binding.entry(btype).or_insert(0); + *nb += 1; + *nb - 1 + } + + pub fn insert_missing_binding( + &mut self, + nb: pso::DescriptorBinding, + btype: BindingTypes, + set: pso::DescriptorSetIndex, + binding: pso::DescriptorBinding, + ) -> &[pso::DescriptorBinding] { + let val = self.bindings.entry((btype, set, binding)).or_insert(Vec::new()); + val.push(nb); + &*val + } + + pub fn get_binding( + &self, + btype: BindingTypes, + set: pso::DescriptorSetIndex, + binding: pso::DescriptorBinding, + ) -> Option<&[pso::DescriptorBinding]> { + self.bindings.get(&(btype, set, binding)).map(AsRef::as_ref) + } +} + #[derive(Clone, Debug)] pub struct GraphicsPipeline { pub(crate) program: Program, @@ -48,7 +118,7 @@ pub struct GraphicsPipeline { pub(crate) vertex_buffers: Vec>, } -#[derive(Clone, Debug, Copy)] +#[derive(Clone, Debug)] pub struct ComputePipeline { pub(crate) program: Program, } @@ -82,10 +152,23 @@ pub enum ImageView { } #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub struct DescriptorSetLayout; +pub(crate) enum DescSetBindings { + Buffer { + ty: BindingTypes, + binding: pso::DescriptorBinding, + buffer: RawBuffer, + offset: gl::types::GLintptr, + size: gl::types::GLsizeiptr + }, + Texture(pso::DescriptorBinding, Texture), + Sampler(pso::DescriptorBinding, Sampler), +} -#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub struct DescriptorSet; +#[derive(Clone, Debug)] +pub struct DescriptorSet { + layout: DescriptorSetLayout, + pub(crate) bindings: Arc>>, +} #[derive(Debug)] pub struct DescriptorPool {} @@ -96,7 +179,10 @@ impl pso::DescriptorPool for DescriptorPool { I: IntoIterator, I::Item: Borrow, { - layouts.into_iter().map(|_| Ok(DescriptorSet)).collect() + layouts.into_iter().map(|layout| Ok(DescriptorSet { + layout: layout.borrow().clone(), + bindings: Arc::new(Mutex::new(Vec::new())), + })).collect() } fn free_sets(&mut self, _descriptor_sets: &[DescriptorSet]) { @@ -166,7 +252,9 @@ impl SubpassDesc { } #[derive(Debug)] -pub struct PipelineLayout; +pub struct PipelineLayout { + pub(crate) desc_remap_data: Arc>, +} #[derive(Debug)] // No inter-queue synchronization required for GL. diff --git a/src/backend/gl/src/queue.rs b/src/backend/gl/src/queue.rs index 53c0e5d43bd..0b1c265169d 100644 --- a/src/backend/gl/src/queue.rs +++ b/src/backend/gl/src/queue.rs @@ -551,6 +551,19 @@ impl CommandQueue { com::Command::CopyImageToSurface(..) => { unimplemented!() //TODO: use FBO } + com::Command::BindBufferRange(target, index, buffer, offset, size) => unsafe { + let gl = &self.share.context; + gl.BindBufferRange(target, index, buffer, offset, size); + } + com::Command::BindTexture(index, texture) => unsafe { + let gl = &self.share.context; + gl.ActiveTexture(gl::TEXTURE0 + index); + gl.BindTexture(gl::TEXTURE_2D, texture); + } + com::Command::BindSampler(index, sampler) => unsafe { + let gl = &self.share.context; + gl.BindSampler(index, sampler); + } /* com::Command::BindConstantBuffer(pso::ConstantBufferParam(buffer, _, slot)) => unsafe { self.share.context.BindBufferBase(gl::UNIFORM_BUFFER, slot as gl::types::GLuint, buffer); From d2a313f703dc502766f44683456c88defc9a9bec Mon Sep 17 00:00:00 2001 From: Felix Kaaman Date: Thu, 14 Jun 2018 22:25:25 +0300 Subject: [PATCH 4/4] [dx11] add flush/invalidate, add image/buffer copies Fixes previous `Memory` implementation. Now works like the following: ``` 0.........................size +----------------------------+ | Memory | +----------------------------+ A..B C.....D E...F 1 fixed-size `STAGING` buffer which gets used for reading back from resources.(and should be used to copy from/to on flush/invalidate): (0..size, ComPtr) 1 `Vec` which covers the whole memory range (0..size). This is pointer we hand out to users. flush/invalidate moves the affected regions into our `STAGING` buffer to get read/uploaded. *N* Resources: (A..B, ComPtr), (C..D, ComPtr), (E..F, ComPtr), ``` Implements copying between images and buffers. Image<->Image copies are mostly handled by `CopySubresourceRegion` but some formats, while same size, cant be copied with this method: > Cannot invoke CopySubresourceRegion when the Formats of each Resource are not the same or at least castable to each other, unless one format is compressed (DXGI_FORMAT_R9G9B9E5_SHAREDEXP, or DXGI_FORMAT_BC[1,2,3,4,5]_* ) and the source format is similar to the dest according to: BC[1|4] ~= R16G16B16A16|R32G32, BC[2|3|5] ~= R32G32B32A32, R9G9B9E5_SHAREDEXP ~= R32. [ RESOURCE_MANIPULATION ERROR #281: ] These has to be done through compute shaders instead. Image->Buffer & Buffer->Image copies also have to be done through compute shaders, as `CopySubresourceRegion` can only copy between resources of same type (Image<->Image, Buffer<->Buffer). The following formats have Buffer->Image and Image->Buffer copies implemented with these changes: * `R8` * `Rg8` * `R16` * `Rg16` * `R32` --- src/backend/dx11/shaders/copy.hlsl | 201 +++++++++++++++++++++++- src/backend/dx11/src/conv.rs | 46 ++++-- src/backend/dx11/src/device.rs | 228 +++++++++++++++------------ src/backend/dx11/src/internal.rs | 238 ++++++++++++++++++++++++----- src/backend/dx11/src/lib.rs | 225 ++++++++++++++++++++++++--- 5 files changed, 762 insertions(+), 176 deletions(-) diff --git a/src/backend/dx11/shaders/copy.hlsl b/src/backend/dx11/shaders/copy.hlsl index c0bd27853fb..a239caf8661 100644 --- a/src/backend/dx11/shaders/copy.hlsl +++ b/src/backend/dx11/shaders/copy.hlsl @@ -1,15 +1,200 @@ -cbuffer BufferImageCopy : register(b0) { - uint2 BufferSize; - uint2 ImageOffset; +struct BufferCopy { + uint4 SrcDst; }; -StructuredBuffer CopySrc : register(t0); -RWTexture2D CopyDst : register(u0); +struct ImageCopy { + uint4 Src; + uint4 Dst; +}; + +struct BufferImageCopy { + // x=offset, yz=size + uint4 BufferVars; + uint4 ImageOffset; + uint4 ImageExtent; +}; + +cbuffer CopyConstants : register(b0) { + BufferCopy BufferCopies; + ImageCopy ImageCopies; + BufferImageCopy BufferImageCopies; +}; + +uint2 GetImageDst(uint3 dispatch_thread_id) +{ + return BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy; +} + +uint2 GetImageSrc(uint3 dispatch_thread_id) +{ + return BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy; +} + +uint GetBufferDst(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y; +} + +uint GetBufferSrc(uint3 dispatch_thread_id) +{ + return BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y; +} + +uint Uint4ToUint(uint4 data) +{ + data.x = min(data.x, 0x000000ff); + data.y = min(data.y, 0x000000ff); + data.z = min(data.z, 0x000000ff); + data.w = min(data.w, 0x000000ff); + + uint output = (data.x | + (data.y << 8) | + (data.z << 16) | + (data.w << 24)); + + return output; +} + +uint4 UintToUint4(uint data) +{ + return uint4((data & 0xff000000) >> 24, (data & 0xff0000) >> 16, (data & 0xff00) >> 8, data & 0xff); +} + +uint2 UintToUint2(uint data) +{ + return uint2((data >> 16) & 0x0000ffff, data & 0x0000ffff); +} + +uint Uint2ToUint(uint2 data) +{ + data.x = min(data.x, 0x0000ffff); + data.y = min(data.y, 0x0000ffff); + + uint output = (data.x | + (data.y << 16)); + + return output; +} + +// Buffers are always R32-aligned +StructuredBuffer BufferCopySrc : register(t0); +RWBuffer BufferCopyDst: register(u0); + +// R32 +Texture2D ImageCopySrcR32 : register(t0); +RWTexture2D ImageCopyDstR32 : register(u0); + +// TODO: correct, but slow +[numthreads(1, 1, 1)] +void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint2 dst_idx = GetImageDst(dispatch_thread_id); + uint src_idx = GetBufferSrc(dispatch_thread_id); + + ImageCopyDstR32[dst_idx] = BufferCopySrc[src_idx]; +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint dst_idx = GetBufferDst(dispatch_thread_id); + uint2 src_idx = GetImageSrc(dispatch_thread_id); + + BufferCopyDst[dst_idx] = ImageCopySrcR32[src_idx]; +} + +// R16G16 +Texture2D ImageCopySrcR16G16 : register(t0); +RWTexture2D ImageCopyDstR16G16 : register(u0); // TODO: correct, but slow [numthreads(1, 1, 1)] -void cs_copy_buffer_image_2d(uint3 dispatch_thread_id : SV_DispatchThreadID) { - uint2 idx = ImageOffset + dispatch_thread_id.xy; +void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint2 dst_idx = GetImageDst(dispatch_thread_id); + uint src_idx = GetBufferSrc(dispatch_thread_id); + + ImageCopyDstR16G16[dst_idx] = UintToUint2(BufferCopySrc[src_idx]); +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint dst_idx = GetBufferDst(dispatch_thread_id); + uint2 src_idx = GetImageSrc(dispatch_thread_id); + + BufferCopyDst[dst_idx] = Uint2ToUint(ImageCopySrcR16G16[src_idx].yx); +} + +// R16 +Texture2D ImageCopySrcR16 : register(t0); +RWTexture2D ImageCopyDstR16 : register(u0); + +[numthreads(1, 1, 1)] +void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2; + + uint2 data = UintToUint2(BufferCopySrc[src_idx]); + + ImageCopyDstR16[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.y; + ImageCopyDstR16[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.x; +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2; + + uint upper = ImageCopySrcR16[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))]; + uint lower = ImageCopySrcR16[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))]; + uint data = Uint2ToUint(uint2(upper, lower)); + + BufferCopyDst[dst_idx] = data; +} + +// R8G8 +Texture2D ImageCopySrcR8G8 : register(t0); +RWTexture2D ImageCopyDstR8G8 : register(u0); + +[numthreads(1, 1, 1)] +void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2; + + uint4 data = UintToUint4(BufferCopySrc[src_idx]); + + ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.xy; + ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.zw; +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2; + + uint2 lower = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))].yx; + uint2 upper = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))].yx; + uint data = Uint4ToUint(uint4(upper.x, upper.y, lower.x, lower.y)); + + BufferCopyDst[dst_idx] = data; +} + +// R8 +Texture2D ImageCopySrcR8 : register(t0); +RWTexture2D ImageCopyDstR8 : register(u0); + +[numthreads(1, 1, 1)] +void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4; + uint4 data = UintToUint4(BufferCopySrc[src_idx]); + + ImageCopyDstR8[GetImageDst(uint3(4, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.w; + ImageCopyDstR8[GetImageDst(uint3(4, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.z; + ImageCopyDstR8[GetImageDst(uint3(4, 1, 0) * dispatch_thread_id + uint3(2, 0, 0))] = data.y; + ImageCopyDstR8[GetImageDst(uint3(4, 1, 0) * dispatch_thread_id + uint3(3, 0, 0))] = data.x; +} + +[numthreads(1, 1, 1)] +void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) { + uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4; + + uint src_1 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))]; + uint src_2 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))]; + uint src_3 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(2, 0, 0))]; + uint src_4 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(3, 0, 0))]; - CopyDst[idx] = CopySrc[BufferSize.x + idx.x + idx.y * BufferSize.y]; + BufferCopyDst[dst_idx] = Uint4ToUint(uint4(src_1, src_2, src_3, src_4)); } diff --git a/src/backend/dx11/src/conv.rs b/src/backend/dx11/src/conv.rs index c91d4718255..a723b211be0 100644 --- a/src/backend/dx11/src/conv.rs +++ b/src/backend/dx11/src/conv.rs @@ -24,62 +24,84 @@ pub fn map_index_type(ty: IndexType) -> DXGI_FORMAT { } } -pub fn typeless_format(format: DXGI_FORMAT) -> Option { +pub fn typeless_format(format: DXGI_FORMAT) -> Option<(DXGI_FORMAT, DXGI_FORMAT)> { match format { DXGI_FORMAT_R8G8B8A8_UNORM | DXGI_FORMAT_R8G8B8A8_SNORM | DXGI_FORMAT_R8G8B8A8_UINT | DXGI_FORMAT_R8G8B8A8_SINT | - DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some(DXGI_FORMAT_R8G8B8A8_TYPELESS), + DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some((DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT)), // ?` DXGI_FORMAT_B8G8R8A8_UNORM | - DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some(DXGI_FORMAT_B8G8R8A8_TYPELESS), + DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some((DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_R32_UINT)), DXGI_FORMAT_R8_UNORM | DXGI_FORMAT_R8_SNORM | DXGI_FORMAT_R8_UINT | - DXGI_FORMAT_R8_SINT => Some(DXGI_FORMAT_R8_TYPELESS), + DXGI_FORMAT_R8_SINT => Some((DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT)), DXGI_FORMAT_R8G8_UNORM | DXGI_FORMAT_R8G8_SNORM | DXGI_FORMAT_R8G8_UINT | - DXGI_FORMAT_R8G8_SINT => Some(DXGI_FORMAT_R8G8_TYPELESS), + DXGI_FORMAT_R8G8_SINT => Some((DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT)), DXGI_FORMAT_R16_UNORM | DXGI_FORMAT_R16_SNORM | DXGI_FORMAT_R16_UINT | DXGI_FORMAT_R16_SINT | - DXGI_FORMAT_R16_FLOAT => Some(DXGI_FORMAT_R16_TYPELESS), + DXGI_FORMAT_R16_FLOAT => Some((DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT)), DXGI_FORMAT_R16G16_UNORM | DXGI_FORMAT_R16G16_SNORM | DXGI_FORMAT_R16G16_UINT | DXGI_FORMAT_R16G16_SINT | - DXGI_FORMAT_R16G16_FLOAT => Some(DXGI_FORMAT_R16G16_TYPELESS), + DXGI_FORMAT_R16G16_FLOAT => Some((DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT)), DXGI_FORMAT_R16G16B16A16_UNORM | DXGI_FORMAT_R16G16B16A16_SNORM | DXGI_FORMAT_R16G16B16A16_UINT | DXGI_FORMAT_R16G16B16A16_SINT | - DXGI_FORMAT_R16G16B16A16_FLOAT => Some(DXGI_FORMAT_R16G16B16A16_TYPELESS), + DXGI_FORMAT_R16G16B16A16_FLOAT => Some((DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT)), DXGI_FORMAT_D32_FLOAT | DXGI_FORMAT_R32_UINT | DXGI_FORMAT_R32_SINT | - DXGI_FORMAT_R32_FLOAT => Some(DXGI_FORMAT_R32_TYPELESS), + DXGI_FORMAT_R32_FLOAT => Some((DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_UINT)), DXGI_FORMAT_R32G32_UINT | DXGI_FORMAT_R32G32_SINT | - DXGI_FORMAT_R32G32_FLOAT => Some(DXGI_FORMAT_R32G32_TYPELESS), + DXGI_FORMAT_R32G32_FLOAT => Some((DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_UINT)), DXGI_FORMAT_R32G32B32_UINT | DXGI_FORMAT_R32G32B32_SINT | - DXGI_FORMAT_R32G32B32_FLOAT => Some(DXGI_FORMAT_R32G32B32_TYPELESS), + DXGI_FORMAT_R32G32B32_FLOAT => Some((DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_UINT)), DXGI_FORMAT_R32G32B32A32_UINT | DXGI_FORMAT_R32G32B32A32_SINT | - DXGI_FORMAT_R32G32B32A32_FLOAT => Some(DXGI_FORMAT_R32G32B32A32_TYPELESS), + DXGI_FORMAT_R32G32B32A32_FLOAT => Some((DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_UINT)), + + DXGI_FORMAT_BC1_UNORM | + DXGI_FORMAT_BC1_UNORM_SRGB => Some((DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_R32_UINT)), + + DXGI_FORMAT_BC2_UNORM | + DXGI_FORMAT_BC2_UNORM_SRGB => Some((DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_R32_UINT)), + + DXGI_FORMAT_BC3_UNORM | + DXGI_FORMAT_BC3_UNORM_SRGB => Some((DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_R32_UINT)), + + DXGI_FORMAT_BC4_UNORM | + DXGI_FORMAT_BC4_SNORM => Some((DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_R32_UINT)), + + DXGI_FORMAT_BC5_UNORM | + DXGI_FORMAT_BC5_SNORM => Some((DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_R32_UINT)), + + DXGI_FORMAT_BC6H_UF16 | + DXGI_FORMAT_BC6H_SF16 => Some((DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_R32_UINT)), + + // TODO: srgb craziness + DXGI_FORMAT_BC7_UNORM | + DXGI_FORMAT_BC7_UNORM_SRGB => Some((DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM)), /*R5g6b5Unorm => DXGI_FORMAT_B5G6R5_UNORM, R5g5b5a1Unorm => DXGI_FORMAT_B5G5R5A1_UNORM, diff --git a/src/backend/dx11/src/device.rs b/src/backend/dx11/src/device.rs index 1d4e043387f..04785cea565 100644 --- a/src/backend/dx11/src/device.rs +++ b/src/backend/dx11/src/device.rs @@ -18,9 +18,9 @@ use std::ptr; use { Backend, Buffer, BufferView, CommandPool, ComputePipeline, DescriptorPool, DescriptorSetLayout, - Fence, Framebuffer, GraphicsPipeline, Image, ImageView, InternalBuffer, Memory, PipelineLayout, - QueryPool, RenderPass, Sampler, Semaphore, ShaderModule, Surface, Swapchain, UnboundBuffer, - UnboundImage, ViewInfo, + Fence, Framebuffer, GraphicsPipeline, Image, ImageView, InternalBuffer, InternalImage, Memory, + PipelineLayout, QueryPool, RenderPass, Sampler, Semaphore, ShaderModule, Surface, Swapchain, + UnboundBuffer, UnboundImage, ViewInfo, }; use {conv, internal, shader}; @@ -29,19 +29,23 @@ pub struct Device { raw: ComPtr, pub(crate) context: ComPtr, memory_properties: hal::MemoryProperties, - pub(crate) internal: internal::BufferImageCopy + pub(crate) internal: internal::Internal } unsafe impl Send for Device { } unsafe impl Sync for Device { } impl Device { + pub fn as_raw(&self) -> *mut d3d11::ID3D11Device { + self.raw.as_raw() + } + pub fn new(device: ComPtr, context: ComPtr, memory_properties: hal::MemoryProperties) -> Self { Device { raw: device.clone(), context, memory_properties, - internal: internal::BufferImageCopy::new(device) + internal: internal::Internal::new(device) } } @@ -320,22 +324,22 @@ impl hal::Device for Device { mem_type: hal::MemoryTypeId, size: u64, ) -> Result { - let host_buffer = if mem_type.0 == 1 { + let working_buffer = if mem_type.0 == 1 { let desc = d3d11::D3D11_BUFFER_DESC { - ByteWidth: size as _, + ByteWidth: 65535, Usage: d3d11::D3D11_USAGE_STAGING, BindFlags: 0, - CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_WRITE, + CPUAccessFlags: d3d11::D3D11_CPU_ACCESS_READ | d3d11::D3D11_CPU_ACCESS_WRITE, MiscFlags:0, StructureByteStride: 0, }; - let mut host_buffer = ptr::null_mut(); + let mut working_buffer = ptr::null_mut(); let hr = unsafe { self.raw.CreateBuffer( &desc, ptr::null_mut(), - &mut host_buffer as *mut *mut _ as *mut *mut _ + &mut working_buffer as *mut *mut _ as *mut *mut _ ) }; @@ -343,7 +347,7 @@ impl hal::Device for Device { return Err(device::OutOfMemory); } - Some(unsafe { ComPtr::from_raw(host_buffer) }) + Some(unsafe { ComPtr::from_raw(working_buffer) }) } else { None }; @@ -351,9 +355,11 @@ impl hal::Device for Device { Ok(Memory { properties: self.memory_properties.memory_types[mem_type.0].properties, size, - flushes: RefCell::new(Vec::new()), + mapped_ptr: RefCell::new(None), + host_visible: Some(RefCell::new(Vec::with_capacity(size as usize))), + working_buffer, local_buffers: RefCell::new(Vec::new()), - host_buffer + local_images: RefCell::new(Vec::new()), }) } @@ -368,7 +374,8 @@ impl hal::Device for Device { } fn destroy_command_pool(&self, _pool: CommandPool) { - unimplemented!() + // TODO: + // unimplemented!() } fn create_render_pass<'a, IA, IS, ID>( @@ -614,9 +621,41 @@ impl hal::Device for Device { None }; + let uav = if unbound_buffer.usage.contains(buffer::Usage::TRANSFER_DST) { + let mut desc = unsafe { mem::zeroed::() }; + desc.Format = dxgiformat::DXGI_FORMAT_R32_UINT; + desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_BUFFER; + unsafe { + *desc.u.Buffer_mut() = d3d11::D3D11_BUFFER_UAV { + FirstElement: 0, + NumElements: unbound_buffer.size as u32 / 4, + Flags: 0 + }; + }; + + let mut uav = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateUnorderedAccessView( + raw.as_raw() as *mut _, + &desc, + &mut uav as *mut *mut _ as *mut *mut _ + ) + }; + + if !winerror::SUCCEEDED(hr) { + // TODO: better errors + return Err(device::BindError::WrongMemory); + } + + Some(uav) + } else { + None + }; + let buffer = InternalBuffer { raw: raw.into_raw(), - srv + srv, + uav, }; let range = offset..unbound_buffer.size; @@ -721,7 +760,7 @@ impl hal::Device for Device { }; let dxgi_format = conv::map_format(image.format).unwrap(); - let typeless_format = conv::typeless_format(dxgi_format).unwrap(); + let (typeless_format, typed_raw_format) = conv::typeless_format(dxgi_format).unwrap(); let (resource, levels) = match image.kind { image::Kind::D2(width, height, layers, _) => { @@ -762,9 +801,10 @@ impl hal::Device for Device { _ => unimplemented!() }; + // TODO: view dimensions let uav = if image.usage.contains(image::Usage::TRANSFER_DST) { let mut desc = unsafe { mem::zeroed::() }; - desc.Format = dxgiformat::DXGI_FORMAT_R32_UINT; + desc.Format = typed_raw_format; desc.ViewDimension = d3d11::D3D11_UAV_DIMENSION_TEXTURE2D; let mut uav = ptr::null_mut(); @@ -779,13 +819,41 @@ impl hal::Device for Device { if !winerror::SUCCEEDED(hr) { // TODO: better errors return Err(device::BindError::WrongMemory); - } else { - Some(unsafe { ComPtr::from_raw(uav) }) } + + Some(unsafe { ComPtr::from_raw(uav) }) } else { None }; + let srv = if image.usage.contains(image::Usage::TRANSFER_SRC) { + let mut desc = unsafe { mem::zeroed::() }; + desc.Format = typed_raw_format; + desc.ViewDimension = d3dcommon::D3D11_SRV_DIMENSION_TEXTURE2D; + // TODO: + *unsafe{ desc.u.Texture2D_mut() } = d3d11::D3D11_TEX2D_SRV { + MostDetailedMip: 0, + MipLevels: 1, + }; + + let mut srv = ptr::null_mut(); + let hr = unsafe { + self.raw.CreateShaderResourceView( + resource, + &desc, + &mut srv as *mut *mut _ as *mut *mut _ + ) + }; + + if !winerror::SUCCEEDED(hr) { + // TODO: better errors + return Err(device::BindError::WrongMemory); + } + + Some(unsafe { ComPtr::from_raw(srv) }) + } else { + None + }; let rtv = if image.usage.contains(image::Usage::COLOR_ATTACHMENT) { let mut rtv = ptr::null_mut(); @@ -799,24 +867,30 @@ impl hal::Device for Device { if !winerror::SUCCEEDED(hr) { return Err(device::BindError::WrongMemory); - } else { - Some(unsafe { ComPtr::from_raw(rtv) }) } + + Some(unsafe { ComPtr::from_raw(rtv) }) } else { None }; + let internal = InternalImage { + raw: resource, + srv, + uav, + rtv, + }; + Ok(Image { - resource: resource, kind: image.kind, usage: image.usage, storage_flags: image.flags, dxgi_format, + typed_raw_format, bytes_per_block: bytes_per_block, block_dim: block_dim, num_levels: levels as _, - uav, - rtv //unsafe { ComPtr::from_raw(rtv) } + internal, }) } @@ -829,7 +903,7 @@ impl hal::Device for Device { range: image::SubresourceRange, ) -> Result { let info = ViewInfo { - resource: image.resource, + resource: image.internal.raw, kind: image.kind, flags: image.storage_flags, view_kind, @@ -984,23 +1058,11 @@ impl hal::Device for Device { where R: RangeArg, { - assert_eq!(memory.host_buffer.is_some(), true); + if let Some(ref host_visible) = memory.host_visible { + let ptr = host_visible.borrow_mut().as_mut_ptr(); + memory.mapped_ptr.replace(Some(ptr)); - let buffer = memory.host_buffer.clone().unwrap(); - let mut mapped = unsafe { mem::zeroed::() }; - let hr = unsafe { - self.context.Map( - buffer.as_raw() as _, - 0, - // TODO: - d3d11::D3D11_MAP_WRITE, - 0, - &mut mapped - ) - }; - - if winerror::SUCCEEDED(hr) { - Ok(unsafe { mapped.pData.offset(*range.start().unwrap_or(&0) as isize) as _ }) + Ok(unsafe { ptr.offset(*range.start().unwrap_or(&0) as isize) }) } else { // TODO: better error Err(mapping::Error::InvalidAccess) @@ -1008,59 +1070,17 @@ impl hal::Device for Device { } fn unmap_memory(&self, memory: &Memory) { - assert_eq!(memory.host_buffer.is_some(), true); - let buffer = memory.host_buffer.clone().unwrap(); + assert_eq!(memory.host_visible.is_some(), true); + let buffer = memory.host_visible.clone().unwrap(); - unsafe { + /*unsafe { self.context.Unmap( buffer.as_raw() as _, 0, ); - } - - fn intersection(a: &Range, b: &Range) -> Option> { - let min = if a.start < b.start { a } else { b }; - let max = if min == a { b } else { a }; - - if min.end < max.start { - None - } else { - let end = if min.end < max.end { min.end } else { max.end }; - Some(max.start..end) - } - } + }*/ - // go through every range we wrote to - for range in memory.flushes.borrow().iter() { - // and for every resource whose "virtual address" in our imaginary - // heap intersects; we copy from our host visible buffer to the - // corresponding dx11 resource - for &(ref buffer_range, ref buffer) in memory.local_buffers.borrow().iter() { - if let Some(range) = intersection(&range, &buffer_range) { - unsafe { - self.context.CopySubresourceRegion( - buffer.raw as _, - 0, - 0, - 0, - 0, - memory.host_buffer.clone().unwrap().as_raw() as _, - 0, - &d3d11::D3D11_BOX { - left: range.start as _, - top: 0, - front: 0, - right: buffer_range.end as _, - bottom: 1, - back: 1, - } - ); - } - } - } - } - - memory.flushes.borrow_mut().clear(); + memory.mapped_ptr.replace(None); } fn flush_mapped_memory_ranges<'a, I, R>(&self, ranges: I) @@ -1069,12 +1089,13 @@ impl hal::Device for Device { I::Item: Borrow<(&'a Memory, R)>, R: RangeArg, { - // we can't copy while mapped, so store the ranges for later + + // go through every range we wrote to for range in ranges.into_iter() { let &(memory, ref range) = range.borrow(); - let range = *range.start().unwrap_or(&0)..*range.end().unwrap_or(&memory.size); + let range = memory.resolve(range); - memory.flush(range); + memory.flush(&self.context, range); } } @@ -1084,7 +1105,13 @@ impl hal::Device for Device { I::Item: Borrow<(&'a Memory, R)>, R: RangeArg, { - unimplemented!() + // go through every range we want to read from + for range in ranges.into_iter() { + let &(memory, ref range) = range.borrow(); + let range = *range.start().unwrap_or(&0)..*range.end().unwrap_or(&memory.size); + + memory.invalidate(&self.context, range); + } } fn create_semaphore(&self) -> Semaphore { @@ -1163,7 +1190,8 @@ impl hal::Device for Device { } fn destroy_image(&self, image: Image) { - unimplemented!() + // TODO: + // unimplemented!() } fn destroy_image_view(&self, _view: ImageView) { @@ -1182,7 +1210,7 @@ impl hal::Device for Device { } fn destroy_fence(&self, _fence: Fence) { - unimplemented!() + // unimplemented!() } fn destroy_semaphore(&self, _semaphore: Semaphore) { @@ -1301,18 +1329,24 @@ impl hal::Device for Device { let kind = image::Kind::D2(surface.width, surface.height, 1, 1); + let internal = InternalImage { + raw: resource, + srv: None, + uav: None, + rtv: Some(unsafe { ComPtr::from_raw(rtv) }) + }; + Image { - resource, kind, usage: config.image_usage, storage_flags: image::StorageFlags::empty(), // NOTE: not the actual format of the backbuffer(s) + typed_raw_format: dxgiformat::DXGI_FORMAT_UNKNOWN, dxgi_format: format, bytes_per_block, block_dim, num_levels: 1, - uav: None, - rtv: Some(unsafe { ComPtr::from_raw(rtv) }) + internal } }).collect(); diff --git a/src/backend/dx11/src/internal.rs b/src/backend/dx11/src/internal.rs index 0b97f34ad98..2e435af98bc 100644 --- a/src/backend/dx11/src/internal.rs +++ b/src/backend/dx11/src/internal.rs @@ -1,6 +1,7 @@ use hal::pso::{Stage}; use hal::command; +use winapi::shared::dxgiformat; use winapi::shared::winerror; use winapi::um::d3d11; use wio::com::ComPtr; @@ -10,36 +11,79 @@ use std::{mem, ptr}; use spirv_cross; use shader; +#[repr(C)] +struct BufferCopy { + src: u32, + dst: u32, + _padding: [u32; 2] +} + +#[repr(C)] +struct ImageCopy { + src: [u32; 4], + dst: [u32; 4], +} + +#[repr(C)] +struct BufferImageCopy { + buffer_offset: u32, + buffer_size: [u32; 2], + _padding: u32, + image_offset: [u32; 4], + image_extent: [u32; 4], +} + #[repr(C)] struct BufferImageCopyInfo { - data: [u32; 4], + buffer: BufferCopy, + image: ImageCopy, + buffer_image: BufferImageCopy, } #[derive(Clone)] -pub struct BufferImageCopy { - cs: ComPtr, +pub struct Internal { + cs_copy_image2d_r32_buffer: ComPtr, + cs_copy_image2d_r16g16_buffer: ComPtr, + cs_copy_image2d_r16_buffer: ComPtr, + cs_copy_image2d_r8g8_buffer: ComPtr, + cs_copy_image2d_r8_buffer: ComPtr, + + cs_copy_buffer_image2d_r32: ComPtr, + cs_copy_buffer_image2d_r16g16: ComPtr, + cs_copy_buffer_image2d_r16: ComPtr, + cs_copy_buffer_image2d_r8g8: ComPtr, + cs_copy_buffer_image2d_r8: ComPtr, + copy_info: ComPtr, } -impl BufferImageCopy { - pub fn new(device: ComPtr) -> Self { - let cs = { - let shader_src = include_bytes!("../shaders/copy.hlsl"); - let bytecode = unsafe { ComPtr::from_raw(shader::compile_hlsl_shader(Stage::Compute, spirv_cross::hlsl::ShaderModel::V5_0, "cs_copy_buffer_image_2d", shader_src).unwrap()) }; - let mut shader = ptr::null_mut(); - let hr = unsafe { - device.CreateComputeShader( - bytecode.GetBufferPointer(), - bytecode.GetBufferSize(), - ptr::null_mut(), - &mut shader as *mut *mut _ as *mut *mut _ - ) - }; - assert_eq!(true, winerror::SUCCEEDED(hr)); +fn compile(device: ComPtr, entrypoint: &str) -> ComPtr { + let shader_src = include_bytes!("../shaders/copy.hlsl"); + let bytecode = unsafe { + ComPtr::from_raw(shader::compile_hlsl_shader( + Stage::Compute, + spirv_cross::hlsl::ShaderModel::V5_0, + entrypoint, + shader_src + ).unwrap()) + }; - unsafe { ComPtr::from_raw(shader) } - }; + let mut shader = ptr::null_mut(); + let hr = unsafe { + device.CreateComputeShader( + bytecode.GetBufferPointer(), + bytecode.GetBufferSize(), + ptr::null_mut(), + &mut shader as *mut *mut _ as *mut *mut _ + ) + }; + assert_eq!(true, winerror::SUCCEEDED(hr)); + unsafe { ComPtr::from_raw(shader) } +} + +impl Internal { + pub fn new(device: ComPtr) -> Self { let copy_info = { let desc = d3d11::D3D11_BUFFER_DESC { ByteWidth: mem::size_of::() as _, @@ -63,13 +107,52 @@ impl BufferImageCopy { unsafe { ComPtr::from_raw(buffer) } }; - BufferImageCopy { - cs, + Internal { + cs_copy_image2d_r32_buffer: compile(device.clone(), "cs_copy_image2d_r32_buffer"), + cs_copy_image2d_r16g16_buffer: compile(device.clone(), "cs_copy_image2d_r16g16_buffer"), + cs_copy_image2d_r16_buffer: compile(device.clone(), "cs_copy_image2d_r16_buffer"), + cs_copy_image2d_r8g8_buffer: compile(device.clone(), "cs_copy_image2d_r8g8_buffer"), + cs_copy_image2d_r8_buffer: compile(device.clone(), "cs_copy_image2d_r8_buffer"), + + cs_copy_buffer_image2d_r32: compile(device.clone(), "cs_copy_buffer_image2d_r32"), + cs_copy_buffer_image2d_r16g16: compile(device.clone(), "cs_copy_buffer_image2d_r16g16"), + cs_copy_buffer_image2d_r16: compile(device.clone(), "cs_copy_buffer_image2d_r16"), + cs_copy_buffer_image2d_r8g8: compile(device.clone(), "cs_copy_buffer_image2d_r8g8"), + cs_copy_buffer_image2d_r8: compile(device.clone(), "cs_copy_buffer_image2d_r8"), copy_info } } - fn update_buffer(&mut self, context: ComPtr, info: command::BufferImageCopy) { + fn update_buffer(&mut self, context: ComPtr, info: command::BufferCopy) { + let mut mapped = unsafe { mem::zeroed::() }; + let hr = unsafe { + context.Map( + self.copy_info.as_raw() as _, + 0, + d3d11::D3D11_MAP_WRITE_DISCARD, + 0, + &mut mapped + ) + }; + + unsafe { ptr::copy(&BufferImageCopyInfo { + buffer: BufferCopy { + src: info.src as _, + dst: info.dst as _, + _padding: [0u32; 2] + }, + .. mem::zeroed() + }, mem::transmute(mapped.pData), 1) }; + + unsafe { + context.Unmap( + self.copy_info.as_raw() as _, + 0, + ); + } + } + + fn update_image(&mut self, context: ComPtr, info: command::ImageCopy) { let mut mapped = unsafe { mem::zeroed::() }; let hr = unsafe { context.Map( @@ -81,16 +164,44 @@ impl BufferImageCopy { ) }; - let info_struct = BufferImageCopyInfo { - data: [ - info.buffer_offset as u32, - info.buffer_width as u32, - info.image_offset.x as u32, - info.image_offset.y as u32, - ], + unsafe { ptr::copy(&BufferImageCopyInfo { + image: ImageCopy { + src: [info.src_offset.x as _, info.src_offset.y as _, info.src_offset.z as _, 0], + dst: [info.dst_offset.x as _, info.dst_offset.y as _, info.dst_offset.z as _, 0], + }, + .. mem::zeroed() + }, mem::transmute(mapped.pData), 1) }; + + unsafe { + context.Unmap( + self.copy_info.as_raw() as _, + 0, + ); + } + } + + fn update_buffer_image(&mut self, context: ComPtr, info: command::BufferImageCopy) { + let mut mapped = unsafe { mem::zeroed::() }; + let hr = unsafe { + context.Map( + self.copy_info.as_raw() as _, + 0, + d3d11::D3D11_MAP_WRITE_DISCARD, + 0, + &mut mapped + ) }; - unsafe { ptr::copy(&info_struct, mem::transmute(mapped.pData), 1) }; + unsafe { ptr::copy(&BufferImageCopyInfo { + buffer_image: BufferImageCopy { + buffer_offset: info.buffer_offset as _, + buffer_size: [info.buffer_width, info.buffer_height], + _padding: 0, + image_offset: [info.image_offset.x as _, info.image_offset.y as _, info.image_offset.z as _, 0], + image_extent: [info.image_extent.width, info.image_extent.height, info.image_extent.depth, 0], + }, + .. mem::zeroed() + }, mem::transmute(mapped.pData), 1) }; unsafe { context.Unmap( @@ -100,22 +211,77 @@ impl BufferImageCopy { } } - pub fn copy_2d(&mut self, + fn find_image_to_buffer_shader(&self, format: dxgiformat::DXGI_FORMAT) -> Option<(*mut d3d11::ID3D11ComputeShader, u32, u32)> { + use dxgiformat::*; + + match format { + DXGI_FORMAT_R32_UINT => Some((self.cs_copy_image2d_r32_buffer.as_raw(), 1, 1)), + DXGI_FORMAT_R16G16_UINT => Some((self.cs_copy_image2d_r16g16_buffer.as_raw(), 1, 1)), + DXGI_FORMAT_R16_UINT => Some((self.cs_copy_image2d_r16_buffer.as_raw(), 2, 1)), + DXGI_FORMAT_R8G8_UINT => Some((self.cs_copy_image2d_r8g8_buffer.as_raw(), 2, 1)), + DXGI_FORMAT_R8_UINT => Some((self.cs_copy_image2d_r8_buffer.as_raw(), 4, 1)), + _ => None + } + } + + fn find_buffer_to_image_shader(&self, format: dxgiformat::DXGI_FORMAT) -> Option<(*mut d3d11::ID3D11ComputeShader, u32, u32)> { + use dxgiformat::*; + + match format { + DXGI_FORMAT_R32_UINT => Some((self.cs_copy_buffer_image2d_r32.as_raw(), 1, 1)), + DXGI_FORMAT_R16G16_UINT => Some((self.cs_copy_buffer_image2d_r16g16.as_raw(), 1, 1)), + DXGI_FORMAT_R16_UINT => Some((self.cs_copy_buffer_image2d_r16.as_raw(), 2, 1)), + DXGI_FORMAT_R8G8_UINT => Some((self.cs_copy_buffer_image2d_r8g8.as_raw(), 2, 1)), + DXGI_FORMAT_R8_UINT => Some((self.cs_copy_buffer_image2d_r8.as_raw(), 4, 1)), + _ => None + } + } + + pub fn copy_image_2d_buffer(&mut self, + context: ComPtr, + image: ComPtr, + image_format: dxgiformat::DXGI_FORMAT, + buffer: *mut d3d11::ID3D11UnorderedAccessView, + info: command::BufferImageCopy) { + self.update_buffer_image(context.clone(), info.clone()); + let (shader, stride_x, stride_y) = self.find_image_to_buffer_shader(image_format).unwrap(); + + unsafe { + context.CSSetShader(shader, ptr::null_mut(), 0); + context.CSSetConstantBuffers(0, 1, &self.copy_info.as_raw()); + context.CSSetShaderResources(0, 1, &image.as_raw()); + context.CSSetUnorderedAccessViews(0, 1, &buffer, ptr::null_mut()); + + context.Dispatch( + info.image_extent.width / stride_x, + info.image_extent.height / stride_y, + 1 + ); + + // unbind external resources + context.CSSetShaderResources(0, 1, [ptr::null_mut(); 1].as_ptr()); + context.CSSetUnorderedAccessViews(0, 1, [ptr::null_mut(); 1].as_ptr(), ptr::null_mut()); + } + } + + pub fn copy_buffer_image_2d(&mut self, context: ComPtr, buffer: *mut d3d11::ID3D11ShaderResourceView, image: ComPtr, + image_format: dxgiformat::DXGI_FORMAT, info: command::BufferImageCopy) { - self.update_buffer(context.clone(), info.clone()); + self.update_buffer_image(context.clone(), info.clone()); + let (shader, stride_x, stride_y) = self.find_buffer_to_image_shader(image_format).unwrap(); unsafe { - context.CSSetShader(self.cs.as_raw(), ptr::null_mut(), 0); + context.CSSetShader(shader, ptr::null_mut(), 0); context.CSSetConstantBuffers(0, 1, &self.copy_info.as_raw()); context.CSSetShaderResources(0, 1, &buffer); context.CSSetUnorderedAccessViews(0, 1, &image.as_raw(), ptr::null_mut()); context.Dispatch( - info.image_extent.width, - info.image_extent.height, + info.image_extent.width / stride_x, + info.image_extent.height / stride_y, 1 ); diff --git a/src/backend/dx11/src/lib.rs b/src/backend/dx11/src/lib.rs index 5efb469db80..3290cc7a916 100644 --- a/src/backend/dx11/src/lib.rs +++ b/src/backend/dx11/src/lib.rs @@ -46,8 +46,10 @@ mod device; -#[derive(Clone, Debug)] +#[derive(Clone, Derivative)] +#[derivative(Debug)] pub(crate) struct ViewInfo { + #[derivative(Debug="ignore")] resource: *mut d3d11::ID3D11Resource, kind: image::Kind, flags: image::StorageFlags, @@ -696,7 +698,7 @@ impl hal::queue::RawCommandQueue for CommandQueue { pub struct CommandBuffer { // TODO: better way of sharing #[derivative(Debug="ignore")] - internal: internal::BufferImageCopy, + internal: internal::Internal, #[derivative(Debug="ignore")] context: ComPtr, #[derivative(Debug="ignore")] @@ -707,7 +709,7 @@ unsafe impl Send for CommandBuffer {} unsafe impl Sync for CommandBuffer {} impl CommandBuffer { - fn create_deferred(device: ComPtr, internal: internal::BufferImageCopy) -> Self { + fn create_deferred(device: ComPtr, internal: internal::Internal) -> Self { let mut context: *mut d3d11::ID3D11DeviceContext = ptr::null_mut(); let hr = unsafe { device.CreateDeferredContext(0, &mut context as *mut *mut _ as *mut *mut _) @@ -814,7 +816,7 @@ impl hal::command::RawCommandBuffer for CommandBuffer { let _sub = subresource_range.borrow(); unsafe { self.context.ClearRenderTargetView( - image.rtv.clone().unwrap().as_raw(), + image.internal.rtv.clone().unwrap().as_raw(), &color.float32 ); } @@ -1016,7 +1018,29 @@ impl hal::command::RawCommandBuffer for CommandBuffer { T: IntoIterator, T::Item: Borrow, { - unimplemented!() + for region in regions.into_iter() { + let info = region.borrow(); + + unsafe { + self.context.CopySubresourceRegion( + dst.internal.raw as _, + 0, + info.dst as _, + 0, + 0, + src.internal.raw as _, + 0, + &d3d11::D3D11_BOX { + left: info.src as _, + top: 0, + front: 0, + right: (info.src + info.size) as _, + bottom: 1, + back: 1, + } + ); + } + } } fn copy_image(&mut self, src: &Image, _: image::Layout, dst: &Image, _: image::Layout, regions: T) @@ -1024,7 +1048,30 @@ impl hal::command::RawCommandBuffer for CommandBuffer { T: IntoIterator, T::Item: Borrow, { - unimplemented!() + for region in regions.into_iter() { + let info = region.borrow(); + + // TODO: subresources + unsafe { + self.context.CopySubresourceRegion( + dst.internal.raw as _, + 0, + info.dst_offset.x as _, + info.dst_offset.y as _, + info.dst_offset.z as _, + src.internal.raw as _, + 0, + &d3d11::D3D11_BOX { + left: info.src_offset.x as _, + top: info.src_offset.y as _, + front: info.src_offset.z as _, + right: info.extent.width as _, + bottom: info.extent.height as _, + back: info.extent.depth as _, + } + ); + } + } } fn copy_buffer_to_image(&mut self, buffer: &Buffer, image: &Image, _: image::Layout, regions: T) @@ -1034,11 +1081,13 @@ impl hal::command::RawCommandBuffer for CommandBuffer { { assert_eq!(buffer.internal.srv.is_some(), true); + // TODO: more than 2D for copy in regions.into_iter() { - self.internal.copy_2d( + self.internal.copy_buffer_image_2d( self.context.clone(), buffer.internal.srv.unwrap(), - image.uav.clone().unwrap(), + image.internal.uav.clone().unwrap(), + image.typed_raw_format, copy.borrow().clone() ); } @@ -1049,7 +1098,17 @@ impl hal::command::RawCommandBuffer for CommandBuffer { T: IntoIterator, T::Item: Borrow, { - unimplemented!() + assert_eq!(buffer.internal.uav.is_some(), true); + + for copy in regions.into_iter() { + self.internal.copy_image_2d_buffer( + self.context.clone(), + image.internal.srv.clone().unwrap(), + image.typed_raw_format, + buffer.internal.uav.unwrap(), + copy.borrow().clone() + ); + } } fn draw(&mut self, vertices: Range, instances: Range) { @@ -1119,44 +1178,153 @@ impl hal::command::RawCommandBuffer for CommandBuffer { // Since we dont have any heaps to work with directly, everytime we bind a // buffer/image to memory we allocate a dx11 resource and assign it a range. // -// `HOST_VISIBLE` memory gets a staging buffer which covers the entire memory +// `HOST_VISIBLE` memory gets a `Vec` which covers the entire memory // range. This forces us to only expose non-coherent memory, as this -// abstraction acts as a "cache" since the staging buffer is disjoint from all -// the dx11 resources we store in the struct. +// abstraction acts as a "cache" since the "staging buffer" vec is disjoint +// from all the dx11 resources we store in the struct. #[derive(Derivative)] #[derivative(Debug)] pub struct Memory { properties: memory::Properties, size: u64, - // stores flushed ranges inbetween mappings - flushes: RefCell>>, + mapped_ptr: RefCell>, + + // staging buffer covering the whole memory region, if it's HOST_VISIBLE + host_visible: Option>>, + + #[derivative(Debug="ignore")] + working_buffer: Option>, // list of all buffers bound to this memory #[derivative(Debug="ignore")] local_buffers: RefCell, InternalBuffer)>>, - // staging buffer covering the whole memory region, if it's HOST_VISIBLE + // list of all images bound to this memory #[derivative(Debug="ignore")] - host_buffer: Option>, + local_images: RefCell, InternalImage)>>, } unsafe impl Send for Memory {} unsafe impl Sync for Memory {} +fn intersection(a: &Range, b: &Range) -> Option> { + let min = if a.start < b.start { a } else { b }; + let max = if min == a { b } else { a }; + + if min.end < max.start { + None + } else { + let end = if min.end < max.end { min.end } else { max.end }; + Some(max.start..end) + } +} + +// TODO: implement flush/invalidate for bound images as well impl Memory { - pub fn flush(&self, range: Range) { - self.flushes.borrow_mut().push(range); + pub fn resolve>(&self, range: &R) -> Range { + *range.start().unwrap_or(&0) .. *range.end().unwrap_or(&self.size) } pub fn bind_buffer(&self, range: Range, buffer: InternalBuffer) { self.local_buffers.borrow_mut().push((range, buffer)); } + + pub fn flush(&self, context: &ComPtr, range: Range) { + for &(ref buffer_range, ref buffer) in self.local_buffers.borrow().iter() { + if let Some(range) = intersection(&range, &buffer_range) { + unsafe { + let src = self.mapped_ptr.borrow().unwrap(); + + context.UpdateSubresource( + buffer.raw as _, + 0, + &d3d11::D3D11_BOX { + left: (range.start - buffer_range.start) as _, + top: 0, + front: 0, + right: (range.end - buffer_range.start) as _, + bottom: 1, + back: 1, + }, + src.offset(range.start as isize) as _, + 0, + 0 + ); + } + } + } + } + + fn map(&self, context: &ComPtr) -> *mut u8 { + assert_eq!(self.working_buffer.is_some(), true); + + unsafe { + let mut map = mem::zeroed(); + let hr = context.Map( + self.working_buffer.clone().unwrap().as_raw() as _, + 0, + d3d11::D3D11_MAP_READ, + 0, + &mut map + ); + + assert_eq!(hr, winerror::S_OK); + + map.pData as _ + } + } + + fn unmap(&self, context: &ComPtr) { + unsafe { + context.Unmap( + self.working_buffer.clone().unwrap().as_raw() as _, + 0, + ); + } + } + + pub fn invalidate(&self, context: &ComPtr, range: Range) { + for &(ref buffer_range, ref buffer) in self.local_buffers.borrow().iter() { + if let Some(range) = intersection(&range, &buffer_range) { + unsafe { + + // upload to staging buffer + context.CopySubresourceRegion( + self.working_buffer.clone().unwrap().as_raw() as _, + 0, + 0, + 0, + 0, + buffer.raw as _, + 0, + &d3d11::D3D11_BOX { + left: range.start as _, + top: 0, + front: 0, + right: range.end as _, + bottom: 1, + back: 1, + } + ); + + // TODO: handle memory larger than our hardcoded 1<<15 + // staging buffer + + // copy over to our vec + let dst = self.mapped_ptr.borrow().unwrap().offset(range.start as isize); + let src = self.map(&context); + ptr::copy(src, dst, (range.end - range.start) as usize); + self.unmap(&context); + } + } + } + } } pub struct CommandPool { device: ComPtr, - internal: internal::BufferImageCopy, + internal: internal::Internal, } unsafe impl Send for CommandPool {} @@ -1174,7 +1342,8 @@ impl hal::pool::RawCommandPool for CommandPool { } unsafe fn free(&mut self, _cbufs: Vec) { - unimplemented!() + // TODO: + // unimplemented!() } } @@ -1216,7 +1385,8 @@ pub struct UnboundBuffer { #[derive(Clone)] pub struct InternalBuffer { raw: *mut d3d11::ID3D11Buffer, - srv: Option<*mut d3d11::ID3D11ShaderResourceView> + srv: Option<*mut d3d11::ID3D11ShaderResourceView>, + uav: Option<*mut d3d11::ID3D11UnorderedAccessView> } #[derive(Derivative)] @@ -1247,15 +1417,24 @@ pub struct UnboundImage { #[derive(Derivative)] #[derivative(Debug)] pub struct Image { - #[derivative(Debug="ignore")] - resource: *mut d3d11::ID3D11Resource, kind: image::Kind, usage: image::Usage, storage_flags: image::StorageFlags, dxgi_format: dxgiformat::DXGI_FORMAT, + typed_raw_format: dxgiformat::DXGI_FORMAT, bytes_per_block: u8, block_dim: (u8, u8), num_levels: image::Level, + internal: InternalImage, +} + +#[derive(Derivative)] +#[derivative(Debug)] +pub struct InternalImage { + #[derivative(Debug="ignore")] + raw: *mut d3d11::ID3D11Resource, + #[derivative(Debug="ignore")] + srv: Option>, #[derivative(Debug="ignore")] uav: Option>, #[derivative(Debug="ignore")]