Merge #2110

2110: dx12: Add dynamically sized CPU descriptor heaps r=kvark a=msiglreith

Dynamic allocator for CPU descriptor heaps. We currently have 2 ways of allocating CPU descriptors: from a device global heap and command local linear allocators. This PR cleans up the two paths a bit and fixes the current size limitation for the first kind by dynamically creating new small fixed size CPU heaps.

Dynamic allocator for CPU descriptor heaps. We currently have two ways of allocating CPU descriptors: from a device-global heap and from command-local linear allocators. This PR cleans up the two paths a bit and fixes the current size limitation of the first kind by dynamically creating new small fixed-size CPU heaps.

Freeing is currently not supported, but that's not a regression as we didn't support it before either.
There is a small 'regression' concerning `fill_buffer`, which was implemented incorrectly as far as I can see and probably only worked on AMD. This breaks the fill reftests.
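
For context, a rough sketch of how the two allocation paths are meant to be used after this change (illustrative only; `HeapLinear` and `DescriptorCpuPool` are the types added in `descriptors_cpu.rs` below, while surrounding values such as `device` and `clear_rects` are assumed to exist):

```rust
// Illustrative sketch, not part of this diff. `device: ComPtr<ID3D12Device>` is assumed.

// 1. Device-global pool: grows on demand by chaining small fixed-size heaps.
let mut rtv_pool = descriptors_cpu::DescriptorCpuPool::new(
    &device,
    d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
);
let rtv = rtv_pool.alloc_handle(); // transparently creates a new Heap when all are full

// 2. Command-local linear allocator: fixed capacity, reset wholesale after use.
let mut clear_pool = descriptors_cpu::HeapLinear::new(
    &device,
    d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
    clear_rects.len(),
);
let clear_rtv = clear_pool.alloc_handle();
// ... record clears using `clear_rtv` ...
clear_pool.clear(); // bump allocator resets to zero; no per-handle freeing
```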

Fixes #1915
PR checklist:
- [x] tested quad, relevant CTS, and a few Vulkan samples with the following backends: dx12


Co-authored-by: msiglreith <[email protected]>
bors[bot] and msiglreith committed Jun 5, 2018
2 parents 505e805 + 8e6c759 commit 3058b6e
Showing 5 changed files with 228 additions and 136 deletions.
30 changes: 14 additions & 16 deletions src/backend/dx12/src/command.rs
@@ -16,7 +16,7 @@ use winapi::shared::{dxgiformat, winerror};

use wio::com::ComPtr;

use {conv, device, internal, native as n, Backend, Device, Shared, MAX_VERTEX_BUFFERS, validate_line_width};
use {conv, device, descriptors_cpu, internal, native as n, Backend, Device, Shared, MAX_VERTEX_BUFFERS, validate_line_width};
use device::ViewInfo;
use root_constants::RootConstant;
use smallvec::SmallVec;
@@ -1249,19 +1249,11 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
pass_cache.framebuffer.attachments[rtv_id]
};

let mut rtv_pool = n::DescriptorCpuPool {
heap: Device::create_descriptor_heap_impl(
&mut device.clone(),
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
false,
clear_rects.len()
),
offset: 0,
size: 0,
max_size: clear_rects.len() as _
};

self.rtv_pools.push(rtv_pool.heap.raw.clone());
let mut rtv_pool = descriptors_cpu::HeapLinear::new(
&device,
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
clear_rects.len()
);

for clear_rect in &clear_rects {
let rect = [get_rect(&clear_rect.rect)];
@@ -1278,9 +1270,10 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
layers: clear_rect.layers.clone()
}
};
let rtv = Device::view_image_as_render_target_impl(
let rtv = rtv_pool.alloc_handle();
Device::view_image_as_render_target_impl(
&mut device,
&mut rtv_pool,
rtv,
view_info
).unwrap();

@@ -1841,6 +1834,10 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
);
unsafe { self.raw.ResourceBarrier(1, &pre_barrier) };

warn!("fill_buffer currently unimplemented");
// TODO: the GPU handle must come from the currently bound shader-visible heap. At the moment we
// allocate from a CPU descriptor heap, which is not shader visible.
/*
let handle = buffer.clear_uav.unwrap();
unsafe {
self.raw.ClearUnorderedAccessViewUint(
@@ -1852,6 +1849,7 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
&rect as *const _,
);
}
*/

let post_barrier = Self::transition_barrier(
d3d12::D3D12_RESOURCE_TRANSITION_BARRIER {
171 changes: 171 additions & 0 deletions src/backend/dx12/src/descriptors_cpu.rs
@@ -0,0 +1,171 @@

use std::collections::HashSet;
use std::ptr;
use winapi::Interface;
use winapi::um::d3d12;
use wio::com::ComPtr;

// Linear stack allocator for CPU descriptor heaps.
pub struct HeapLinear {
handle_size: usize,
num: usize,
size: usize,
start: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
_raw: ComPtr<d3d12::ID3D12DescriptorHeap>,
}

impl HeapLinear {
pub fn new(
device: &ComPtr<d3d12::ID3D12Device>,
ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
size: usize,
) -> Self {
let desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
Type: ty,
NumDescriptors: size as u32,
Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
NodeMask: 0,
};

let mut heap: *mut d3d12::ID3D12DescriptorHeap = ptr::null_mut();
let handle_size = unsafe {
device.CreateDescriptorHeap(
&desc,
&d3d12::ID3D12DescriptorHeap::uuidof(),
&mut heap as *mut *mut _ as *mut *mut _,
);
device.GetDescriptorHandleIncrementSize(ty) as usize
};

let start = unsafe { (*heap).GetCPUDescriptorHandleForHeapStart() };

HeapLinear {
handle_size,
num: 0,
size,
start,
_raw: unsafe { ComPtr::from_raw(heap) },
}
}

pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
assert!(!self.is_full());

let slot = self.num;
self.num += 1;

d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
ptr: self.start.ptr + self.handle_size * slot,
}
}

pub fn is_full(&self) -> bool {
self.num >= self.size
}

pub fn clear(&mut self) {
self.num = 0;
}
}

const HEAP_SIZE_FIXED: usize = 64;

// Fixed-size free-list allocator for CPU descriptors.
struct Heap {
// Bit flag representation of available handles in the heap.
//
// 0 - occupied
// 1 - free
availability: u64,
handle_size: usize,
start: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
_raw: ComPtr<d3d12::ID3D12DescriptorHeap>,
}

impl Heap {
pub fn new(device: &ComPtr<d3d12::ID3D12Device>, ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE) -> Self {
let desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
Type: ty,
NumDescriptors: HEAP_SIZE_FIXED as _,
Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
NodeMask: 0,
};

let mut heap: *mut d3d12::ID3D12DescriptorHeap = ptr::null_mut();
let handle_size = unsafe {
device.CreateDescriptorHeap(
&desc,
&d3d12::ID3D12DescriptorHeap::uuidof(),
&mut heap as *mut *mut _ as *mut *mut _,
);
device.GetDescriptorHandleIncrementSize(ty) as usize
};
let start = unsafe { (*heap).GetCPUDescriptorHandleForHeapStart() };

Heap {
handle_size,
availability: !0, // all free!
start,
_raw: unsafe { ComPtr::from_raw(heap) },
}
}

pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
// Find first free slot.
let slot = self.availability.trailing_zeros() as usize;
assert!(slot < HEAP_SIZE_FIXED);
// Set the slot as occupied.
self.availability ^= 1 << slot;

d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
ptr: self.start.ptr + self.handle_size * slot,
}
}

pub fn is_full(&self) -> bool {
self.availability == 0
}
}

pub struct DescriptorCpuPool {
device: ComPtr<d3d12::ID3D12Device>,
ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
heaps: Vec<Heap>,
free_list: HashSet<usize>,
}

impl DescriptorCpuPool {
pub fn new(device: &ComPtr<d3d12::ID3D12Device>, ty: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE) -> Self {
DescriptorCpuPool {
device: device.clone(),
ty,
heaps: Vec::new(),
free_list: HashSet::new(),
}
}

pub fn alloc_handle(&mut self) -> d3d12::D3D12_CPU_DESCRIPTOR_HANDLE {
let heap_id = self
.free_list
.iter()
.cloned()
.next()
.unwrap_or_else(|| {
// Allocate a new heap
let id = self.heaps.len();
self.heaps.push(Heap::new(&self.device, self.ty));
self.free_list.insert(id);
id
});

let heap = &mut self.heaps[heap_id];
let handle = heap.alloc_handle();
if heap.is_full() {
self.free_list.remove(&heap_id);
}

handle
}

// TODO: free handles
}
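
A quick note on the fixed-size `Heap` above: the `availability` bitmask tracks one bit per descriptor slot, so finding the first free slot is a single `trailing_zeros` and marking it occupied is an XOR. A minimal sketch of that bookkeeping in isolation (illustrative only, not part of the diff):

```rust
// Illustrative sketch of the bitmask bookkeeping used by `Heap` above.
fn take_first_free(availability: &mut u64) -> Option<usize> {
    if *availability == 0 {
        return None; // heap is full
    }
    let slot = availability.trailing_zeros() as usize; // lowest set bit == first free slot
    *availability ^= 1 << slot; // flip that bit to "occupied"
    Some(slot)
}

fn main() {
    let mut avail: u64 = !0; // all 64 slots free
    assert_eq!(take_first_free(&mut avail), Some(0));
    assert_eq!(take_first_free(&mut avail), Some(1));
    avail |= 1 << 0; // freeing slot 0 would simply set its bit again (not implemented in this PR yet)
    assert_eq!(take_first_free(&mut avail), Some(0));
}
```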
54 changes: 26 additions & 28 deletions src/backend/dx12/src/device.rs
Expand Up @@ -19,7 +19,7 @@ use hal::queue::{RawCommandQueue, QueueFamilyId};
use hal::range::RangeArg;

use {
conv, native as n, root_constants, window as w,
conv, descriptors_cpu, native as n, root_constants, window as w,
Backend as B, Device, MemoryGroup, QUEUE_FAMILIES, MAX_VERTEX_BUFFERS, NUM_HEAP_PROPERTIES,
};
use pool::RawCommandPool;
@@ -560,8 +560,10 @@ impl Device {
}

pub(crate) fn view_image_as_render_target_impl(
device: &mut ComPtr<d3d12::ID3D12Device>, pool: &mut n::DescriptorCpuPool, info: ViewInfo
) -> Result<d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, image::ViewError> {
device: &mut ComPtr<d3d12::ID3D12Device>,
handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
info: ViewInfo,
) -> Result<(), image::ViewError> {
#![allow(non_snake_case)]

let mut desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC {
@@ -642,21 +644,21 @@ impl Device {
}
};

let handle = pool.alloc_handles(1).cpu;

unsafe {
device.CreateRenderTargetView(info.resource, &desc, handle);
}

Ok(handle)
Ok(())
}

fn view_image_as_render_target(
&self, info: ViewInfo
) -> Result<d3d12::D3D12_CPU_DESCRIPTOR_HANDLE, image::ViewError> {
let mut pool = self.rtv_pool.lock().unwrap();
let handle = pool.alloc_handle();

Self::view_image_as_render_target_impl(&mut self.raw.clone(), &mut *pool, info)
Self::view_image_as_render_target_impl(&mut self.raw.clone(), handle, info)
.map(|_| handle)
}

fn view_image_as_depth_stencil(
@@ -728,7 +730,7 @@ impl Device {
}
};

let handle = self.dsv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.dsv_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateDepthStencilView(info.resource, &desc, handle);
}
@@ -857,7 +859,7 @@ impl Device {
};

let desc = Self::build_image_as_shader_resource_desc(&info)?;
let handle = self.srv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateShaderResourceView(info.resource, &desc, handle);
}
@@ -933,7 +935,7 @@ impl Device {
}
}

let handle = self.uav_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateUnorderedAccessView(info.resource, ptr::null_mut(), &desc, handle);
}
@@ -1863,7 +1865,7 @@ impl d::Device<B> for Device {
});

let clear_uav = if buffer.usage.contains(buffer::Usage::TRANSFER_DST) {
let handles = self.uav_pool.lock().unwrap().alloc_handles(1);
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
let mut view_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER,
@@ -1883,10 +1885,10 @@ impl d::Device<B> for Device {
resource as *mut _,
ptr::null_mut(),
&view_desc,
handles.cpu,
handle,
);
}
Some(handles)
Some(handle)
} else {
None
};
@@ -1937,7 +1939,7 @@ impl d::Device<B> for Device {
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_NONE,
};

let handle = self.srv_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateShaderResourceView(buffer.resource, &desc, handle);
}
@@ -1961,7 +1963,7 @@ impl d::Device<B> for Device {
CounterOffsetInBytes: 0,
};

let handle = self.uav_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.srv_uav_pool.lock().unwrap().alloc_handle();
unsafe {
self.raw.clone().CreateUnorderedAccessView(buffer.resource, ptr::null_mut(), &desc, handle);
}
@@ -2278,7 +2280,7 @@ impl d::Device<B> for Device {
}

fn create_sampler(&self, info: image::SamplerInfo) -> n::Sampler {
let handle = self.sampler_pool.lock().unwrap().alloc_handles(1).cpu;
let handle = self.sampler_pool.lock().unwrap().alloc_handle();

let op = match info.comparison {
Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
@@ -2428,21 +2430,17 @@ impl d::Device<B> for Device {
pso::Descriptor::Buffer(buffer, ref range) => {
if update_pool_index == descriptor_update_pools.len() {
let max_size = 1u64<<12; //arbitrary
descriptor_update_pools.push(n::DescriptorCpuPool {
heap: Self::create_descriptor_heap_impl(
&mut self.raw.clone(),
descriptor_update_pools.push(
descriptors_cpu::HeapLinear::new(
&self.raw,
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
false,
max_size as _,
),
offset: 0,
size: 0,
max_size,
});
)
);
}
let heap = descriptor_update_pools.last_mut().unwrap();
let handle = heap.alloc_handles(1).cpu;
if heap.size == heap.max_size {
let handle = heap.alloc_handle();
if heap.is_full() {
// pool is full, move to the next one
update_pool_index += 1;
}
@@ -2561,7 +2559,7 @@ impl d::Device<B> for Device {

// reset the temporary CPU-side descriptor pools
for buffer_desc_pool in descriptor_update_pools.iter_mut() {
buffer_desc_pool.size = 0;
buffer_desc_pool.clear();
}
}

