Skip to content

Commit

Permalink
[dx11] add flush/invalidate, add image/buffer copies
Browse files Browse the repository at this point in the history
Fixes previous `Memory` implementation. Now works like the following:

```
    0.........................size
    +----------------------------+
    |          Memory            |
    +----------------------------+
    A..B  C.....D         E...F

    1 fixed-size `STAGING` buffer which gets used for reading back from
      resources (and should be used to copy from/to on flush/invalidate):
      (0..size, ComPtr<Buffer>)

    1 `Vec<u8>` which covers the whole memory range (0..size). This is the
      pointer we hand out to users. flush/invalidate moves the affected
      regions into our `STAGING` buffer to get read/uploaded.

    *N* Resources:
      (A..B, ComPtr<Resource>),
      (C..D, ComPtr<Resource>),
      (E..F, ComPtr<Resource>),
```

Implements copying between images and buffers. Image<->Image copies are
mostly handled by `CopySubresourceRegion`, but some formats, while the same
size, can't be copied with this method:

> Cannot invoke CopySubresourceRegion when the Formats of each Resource are not the same or at least castable to each other, unless one format is compressed (DXGI_FORMAT_R9G9B9E5_SHAREDEXP, or DXGI_FORMAT_BC[1,2,3,4,5]_* ) and the source format is similar to the dest according to: BC[1|4] ~= R16G16B16A16|R32G32, BC[2|3|5] ~= R32G32B32A32, R9G9B9E5_SHAREDEXP ~= R32. [ RESOURCE_MANIPULATION ERROR #281: ]

These have to be done through compute shaders instead. Image->Buffer &
Buffer->Image copies also have to be done through compute shaders, as
`CopySubresourceRegion` can only copy between resources of same type
(Image<->Image, Buffer<->Buffer). The following formats have
Buffer->Image and Image->Buffer copies implemented with these changes:

* `R8`
* `Rg8`
* `R16`
* `Rg16`
* `R32`
  • Loading branch information
fkaa committed Jun 14, 2018
1 parent 27d1812 commit 9603c6d
Show file tree
Hide file tree
Showing 5 changed files with 750 additions and 176 deletions.
201 changes: 193 additions & 8 deletions src/backend/dx11/shaders/copy.hlsl
Original file line number Diff line number Diff line change
@@ -1,15 +1,200 @@
cbuffer BufferImageCopy : register(b0) {
uint2 BufferSize;
uint2 ImageOffset;
// Constants for buffer-to-buffer copies.
// NOTE(review): how the four components are split between source and
// destination is not visible in this file — confirm against the host code
// that fills the constant buffer.
struct BufferCopy {
uint4 SrcDst;
};

StructuredBuffer<uint> CopySrc : register(t0);
RWTexture2D<uint> CopyDst : register(u0);
// Constants for image-to-image copies: one packed vector for the source and
// one for the destination.
// NOTE(review): the per-component meaning is not visible in this file —
// confirm against the host code that fills the constant buffer.
struct ImageCopy {
uint4 Src;
uint4 Dst;
};

// Constants for buffer<->image copies.
struct BufferImageCopy {
// x=offset, yz=size
// The kernels in this file use .x as the base index into the buffer and
// multiply .y by the thread's y coordinate to step from one row to the next.
uint4 BufferVars;
// .xy is added to the thread's xy coordinate to locate the texel.
uint4 ImageOffset;
// NOTE(review): presumably the extent of the copied region; no kernel in the
// visible part of this file reads it — confirm.
uint4 ImageExtent;
};

// Single constant buffer (slot b0) carrying the parameters for every copy
// kind; each kernel reads only the member that matches its copy type.
cbuffer CopyConstants : register(b0) {
BufferCopy BufferCopies;
ImageCopy ImageCopies;
BufferImageCopy BufferImageCopies;
};

// Destination texel for a thread: the thread's xy coordinate shifted by the
// image offset taken from the copy constants.
uint2 GetImageDst(uint3 dispatch_thread_id)
{
    const uint2 origin = BufferImageCopies.ImageOffset.xy;
    return origin + dispatch_thread_id.xy;
}

// Source texel for a thread: the thread's xy coordinate shifted by the
// image offset taken from the copy constants.
uint2 GetImageSrc(uint3 dispatch_thread_id)
{
    const uint2 origin = BufferImageCopies.ImageOffset.xy;
    return origin + dispatch_thread_id.xy;
}

// Destination index into the buffer for a thread: base offset, plus the
// thread's x coordinate, plus the thread's y coordinate times the per-row
// multiplier stored in BufferVars.y.
uint GetBufferDst(uint3 dispatch_thread_id)
{
    const uint base = BufferImageCopies.BufferVars.x;
    const uint row_step = BufferImageCopies.BufferVars.y;
    return base + dispatch_thread_id.x + dispatch_thread_id.y * row_step;
}

// Source index into the buffer for a thread: base offset, plus the thread's
// x coordinate, plus the thread's y coordinate times the per-row multiplier
// stored in BufferVars.y.
uint GetBufferSrc(uint3 dispatch_thread_id)
{
    const uint base = BufferImageCopies.BufferVars.x;
    const uint row_step = BufferImageCopies.BufferVars.y;
    return base + dispatch_thread_id.x + dispatch_thread_id.y * row_step;
}

// Packs four values into one 32-bit word, one byte each. Every component is
// clamped to 0xff first; x lands in the lowest byte and w in the highest.
uint Uint4ToUint(uint4 data)
{
    const uint4 bytes = min(data, uint4(0xff, 0xff, 0xff, 0xff));
    return bytes.x | (bytes.y << 8) | (bytes.z << 16) | (bytes.w << 24);
}

// Splits a 32-bit word into its four bytes. Note the component order:
// x receives the HIGHEST byte and w the lowest, i.e. the reverse of the
// order Uint4ToUint packs in.
uint4 UintToUint4(uint data)
{
    const uint byte3 = data >> 24;
    const uint byte2 = (data >> 16) & 0xff;
    const uint byte1 = (data >> 8) & 0xff;
    const uint byte0 = data & 0xff;
    return uint4(byte3, byte2, byte1, byte0);
}

// Splits a 32-bit word into two 16-bit halves: x receives the high half and
// y the low half.
uint2 UintToUint2(uint data)
{
    const uint high_half = data >> 16;
    const uint low_half = data & 0xffff;
    return uint2(high_half, low_half);
}

// Packs two values into one 32-bit word, 16 bits each. Both components are
// clamped to 0xffff first; x lands in the low half and y in the high half.
uint Uint2ToUint(uint2 data)
{
    const uint2 halves = min(data, uint2(0xffff, 0xffff));
    return (halves.y << 16) | halves.x;
}

// Buffers are always R32-aligned
// Buffer-side resources used by every buffer<->image kernel below: the source
// is read as 32-bit words from slot t0, the destination is written as 32-bit
// words to slot u0.
StructuredBuffer<uint> BufferCopySrc : register(t0);
RWBuffer<uint> BufferCopyDst: register(u0);

// R32
// Image-side views for the R32 kernels. They reuse slots t0/u0; each kernel
// touches only one resource per slot (the buffer on one side, the image on
// the other).
Texture2D<uint> ImageCopySrcR32 : register(t0);
RWTexture2D<uint> ImageCopyDstR32 : register(u0);

// Buffer -> R32 image: each thread moves exactly one 32-bit word from the
// source buffer to one texel.
// TODO: correct, but slow
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint word = BufferCopySrc[GetBufferSrc(dispatch_thread_id)];
    ImageCopyDstR32[GetImageDst(dispatch_thread_id)] = word;
}

// R32 image -> buffer: each thread moves exactly one texel into one 32-bit
// word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint texel = ImageCopySrcR32[GetImageSrc(dispatch_thread_id)];
    BufferCopyDst[GetBufferDst(dispatch_thread_id)] = texel;
}

// R16G16
// Two-channel uint views used by the R16G16 kernels; they occupy the same
// t0/u0 slots as the other formats' image views.
Texture2D<uint2> ImageCopySrcR16G16 : register(t0);
RWTexture2D<uint2> ImageCopyDstR16G16 : register(u0);

// Buffer -> R16G16 image: each thread unpacks one 32-bit word from the
// source buffer into one two-channel texel.
// TODO: correct, but slow
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint2 dst_idx = GetImageDst(dispatch_thread_id);
    uint src_idx = GetBufferSrc(dispatch_thread_id);

    // R16G16 stores R in the low 16 bits and G in the high 16 bits of the
    // word. UintToUint2 returns (high, low), so swizzle to get (R, G).
    ImageCopyDstR16G16[dst_idx] = UintToUint2(BufferCopySrc[src_idx]).yx;
}

// R16G16 image -> buffer: each thread packs one two-channel texel into one
// 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint dst_idx = GetBufferDst(dispatch_thread_id);
    uint2 src_idx = GetImageSrc(dispatch_thread_id);

    // Uint2ToUint places x in the low half and y in the high half, which is
    // exactly the R16G16 layout (R low, G high) — no swizzle needed.
    BufferCopyDst[dst_idx] = Uint2ToUint(ImageCopySrcR16G16[src_idx]);
}

// R16
// Single-channel uint views used by the R16 kernels; they occupy the same
// t0/u0 slots as the other formats' image views.
Texture2D<uint> ImageCopySrcR16 : register(t0);
RWTexture2D<uint> ImageCopyDstR16 : register(u0);

// Buffer -> R16 image: each thread reads one 32-bit word and writes two
// horizontally adjacent texels. The low half of the word goes to the first
// texel, the high half to the second.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Two texels share a word, so the per-row step is halved.
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;
    const uint word = BufferCopySrc[BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start];
    const uint2 halves = UintToUint2(word);

    // Each thread covers texel columns 2*x and 2*x + 1 of its row.
    const uint3 first_texel = uint3(2, 1, 0) * dispatch_thread_id;

    ImageCopyDstR16[GetImageDst(first_texel)] = halves.y;
    ImageCopyDstR16[GetImageDst(first_texel + uint3(1, 0, 0))] = halves.x;
}

// R16 image -> buffer: each thread reads two horizontally adjacent texels
// and packs them into one 32-bit word. The first texel ends up in the low
// half of the word, the second in the high half.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Each thread covers texel columns 2*x and 2*x + 1 of its row.
    const uint3 first_texel = uint3(2, 1, 0) * dispatch_thread_id;

    const uint texel0 = ImageCopySrcR16[GetImageSrc(first_texel)];
    const uint texel1 = ImageCopySrcR16[GetImageSrc(first_texel + uint3(1, 0, 0))];

    // Two texels share a word, so the per-row step is halved.
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    BufferCopyDst[BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start] =
        Uint2ToUint(uint2(texel0, texel1));
}

// R8G8
// Two-channel uint views used by the R8G8 kernels; they occupy the same
// t0/u0 slots as the other formats' image views.
Texture2D<uint2> ImageCopySrcR8G8 : register(t0);
RWTexture2D<uint2> ImageCopyDstR8G8 : register(u0);

// Buffer -> R8G8 image: each thread reads one 32-bit word and writes two
// horizontally adjacent two-channel texels.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Two texels share a word, so the per-row step is halved.
    uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    // UintToUint4 returns the bytes highest-first: (byte3, byte2, byte1, byte0).
    uint4 data = UintToUint4(BufferCopySrc[src_idx]);

    // In memory the first texel is bytes 0..1 (R, G) and the second texel is
    // bytes 2..3 (R, G), matching how the R8 and R16 kernels treat the low
    // bits of a word as the first texel.
    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.wz;
    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.yx;
}

// R8G8 image -> buffer: each thread reads two horizontally adjacent
// two-channel texels and packs them into one 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Two texels share a word, so the per-row step is halved.
    uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    uint2 first = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
    uint2 second = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];

    // Uint4ToUint packs x into the lowest byte, so this yields the in-memory
    // order R0, G0, R1, G1 (first texel in the low bytes, R before G).
    uint data = Uint4ToUint(uint4(first.x, first.y, second.x, second.y));

    BufferCopyDst[dst_idx] = data;
}

// R8
// Single-channel uint views used by the R8 kernels; they occupy the same
// t0/u0 slots as the other formats' image views.
Texture2D<uint> ImageCopySrcR8 : register(t0);
RWTexture2D<uint> ImageCopyDstR8 : register(u0);

// Buffer -> R8 image: each thread reads one 32-bit word and writes four
// horizontally adjacent texels, lowest byte first.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Four texels share a word, so the per-row step is quartered.
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4;
    const uint word = BufferCopySrc[BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start];

    // UintToUint4 returns the bytes highest-first, so .w is the lowest byte.
    const uint4 bytes = UintToUint4(word);

    // Each thread covers texel columns 4*x .. 4*x + 3 of its row.
    const uint3 first_texel = uint3(4, 1, 0) * dispatch_thread_id;

    ImageCopyDstR8[GetImageDst(first_texel)] = bytes.w;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(1, 0, 0))] = bytes.z;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(2, 0, 0))] = bytes.y;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(3, 0, 0))] = bytes.x;
}

// R8 image -> buffer: each thread reads four horizontally adjacent texels
// and packs them into one 32-bit word, first texel in the lowest byte.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Four texels share a word, so the per-row step is quartered.
    uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4;

    // Each thread covers texel columns 4*x .. 4*x + 3 of its row.
    uint src_1 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
    uint src_2 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];
    uint src_3 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(2, 0, 0))];
    uint src_4 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(3, 0, 0))];

    // The stale pre-change statement that wrote through the undeclared `idx`
    // has been dropped; this is the only store the kernel performs.
    BufferCopyDst[dst_idx] = Uint4ToUint(uint4(src_1, src_2, src_3, src_4));
}
46 changes: 34 additions & 12 deletions src/backend/dx11/src/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,62 +24,84 @@ pub fn map_index_type(ty: IndexType) -> DXGI_FORMAT {
}
}

pub fn typeless_format(format: DXGI_FORMAT) -> Option<DXGI_FORMAT> {
pub fn typeless_format(format: DXGI_FORMAT) -> Option<(DXGI_FORMAT, DXGI_FORMAT)> {
match format {
DXGI_FORMAT_R8G8B8A8_UNORM |
DXGI_FORMAT_R8G8B8A8_SNORM |
DXGI_FORMAT_R8G8B8A8_UINT |
DXGI_FORMAT_R8G8B8A8_SINT |
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some(DXGI_FORMAT_R8G8B8A8_TYPELESS),
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some((DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT)),

// ?`
DXGI_FORMAT_B8G8R8A8_UNORM |
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some(DXGI_FORMAT_B8G8R8A8_TYPELESS),
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some((DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_R8_UNORM |
DXGI_FORMAT_R8_SNORM |
DXGI_FORMAT_R8_UINT |
DXGI_FORMAT_R8_SINT => Some(DXGI_FORMAT_R8_TYPELESS),
DXGI_FORMAT_R8_SINT => Some((DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT)),

DXGI_FORMAT_R8G8_UNORM |
DXGI_FORMAT_R8G8_SNORM |
DXGI_FORMAT_R8G8_UINT |
DXGI_FORMAT_R8G8_SINT => Some(DXGI_FORMAT_R8G8_TYPELESS),
DXGI_FORMAT_R8G8_SINT => Some((DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT)),

DXGI_FORMAT_R16_UNORM |
DXGI_FORMAT_R16_SNORM |
DXGI_FORMAT_R16_UINT |
DXGI_FORMAT_R16_SINT |
DXGI_FORMAT_R16_FLOAT => Some(DXGI_FORMAT_R16_TYPELESS),
DXGI_FORMAT_R16_FLOAT => Some((DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT)),

DXGI_FORMAT_R16G16_UNORM |
DXGI_FORMAT_R16G16_SNORM |
DXGI_FORMAT_R16G16_UINT |
DXGI_FORMAT_R16G16_SINT |
DXGI_FORMAT_R16G16_FLOAT => Some(DXGI_FORMAT_R16G16_TYPELESS),
DXGI_FORMAT_R16G16_FLOAT => Some((DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT)),

DXGI_FORMAT_R16G16B16A16_UNORM |
DXGI_FORMAT_R16G16B16A16_SNORM |
DXGI_FORMAT_R16G16B16A16_UINT |
DXGI_FORMAT_R16G16B16A16_SINT |
DXGI_FORMAT_R16G16B16A16_FLOAT => Some(DXGI_FORMAT_R16G16B16A16_TYPELESS),
DXGI_FORMAT_R16G16B16A16_FLOAT => Some((DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT)),

DXGI_FORMAT_D32_FLOAT |
DXGI_FORMAT_R32_UINT |
DXGI_FORMAT_R32_SINT |
DXGI_FORMAT_R32_FLOAT => Some(DXGI_FORMAT_R32_TYPELESS),
DXGI_FORMAT_R32_FLOAT => Some((DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_R32G32_UINT |
DXGI_FORMAT_R32G32_SINT |
DXGI_FORMAT_R32G32_FLOAT => Some(DXGI_FORMAT_R32G32_TYPELESS),
DXGI_FORMAT_R32G32_FLOAT => Some((DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_UINT)),

DXGI_FORMAT_R32G32B32_UINT |
DXGI_FORMAT_R32G32B32_SINT |
DXGI_FORMAT_R32G32B32_FLOAT => Some(DXGI_FORMAT_R32G32B32_TYPELESS),
DXGI_FORMAT_R32G32B32_FLOAT => Some((DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_UINT)),

DXGI_FORMAT_R32G32B32A32_UINT |
DXGI_FORMAT_R32G32B32A32_SINT |
DXGI_FORMAT_R32G32B32A32_FLOAT => Some(DXGI_FORMAT_R32G32B32A32_TYPELESS),
DXGI_FORMAT_R32G32B32A32_FLOAT => Some((DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_UINT)),

DXGI_FORMAT_BC1_UNORM |
DXGI_FORMAT_BC1_UNORM_SRGB => Some((DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_BC2_UNORM |
DXGI_FORMAT_BC2_UNORM_SRGB => Some((DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_BC3_UNORM |
DXGI_FORMAT_BC3_UNORM_SRGB => Some((DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_BC4_UNORM |
DXGI_FORMAT_BC4_SNORM => Some((DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_BC5_UNORM |
DXGI_FORMAT_BC5_SNORM => Some((DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_R32_UINT)),

DXGI_FORMAT_BC6H_UF16 |
DXGI_FORMAT_BC6H_SF16 => Some((DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_R32_UINT)),

// TODO: srgb craziness
DXGI_FORMAT_BC7_UNORM |
DXGI_FORMAT_BC7_UNORM_SRGB => Some((DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM)),

/*R5g6b5Unorm => DXGI_FORMAT_B5G6R5_UNORM,
R5g5b5a1Unorm => DXGI_FORMAT_B5G5R5A1_UNORM,
Expand Down
Loading

0 comments on commit 9603c6d

Please sign in to comment.