-
Notifications
You must be signed in to change notification settings - Fork 546
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[dx11] add memory flush/invalidate & image/buffer copies #2149
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,200 @@ | ||
cbuffer BufferImageCopy : register(b0) { | ||
uint2 BufferSize; | ||
uint2 ImageOffset; | ||
// Parameter block for buffer copies (one uint4 of packed values). | ||
struct BufferCopy { | ||
// NOTE(review): judging by the name this packs source and destination parameters together; | ||
// no code visible in this section reads it, so confirm the layout against the host side. | ||
uint4 SrcDst; | ||
}; | ||
|
||
StructuredBuffer<uint> CopySrc : register(t0); | ||
RWTexture2D<uint> CopyDst : register(u0); | ||
// Parameter block for image copies: one uint4 for the source and one for the destination. | ||
// NOTE(review): neither field is read by the code visible in this section; the meaning of the | ||
// individual components must be confirmed against the host side. | ||
struct ImageCopy { | ||
uint4 Src; | ||
uint4 Dst; | ||
}; | ||
|
||
// Parameter block for copies between a buffer and a 2D image. | ||
struct BufferImageCopy { | ||
// x=offset, yz=size | ||
// The index helpers in this file add .x as the base buffer index and multiply | ||
// dispatch_thread_id.y by .y to step from one row to the next. | ||
uint4 BufferVars; | ||
// .xy is added to the dispatch thread id to obtain the texel coordinate. | ||
uint4 ImageOffset; | ||
// Not read by any code visible in this section. | ||
uint4 ImageExtent; | ||
}; | ||
|
||
// Constant buffer bound at b0 that carries the parameters for every copy kind. | ||
// Only BufferImageCopies is read by the shader code visible in this section. | ||
cbuffer CopyConstants : register(b0) { | ||
BufferCopy BufferCopies; | ||
ImageCopy ImageCopies; | ||
BufferImageCopy BufferImageCopies; | ||
}; | ||
|
||
// Destination texel for a copy into an image: the configured image offset
// shifted by this thread's xy position.
uint2 GetImageDst(uint3 dispatch_thread_id)
{
    const uint2 image_origin = BufferImageCopies.ImageOffset.xy;
    return image_origin + dispatch_thread_id.xy;
}
|
||
// Source texel for a copy out of an image: the configured image offset
// shifted by this thread's xy position.
uint2 GetImageSrc(uint3 dispatch_thread_id)
{
    const uint2 image_origin = BufferImageCopies.ImageOffset.xy;
    return image_origin + dispatch_thread_id.xy;
}
|
||
// Destination index into the buffer for this thread:
// base offset (BufferVars.x) + column + row * row stride (BufferVars.y).
uint GetBufferDst(uint3 dispatch_thread_id)
{
    const uint base_index = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base_index + dispatch_thread_id.x + row_start;
}
|
||
// Source index into the buffer for this thread:
// base offset (BufferVars.x) + column + row * row stride (BufferVars.y).
uint GetBufferSrc(uint3 dispatch_thread_id)
{
    const uint base_index = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base_index + dispatch_thread_id.x + row_start;
}
|
||
// Packs four components into one 32-bit word, one byte per component.
// Each component is first saturated to 0xff; x lands in bits 7..0, y in bits 15..8,
// z in bits 23..16 and w in bits 31..24.
uint Uint4ToUint(uint4 data)
{
    // Saturate all lanes at once so no component can spill into its neighbour's byte.
    const uint4 bytes = min(data, uint4(0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff));

    // Move every lane to its byte position, then merge the lanes.
    const uint4 positioned = bytes << uint4(0, 8, 16, 24);

    return positioned.x | positioned.y | positioned.z | positioned.w;
}
|
||
// Splits a packed 32-bit word into four 8-bit lanes, most-significant byte first: | ||
// x = bits 31..24, y = bits 23..16, z = bits 15..8, w = bits 7..0. | ||
// Note that this is the reverse of the lane order used by Uint4ToUint, which packs its | ||
// x component into bits 7..0, so the two helpers are not inverses of each other. | ||
uint4 UintToUint4(uint data) | ||
{ | ||
return uint4((data & 0xff000000) >> 24, (data & 0xff0000) >> 16, (data & 0xff00) >> 8, data & 0xff); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe even simpler: return ((data >> uint4(24, 16, 8, 0)) & 0xFF; |
||
} | ||
|
||
// Splits a packed 32-bit word into two 16-bit lanes:
// x = upper half (bits 31..16), y = lower half (bits 15..0).
uint2 UintToUint2(uint data)
{
    const uint2 halves = uint2(data >> 16, data);
    return halves & 0x0000ffff;
}
|
||
// Packs two components into one 32-bit word, 16 bits per component.
// Each component is first saturated to 0xffff; x lands in bits 15..0 and y in bits 31..16.
uint Uint2ToUint(uint2 data)
{
    // Saturate both lanes so x cannot spill into y's half.
    const uint2 halves = min(data, uint2(0x0000ffff, 0x0000ffff));

    return halves.x | (halves.y << 16);
}
|
||
// Buffers are always R32-aligned | ||
// Buffer side of the copies, accessed as 32-bit words: read through t0 when the buffer is | ||
// the copy source, written through u0 when it is the copy destination. | ||
StructuredBuffer<uint> BufferCopySrc : register(t0); | ||
RWBuffer<uint> BufferCopyDst: register(u0); | ||
|
||
// R32 | ||
// Image side for the single-channel 32-bit copies. These use the same t0/u0 slots as the | ||
// buffer bindings, so presumably only one source and one destination are bound per | ||
// dispatch — confirm against the host code. | ||
Texture2D<uint> ImageCopySrcR32 : register(t0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should totally share these definitions between different bit sizes (R16/R32/R8/etc) |
||
RWTexture2D<uint> ImageCopyDstR32 : register(u0); | ||
|
||
// TODO: correct, but slow
// Buffer -> R32 image copy. Each thread moves exactly one 32-bit word from the
// source buffer into one texel of the destination image.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint word = BufferCopySrc[GetBufferSrc(dispatch_thread_id)];

    ImageCopyDstR32[GetImageDst(dispatch_thread_id)] = word;
}
|
||
// R32 image -> buffer copy. Each thread moves exactly one texel of the source
// image into one 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint texel = ImageCopySrcR32[GetImageSrc(dispatch_thread_id)];

    BufferCopyDst[GetBufferDst(dispatch_thread_id)] = texel;
}
|
||
// R16G16 | ||
// Image side for the two-channel 16-bit copies (source at t0, destination at u0). | ||
Texture2D<uint2> ImageCopySrcR16G16 : register(t0); | ||
RWTexture2D<uint2> ImageCopyDstR16G16 : register(u0); | ||
|
||
// TODO: correct, but slow
// Buffer -> R16G16 image copy. Each thread reads one 32-bit word from the source
// buffer and writes it to one texel as two 16-bit channels.
//
// UintToUint2 returns (upper half, lower half), so the texel's x channel receives
// bits 31..16 of the word and its y channel receives bits 15..0.
// NOTE(review): this matches the `.yx` swizzle used by cs_copy_image2d_r16g16_buffer,
// but places the first channel in the high half of the word — confirm this is the
// intended memory layout.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint2 dst_idx = GetImageDst(dispatch_thread_id);
    uint src_idx = GetBufferSrc(dispatch_thread_id);

    ImageCopyDstR16G16[dst_idx] = UintToUint2(BufferCopySrc[src_idx]);
}
|
||
// R16G16 image -> buffer copy. Each thread packs one two-channel texel into one
// 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint2 texel = ImageCopySrcR16G16[GetImageSrc(dispatch_thread_id)];

    // Channels are swapped before packing: the y channel ends up in bits 15..0
    // and the x channel in bits 31..16 of the word.
    const uint packed_word = Uint2ToUint(texel.yx);

    BufferCopyDst[GetBufferDst(dispatch_thread_id)] = packed_word;
}
|
||
// R16 | ||
// Image side for the single-channel 16-bit copies (source at t0, destination at u0). | ||
Texture2D<uint> ImageCopySrcR16 : register(t0); | ||
RWTexture2D<uint> ImageCopyDstR16 : register(u0); | ||
|
||
// Buffer -> R16 image copy. Each thread unpacks one 32-bit buffer word into two | ||
// horizontally adjacent 16-bit texels. | ||
[numthreads(1, 1, 1)] | ||
void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) { | ||
// Word index = base + x + (y * BufferVars.y) / 2; only the row term is halved. | ||
uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why isn't |
||
|
||
// data.x holds bits 31..16 of the word, data.y holds bits 15..0 (see UintToUint2). | ||
uint2 data = UintToUint2(BufferCopySrc[src_idx]); | ||
|
||
// The lower half goes to the texel at ImageOffset.xy + (2 * x, y) ... | ||
ImageCopyDstR16[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.y; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure I understand this. The image is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
0: https://msdn.microsoft.com/en-us/library/windows/desktop/mt427455(v=vs.85).aspx |
||
// ... and the upper half to the texel immediately to its right, at (2 * x + 1, y). | ||
ImageCopyDstR16[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.x; | ||
} | ||
|
||
// R16 image -> buffer copy. Each thread packs two horizontally adjacent 16-bit
// texels into one 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Thread (x, y) covers the texels at (2 * x, y) and (2 * x + 1, y), relative to the image offset.
    const uint3 pair_origin = uint3(2, 1, 0) * dispatch_thread_id;
    const uint first_texel = ImageCopySrcR16[GetImageSrc(pair_origin + uint3(0, 0, 0))];
    const uint second_texel = ImageCopySrcR16[GetImageSrc(pair_origin + uint3(1, 0, 0))];

    // The first texel is packed into bits 15..0, the second into bits 31..16.
    const uint packed_word = Uint2ToUint(uint2(first_texel, second_texel));

    // Word index = base + x + (y * BufferVars.y) / 2; only the row term is halved.
    const uint word_index = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    BufferCopyDst[word_index] = packed_word;
}
|
||
// R8G8 | ||
// Image side for the two-channel 8-bit copies (source at t0, destination at u0). | ||
Texture2D<uint2> ImageCopySrcR8G8 : register(t0); | ||
RWTexture2D<uint2> ImageCopyDstR8G8 : register(u0); | ||
|
||
// Buffer -> R8G8 image copy. Each thread unpacks one 32-bit buffer word into two
// horizontally adjacent two-channel texels.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Word index = base + x + (y * BufferVars.y) / 2; only the row term is halved.
    const uint word_index = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    // UintToUint4 yields the bytes most-significant first: x = bits 31..24 ... w = bits 7..0.
    const uint4 bytes = UintToUint4(BufferCopySrc[word_index]);

    // Thread (x, y) covers the texels at (2 * x, y) and (2 * x + 1, y), relative to the image offset.
    const uint3 pair_origin = uint3(2, 1, 0) * dispatch_thread_id;

    // The two high bytes feed the first texel, the two low bytes the second.
    ImageCopyDstR8G8[GetImageDst(pair_origin + uint3(0, 0, 0))] = bytes.xy;
    ImageCopyDstR8G8[GetImageDst(pair_origin + uint3(1, 0, 0))] = bytes.zw;
}
|
||
// R8G8 image -> buffer copy. Each thread packs two horizontally adjacent
// two-channel texels into one 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Thread (x, y) covers the texels at (2 * x, y) and (2 * x + 1, y), relative to the image offset.
    const uint3 pair_origin = uint3(2, 1, 0) * dispatch_thread_id;
    const uint2 first_texel = ImageCopySrcR8G8[GetImageSrc(pair_origin + uint3(0, 0, 0))];
    const uint2 second_texel = ImageCopySrcR8G8[GetImageSrc(pair_origin + uint3(1, 0, 0))];

    // Byte order handed to Uint4ToUint, lowest byte first:
    // second.y, second.x, first.y, first.x.
    const uint packed_word = Uint4ToUint(uint4(second_texel.yx, first_texel.yx));

    // Word index = base + x + (y * BufferVars.y) / 2; only the row term is halved.
    const uint word_index = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    BufferCopyDst[word_index] = packed_word;
}
|
||
// R8 | ||
// Image side for the single-channel 8-bit copies (source at t0, destination at u0). | ||
Texture2D<uint> ImageCopySrcR8 : register(t0); | ||
RWTexture2D<uint> ImageCopyDstR8 : register(u0); | ||
|
||
// Buffer -> R8 image copy. Each thread unpacks one 32-bit buffer word into four
// horizontally adjacent 8-bit texels.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Word index = base + x + (y * BufferVars.y) / 4; only the row term is quartered.
    const uint word_index = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4;

    // UintToUint4 yields the bytes most-significant first: x = bits 31..24 ... w = bits 7..0.
    const uint4 bytes = UintToUint4(BufferCopySrc[word_index]);

    // Thread (x, y) covers the texels at (4 * x .. 4 * x + 3, y), relative to the image offset.
    const uint3 quad_origin = uint3(4, 1, 0) * dispatch_thread_id;

    // Texels are filled from the lowest byte of the word upwards.
    ImageCopyDstR8[GetImageDst(quad_origin + uint3(0, 0, 0))] = bytes.w;
    ImageCopyDstR8[GetImageDst(quad_origin + uint3(1, 0, 0))] = bytes.z;
    ImageCopyDstR8[GetImageDst(quad_origin + uint3(2, 0, 0))] = bytes.y;
    ImageCopyDstR8[GetImageDst(quad_origin + uint3(3, 0, 0))] = bytes.x;
}
|
||
// R8 image -> buffer copy. Each thread packs four horizontally adjacent 8-bit
// texels into one 32-bit word of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Word index = base + x + (y * BufferVars.y) / 4; only the row term is quartered.
    uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4;

    // Thread (x, y) covers the texels at (4 * x .. 4 * x + 3, y), relative to the image offset.
    uint src_1 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
    uint src_2 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];
    uint src_3 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(2, 0, 0))];
    uint src_4 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(3, 0, 0))];

    // Uint4ToUint places its x component in the lowest byte, so the first texel
    // ends up in bits 7..0 and the fourth in bits 31..24.
    BufferCopyDst[dst_idx] = Uint4ToUint(uint4(src_1, src_2, src_3, src_4));
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,62 +24,84 @@ pub fn map_index_type(ty: IndexType) -> DXGI_FORMAT { | |
} | ||
} | ||
|
||
pub fn typeless_format(format: DXGI_FORMAT) -> Option<DXGI_FORMAT> { | ||
pub fn typeless_format(format: DXGI_FORMAT) -> Option<(DXGI_FORMAT, DXGI_FORMAT)> { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What are the return value semantics now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
match format { | ||
DXGI_FORMAT_R8G8B8A8_UNORM | | ||
DXGI_FORMAT_R8G8B8A8_SNORM | | ||
DXGI_FORMAT_R8G8B8A8_UINT | | ||
DXGI_FORMAT_R8G8B8A8_SINT | | ||
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some(DXGI_FORMAT_R8G8B8A8_TYPELESS), | ||
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some((DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT)), | ||
|
||
// ?` | ||
DXGI_FORMAT_B8G8R8A8_UNORM | | ||
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some(DXGI_FORMAT_B8G8R8A8_TYPELESS), | ||
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some((DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_R8_UNORM | | ||
DXGI_FORMAT_R8_SNORM | | ||
DXGI_FORMAT_R8_UINT | | ||
DXGI_FORMAT_R8_SINT => Some(DXGI_FORMAT_R8_TYPELESS), | ||
DXGI_FORMAT_R8_SINT => Some((DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT)), | ||
|
||
DXGI_FORMAT_R8G8_UNORM | | ||
DXGI_FORMAT_R8G8_SNORM | | ||
DXGI_FORMAT_R8G8_UINT | | ||
DXGI_FORMAT_R8G8_SINT => Some(DXGI_FORMAT_R8G8_TYPELESS), | ||
DXGI_FORMAT_R8G8_SINT => Some((DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT)), | ||
|
||
DXGI_FORMAT_R16_UNORM | | ||
DXGI_FORMAT_R16_SNORM | | ||
DXGI_FORMAT_R16_UINT | | ||
DXGI_FORMAT_R16_SINT | | ||
DXGI_FORMAT_R16_FLOAT => Some(DXGI_FORMAT_R16_TYPELESS), | ||
DXGI_FORMAT_R16_FLOAT => Some((DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT)), | ||
|
||
DXGI_FORMAT_R16G16_UNORM | | ||
DXGI_FORMAT_R16G16_SNORM | | ||
DXGI_FORMAT_R16G16_UINT | | ||
DXGI_FORMAT_R16G16_SINT | | ||
DXGI_FORMAT_R16G16_FLOAT => Some(DXGI_FORMAT_R16G16_TYPELESS), | ||
DXGI_FORMAT_R16G16_FLOAT => Some((DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT)), | ||
|
||
DXGI_FORMAT_R16G16B16A16_UNORM | | ||
DXGI_FORMAT_R16G16B16A16_SNORM | | ||
DXGI_FORMAT_R16G16B16A16_UINT | | ||
DXGI_FORMAT_R16G16B16A16_SINT | | ||
DXGI_FORMAT_R16G16B16A16_FLOAT => Some(DXGI_FORMAT_R16G16B16A16_TYPELESS), | ||
DXGI_FORMAT_R16G16B16A16_FLOAT => Some((DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT)), | ||
|
||
DXGI_FORMAT_D32_FLOAT | | ||
DXGI_FORMAT_R32_UINT | | ||
DXGI_FORMAT_R32_SINT | | ||
DXGI_FORMAT_R32_FLOAT => Some(DXGI_FORMAT_R32_TYPELESS), | ||
DXGI_FORMAT_R32_FLOAT => Some((DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_R32G32_UINT | | ||
DXGI_FORMAT_R32G32_SINT | | ||
DXGI_FORMAT_R32G32_FLOAT => Some(DXGI_FORMAT_R32G32_TYPELESS), | ||
DXGI_FORMAT_R32G32_FLOAT => Some((DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_UINT)), | ||
|
||
DXGI_FORMAT_R32G32B32_UINT | | ||
DXGI_FORMAT_R32G32B32_SINT | | ||
DXGI_FORMAT_R32G32B32_FLOAT => Some(DXGI_FORMAT_R32G32B32_TYPELESS), | ||
DXGI_FORMAT_R32G32B32_FLOAT => Some((DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_UINT)), | ||
|
||
DXGI_FORMAT_R32G32B32A32_UINT | | ||
DXGI_FORMAT_R32G32B32A32_SINT | | ||
DXGI_FORMAT_R32G32B32A32_FLOAT => Some(DXGI_FORMAT_R32G32B32A32_TYPELESS), | ||
DXGI_FORMAT_R32G32B32A32_FLOAT => Some((DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_UINT)), | ||
|
||
DXGI_FORMAT_BC1_UNORM | | ||
DXGI_FORMAT_BC1_UNORM_SRGB => Some((DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_BC2_UNORM | | ||
DXGI_FORMAT_BC2_UNORM_SRGB => Some((DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_BC3_UNORM | | ||
DXGI_FORMAT_BC3_UNORM_SRGB => Some((DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_BC4_UNORM | | ||
DXGI_FORMAT_BC4_SNORM => Some((DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_BC5_UNORM | | ||
DXGI_FORMAT_BC5_SNORM => Some((DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
DXGI_FORMAT_BC6H_UF16 | | ||
DXGI_FORMAT_BC6H_SF16 => Some((DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_R32_UINT)), | ||
|
||
// TODO: srgb craziness | ||
DXGI_FORMAT_BC7_UNORM | | ||
DXGI_FORMAT_BC7_UNORM_SRGB => Some((DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM)), | ||
|
||
/*R5g6b5Unorm => DXGI_FORMAT_B5G6R5_UNORM, | ||
R5g5b5a1Unorm => DXGI_FORMAT_B5G5R5A1_UNORM, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: could probably vectorize this better, e.g.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Beautiful! 😄