diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 2aa3068faed..87f98adaf32 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -24170,6 +24170,7 @@ struct CoopVec : IArray, IArithmeti // /// Store all elements of this CoopVec into a buffer at a specified offset. + /// Pointer accesses are 16-byte aligned. /// @param buffer The destination buffer to store the values into. /// @param byteOffset16ByteAligned The byte offset from the start of the buffer where the data will be stored. Must be 16-byte aligned. [require(cooperative_vector)] @@ -24216,6 +24217,18 @@ struct CoopVec : IArray, IArithmeti } } + [ForceInline] + [require(spirv, cooperative_vector)] + void store(T* buffer, int32_t byteOffset16ByteAligned = 0) + { + let pointer = Ptr(buffer); + let alignment = 16; + return spirv_asm + { + OpCooperativeVectorStoreNV $pointer $byteOffset16ByteAligned $this Aligned !alignment; + }; + } + [ForceInline] [require(cooperative_vector)] [require(hlsl_coopvec_poc)] @@ -24269,6 +24282,7 @@ struct CoopVec : IArray, IArithmeti } /// Load values from a byte-addressable buffer into a cooperative vector. + /// Pointer accesses are 16-byte aligned. /// @param buffer The source buffer to load data from. /// @param byteOffset16ByteAligned The byte offset from the start of the buffer. Must be 16-byte aligned. /// @return A new cooperative vector containing the loaded values. 
@@ -24368,6 +24382,19 @@ struct CoopVec : IArray, IArithmeti } } + [ForceInline] + [__NoSideEffect] + [require(spirv, cooperative_vector)] + static CoopVec load(T* buffer, int32_t byteOffset16ByteAligned = 0) + { + let pointer = Ptr(buffer); + let alignment = 16; + return spirv_asm + { + result:$$CoopVec = OpCooperativeVectorLoadNV $pointer $byteOffset16ByteAligned Aligned !alignment; + }; + } + // Groupshared [ForceInline] [__NoSideEffect] @@ -25736,6 +25763,13 @@ CoopVec coopVecLoad(RWStructured return CoopVec.load(buffer, byteOffset16ByteAligned); } +[ForceInline] +[require(spirv, cooperative_vector)] +CoopVec coopVecLoad(T* buffer, int32_t byteOffset16ByteAligned = 0) +{ + return CoopVec.load(buffer, byteOffset16ByteAligned); +} + // Groupshared [ForceInline] [require(cooperative_vector)] diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 5dfa1c76c3c..ba238985b1d 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -2069,10 +2069,25 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex inst->getOp() == kIROp_ArrayType ? emitOpTypeArray(inst, elementType, irArrayType->getElementCount()) : emitOpTypeRuntimeArray(inst, elementType); - auto strideInst = irArrayType->getArrayStride(); - if (strideInst && shouldEmitArrayStride(irArrayType->getElementType())) + if (shouldEmitArrayStride(irArrayType->getElementType())) { - int stride = (int)getIntVal(strideInst); + auto stride = 0; + if (auto strideInst = irArrayType->getArrayStride()) + { + stride = (int)getIntVal(strideInst); + } + else + { + // Stride may not have been calculated for basic element types. Calculate it + // here. 
+ IRSizeAndAlignment sizeAndAlignment; + getNaturalSizeAndAlignment( + m_targetProgram->getOptionSet(), + elementType, + &sizeAndAlignment); + stride = (int)sizeAndAlignment.getStride(); + } + emitOpDecorateArrayStride( getSection(SpvLogicalSectionID::Annotations), nullptr, diff --git a/tests/cooperative-vector/load-store-pointer.slang b/tests/cooperative-vector/load-store-pointer.slang new file mode 100644 index 00000000000..7d7b2b6c2d7 --- /dev/null +++ b/tests/cooperative-vector/load-store-pointer.slang @@ -0,0 +1,32 @@ +//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -emit-spirv-directly + +//TEST_INPUT: set inputBuffer = ubuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12], stride=4); +uniform int32_t* inputBuffer; + +//TEST_INPUT: set outputBuffer = out ubuffer(data=[0 0 0 0 0 0 0 0], stride=4); +uniform int32_t* outputBuffer; + +// CHECK: 9 +// CHECK-NEXT: A +// CHECK-NEXT: B +// CHECK-NEXT: C +// CHECK-NEXT: 1 +// CHECK-NEXT: 2 +// CHECK-NEXT: 3 +// CHECK-NEXT: 4 + +[shader("compute")] +[numthreads(1, 1, 1)] +void computeMain() +{ + // First four elements of the input (values 1..4), read at byte offset 0. + let a = coopVecLoad<4, int32_t>(inputBuffer, 0); + // Last four elements of the input (values 9..12): the pointer is advanced by four elements and a further 16-byte offset is applied. + let b = coopVecLoad<4, int32_t>(inputBuffer + 4, 4*4); + + // Store the last four input elements to the first half of the output buffer. + b.store(outputBuffer, 0); + // Store the first four input elements to the second half of the output buffer (byte offset 16). + a.store(outputBuffer, 4*4); +} +