shader-slang · fairywreath · May 23, 2025 · Apr 15, 2025 · Apr 17, 2025 · Apr 17, 2025
@@ -24170,6 +24170,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
     //
 
     /// Store all elements of this CoopVec into a buffer at a specified offset.
+    /// Pointer accesses are 16-byte aligned.
     /// @param buffer The destination buffer to store the values into.
     /// @param byteOffset16ByteAligned The byte offset from the start of the buffer where the data will be stored. Must be 16-byte aligned.
     [require(cooperative_vector)]
@@ -24216,6 +24217,18 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
         }
     }
 
+    /// Store all elements of this CoopVec through a raw pointer at a specified offset.
+    /// The store is emitted with an `Aligned` memory operand of 16 bytes.
+    /// @param buffer Pointer to the destination memory; it is converted to a pointer to an unsized array of `T`.
+    /// @param byteOffset16ByteAligned The byte offset passed to the store instruction. Must be 16-byte aligned.
+    [ForceInline]
+    [require(spirv, cooperative_vector)]
+    void store(T* buffer, int32_t byteOffset16ByteAligned = 0)
+    {
+        // Literal used for the `Aligned` memory operand below.
+        let alignmentInBytes = 16;
+        // The store instruction is given an array pointer, so view the raw pointer as `T[]`.
+        let dstArrayPtr = Ptr<T[]>(buffer);
+        return spirv_asm
+        {
+            OpCooperativeVectorStoreNV $dstArrayPtr $byteOffset16ByteAligned $this Aligned !alignmentInBytes;
+        };
+    }
+
     [ForceInline]
     [require(cooperative_vector)]
     [require(hlsl_coopvec_poc)]
@@ -24269,6 +24282,7 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
     }
 
     /// Load values from a byte-addressable buffer into a cooperative vector.
+    /// Pointer accesses are 16-byte aligned.
     /// @param buffer The source buffer to load data from.
     /// @param byteOffset16ByteAligned The byte offset from the start of the buffer. Must be 16-byte aligned.
     /// @return A new cooperative vector containing the loaded values.
@@ -24368,6 +24382,19 @@ struct CoopVec<T : __BuiltinArithmeticType, let N : int> : IArray<T>, IArithmeti
         }
     }
 
+    /// Load values through a raw pointer into a cooperative vector.
+    /// The load is emitted with an `Aligned` memory operand of 16 bytes.
+    /// @param buffer Pointer to the source memory; it is converted to a pointer to an unsized array of `T`.
+    /// @param byteOffset16ByteAligned The byte offset passed to the load instruction. Must be 16-byte aligned.
+    /// @return A new cooperative vector containing the loaded values.
+    [ForceInline]
+    [__NoSideEffect]
+    [require(spirv, cooperative_vector)]
+    static CoopVec<T, N> load(T* buffer, int32_t byteOffset16ByteAligned = 0)
+    {
+        // Literal used for the `Aligned` memory operand below.
+        let alignmentInBytes = 16;
+        // The load instruction is given an array pointer, so view the raw pointer as `T[]`.
+        let srcArrayPtr = Ptr<T[]>(buffer);
+        return spirv_asm
+        {
+            result:$$CoopVec<T, N> = OpCooperativeVectorLoadNV $srcArrayPtr $byteOffset16ByteAligned Aligned !alignmentInBytes;
+        };
+    }
+
     // Groupshared
     [ForceInline]
     [__NoSideEffect]
@@ -25736,6 +25763,13 @@ CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(RWStructured
     return CoopVec<T, N>.load(buffer, byteOffset16ByteAligned);
 }
 
+/// Load a cooperative vector through a raw pointer.
+/// Forwards to `CoopVec<T, N>.load` with the same arguments.
+/// @param buffer Pointer to the source memory.
+/// @param byteOffset16ByteAligned The byte offset forwarded to the load. Must be 16-byte aligned.
+/// @return A new cooperative vector containing the loaded values.
+[ForceInline]
+[require(spirv, cooperative_vector)]
+CoopVec<T, N> coopVecLoad<let N : int, T : __BuiltinArithmeticType>(T* buffer, int32_t byteOffset16ByteAligned = 0)
+{
+    let loaded = CoopVec<T, N>.load(buffer, byteOffset16ByteAligned);
+    return loaded;
+}
+
 // Groupshared
 [ForceInline]
 [require(cooperative_vector)]

@@ -2069,10 +2069,25 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
                     inst->getOp() == kIROp_ArrayType
                         ? emitOpTypeArray(inst, elementType, irArrayType->getElementCount())
                         : emitOpTypeRuntimeArray(inst, elementType);
-                auto strideInst = irArrayType->getArrayStride();
-                if (strideInst && shouldEmitArrayStride(irArrayType->getElementType()))
+                if (shouldEmitArrayStride(irArrayType->getElementType()))
                 {
-                    int stride = (int)getIntVal(strideInst);
+                    auto stride = 0;
+                    if (auto strideInst = irArrayType->getArrayStride())
+                    {
+                        stride = (int)getIntVal(strideInst);
+                    }
+                    else
+                    {
+                        // Stride may not have been calculated for basic element types. Calculate it
+                        // here.
+                        IRSizeAndAlignment sizeAndAlignment;
+                        getNaturalSizeAndAlignment(
+                            m_targetProgram->getOptionSet(),
+                            elementType,
+                            &sizeAndAlignment);
+                        stride = (int)sizeAndAlignment.getStride();
+                    }
+
                     emitOpDecorateArrayStride(
                         getSection(SpvLogicalSectionID::Annotations),
                         nullptr,

@@ -0,0 +1,32 @@
+//TEST(compute):COMPARE_COMPUTE(filecheck-buffer=CHECK):-vk -render-feature cooperative-vector -emit-spirv-directly
+
+//TEST_INPUT: set inputBuffer = ubuffer(data=[1 2 3 4 5 6 7 8 9 10 11 12], stride=4);
+uniform int32_t* inputBuffer;
+
+//TEST_INPUT: set outputBuffer = out ubuffer(data=[0 0 0 0 0 0 0 0], stride=4);
+uniform int32_t* outputBuffer;
+
+// CHECK: 9
+// CHECK-NEXT: A
+// CHECK-NEXT: B
+// CHECK-NEXT: C
+// CHECK-NEXT: 1
+// CHECK-NEXT: 2
+// CHECK-NEXT: 3
+// CHECK-NEXT: 4
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void computeMain()
+{
+    // Input elements 0..3 (values 1 2 3 4): base pointer with byte offset 0.
+    let a = coopVecLoad<4, int32_t>(inputBuffer, 0);
+    // Input elements 8..11 (values 9 10 11 12): pointer advanced by 4 elements plus a 16-byte offset.
+    let b = coopVecLoad<4, int32_t>(inputBuffer + 4, 4*4);
+
+    // Store b to output elements 0..3 (the CHECK lines expect 9 A B C first).
+    b.store(outputBuffer, 0);
+    // Store a to output elements 4..7 (byte offset 16; the CHECK lines expect 1 2 3 4 last).
+    a.store(outputBuffer, 4*4);
+}
+