diff --git a/tools/clang/unittests/HLSLExec/LongVectorOps.def b/tools/clang/unittests/HLSLExec/LongVectorOps.def index c9fc281246..d096b784f7 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorOps.def +++ b/tools/clang/unittests/HLSLExec/LongVectorOps.def @@ -212,12 +212,39 @@ OP_DEFAULT_DEFINES(Wave, WaveActiveBitXor, 1, "TestWaveActiveBitXor", "", " -DFU OP_DEFAULT_DEFINES(Wave, WaveActiveAllEqual, 1, "TestWaveActiveAllEqual", "", " -DFUNC_WAVE_ACTIVE_ALL_EQUAL=1") OP_DEFAULT_DEFINES(Wave, WaveReadLaneAt, 1, "TestWaveReadLaneAt", "", " -DFUNC_WAVE_READ_LANE_AT=1") OP_DEFAULT_DEFINES(Wave, WaveReadLaneFirst, 1, "TestWaveReadLaneFirst", "", " -DFUNC_WAVE_READ_LANE_FIRST=1") -OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1") -OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1") +OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1") +OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1") OP(Wave, WaveMultiPrefixSum, 1, "TestWaveMultiPrefixSum", "", " -DFUNC_WAVE_MULTI_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3) OP(Wave, WaveMultiPrefixProduct, 1, "TestWaveMultiPrefixProduct", "", " -DFUNC_WAVE_MULTI_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3) OP(Wave, WaveMultiPrefixBitAnd, 1, "TestWaveMultiPrefixBitAnd", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_AND=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3) OP(Wave, WaveMultiPrefixBitOr, 1, "TestWaveMultiPrefixBitOr", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_OR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3) OP(Wave, WaveMultiPrefixBitXor, 1, 
"TestWaveMultiPrefixBitXor", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_XOR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3) +#define OP_DERIVATIVE(GROUP, SYMBOL, DERIVATIVE_INTRINSIC) \ + OP(GROUP, SYMBOL, 1, "TestDerivative", "", "-DFUNC_TEST_DERIVATIVE=1 \ + -DNUMTHREADS_XYZ=2,2,1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1" \ + " -DDERIVATIVE_FUNC=" DERIVATIVE_INTRINSIC, \ + "LongVectorOp", Default2, Default1, Default3) + +OP_DERIVATIVE(Derivative, DerivativeDdx, "ddx") +OP_DERIVATIVE(Derivative, DerivativeDdy, "ddy") +OP_DERIVATIVE(Derivative, DerivativeDdxFine, "ddx_fine") +OP_DERIVATIVE(Derivative, DerivativeDdyFine, "ddy_fine") + +#undef OP_DERIVATIVE + +#define OP_QUAD_READ(GROUP, ARITY, SYMBOL, QUAD_INTRINSIC, SOURCE_LANE_ID) \ + OP(GROUP, SYMBOL, ARITY, "TestQuadRead", "", "-DFUNC_TEST_QUAD_READ=1" \ + " -DNUMTHREADS_XYZ=2,2,1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1" \ + " -DQUAD_READ_FUNC=" QUAD_INTRINSIC \ + " -DSOURCE_LANE_ID=" SOURCE_LANE_ID, \ + "LongVectorOp", Default1, Default2, Default3) + +OP_QUAD_READ(Quad, 2, QuadReadLaneAt, "QuadReadLaneAt", "2") +OP_QUAD_READ(Quad, 1, QuadReadAcrossX, "QuadReadAcrossX", "2") +OP_QUAD_READ(Quad, 1, QuadReadAcrossY, "QuadReadAcrossY", "1") +OP_QUAD_READ(Quad, 1, QuadReadAcrossDiagonal, "QuadReadAcrossDiagonal", "0") + +#undef OP_QUAD_READ + #undef OP diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index b646b6a4b9..c66b23a0fa 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1339,6 +1339,55 @@ template struct ExpectedBuilder { } }; +// +// Derivative Ops +// + +// Coarse derivatives (ddx/ddy): All lanes in quad get same result +// Fine derivatives (ddx_fine/ddy_fine): Each lane gets unique result +// For testing, we validate results on lane 3 to keep validation generic +// +// The value of A in each lane is computed in the shader as: A = A + LaneID*2 +// +// Top right (lane 1) 
- Top Left (lane 0) +DEFAULT_OP_1(OpType::DerivativeDdx, ((A + 2) - (A + 0))); +// Lower left (lane 2) - Top Left (lane 0) +DEFAULT_OP_1(OpType::DerivativeDdy, ((A + 4) - (A + 0))); + +// Bottom right (lane 3) - Bottom left (lane 2) +DEFAULT_OP_1(OpType::DerivativeDdxFine, ((A + 6) - (A + 4))); +// Bottom right (lane 3) - Top right (lane 1) +DEFAULT_OP_1(OpType::DerivativeDdyFine, ((A + 6) - (A + 2))); + +// +// Quad Read Ops +// + +// We keep things generic so we can re-use this macro for all quad ops. +// The lane whose vector is modified is set by the SOURCE_LANE_ID define. +// See TestQuadRead in ShaderOpArith.xml. +// For all cases we simply fill the vector on that lane with the value of the +// third element (index 2). +#define QUAD_READ_OP(OP, ARITY) \ + template struct Op : DefaultValidation {}; \ + template struct ExpectedBuilder { \ + static std::vector buildExpected(Op &, \ + const InputSets &Inputs) { \ + DXASSERT_NOMSG(Inputs.size() == ARITY); \ + std::vector Expected; \ + const size_t VectorSize = Inputs[0].size(); \ + Expected.assign(VectorSize, Inputs[0][2]); \ + return Expected; \ + } \ + }; + +QUAD_READ_OP(OpType::QuadReadLaneAt, 2); +QUAD_READ_OP(OpType::QuadReadAcrossX, 1); +QUAD_READ_OP(OpType::QuadReadAcrossY, 1); +QUAD_READ_OP(OpType::QuadReadAcrossDiagonal, 1); + +#undef QUAD_READ_OP + // // Wave Ops // @@ -1658,7 +1707,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, const std::string AdditionalCompilerOptions = "-DWAVE_SIZE=" + std::to_string(WaveSize) + - " -DNUMTHREADS_X=" + std::to_string(WaveSize); + " -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 "; for (size_t VectorSize : InputVectorSizes) { std::vector> Inputs = @@ -2458,6 +2507,60 @@ class DxilConf_SM69_Vectorized { HLK_TEST(LoadAndStore_RD_SB_SRV, double); HLK_TEST(LoadAndStore_RD_SB_UAV, double); + // Derivative + HLK_TEST(DerivativeDdx, HLSLHalf_t); + HLK_TEST(DerivativeDdy, HLSLHalf_t); + HLK_TEST(DerivativeDdxFine, HLSLHalf_t); + 
HLK_TEST(DerivativeDdyFine, HLSLHalf_t); + HLK_TEST(DerivativeDdx, float); + HLK_TEST(DerivativeDdy, float); + HLK_TEST(DerivativeDdxFine, float); + HLK_TEST(DerivativeDdyFine, float); + + // Quad + HLK_TEST(QuadReadLaneAt, HLSLBool_t); + HLK_TEST(QuadReadAcrossX, HLSLBool_t); + HLK_TEST(QuadReadAcrossY, HLSLBool_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLBool_t); + HLK_TEST(QuadReadLaneAt, int16_t); + HLK_TEST(QuadReadAcrossX, int16_t); + HLK_TEST(QuadReadAcrossY, int16_t); + HLK_TEST(QuadReadAcrossDiagonal, int16_t); + HLK_TEST(QuadReadLaneAt, int32_t); + HLK_TEST(QuadReadAcrossX, int32_t); + HLK_TEST(QuadReadAcrossY, int32_t); + HLK_TEST(QuadReadAcrossDiagonal, int32_t); + HLK_TEST(QuadReadLaneAt, int64_t); + HLK_TEST(QuadReadAcrossX, int64_t); + HLK_TEST(QuadReadAcrossY, int64_t); + HLK_TEST(QuadReadAcrossDiagonal, int64_t); + HLK_TEST(QuadReadLaneAt, uint16_t); + HLK_TEST(QuadReadAcrossX, uint16_t); + HLK_TEST(QuadReadAcrossY, uint16_t); + HLK_TEST(QuadReadAcrossDiagonal, uint16_t); + HLK_TEST(QuadReadLaneAt, uint32_t); + HLK_TEST(QuadReadAcrossX, uint32_t); + HLK_TEST(QuadReadAcrossY, uint32_t); + HLK_TEST(QuadReadAcrossDiagonal, uint32_t); + HLK_TEST(QuadReadLaneAt, uint64_t); + HLK_TEST(QuadReadAcrossX, uint64_t); + HLK_TEST(QuadReadAcrossY, uint64_t); + HLK_TEST(QuadReadAcrossDiagonal, uint64_t); + HLK_TEST(QuadReadLaneAt, HLSLHalf_t); + HLK_TEST(QuadReadAcrossX, HLSLHalf_t); + HLK_TEST(QuadReadAcrossY, HLSLHalf_t); + HLK_TEST(QuadReadAcrossDiagonal, HLSLHalf_t); + HLK_TEST(QuadReadLaneAt, float); + HLK_TEST(QuadReadAcrossX, float); + HLK_TEST(QuadReadAcrossY, float); + HLK_TEST(QuadReadAcrossDiagonal, float); + HLK_TEST(QuadReadLaneAt, double); + HLK_TEST(QuadReadAcrossX, double); + HLK_TEST(QuadReadAcrossY, double); + HLK_TEST(QuadReadAcrossDiagonal, double); + + // Wave + HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLBool_t); HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLBool_t); HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLBool_t); diff --git 
a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 4bac1dddd1..4fd022ef18 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4437,8 +4437,67 @@ void MSMain(uint GID : SV_GroupIndex, } #endif - #ifdef NUMTHREADS_X - #define NUMTHREADS_ATTR [numthreads(NUMTHREADS_X, 1, 1)] + #ifdef FUNC_TEST_DERIVATIVE + void TestDerivative(vector Vector) + { + // 0 == upper-left lane in quad + // 1 == upper-right lane in quad + // 2 == lower-left lane in quad + // 3 == lower-right lane in quad + + const uint LaneIndex = WaveGetLaneIndex(); + + // We need to make sure the values are unique across lanes used in the + // partial derivative calculation so we can get a non-zero partial + // derivative. Multiplying the lane index by 2 is a simple way to + // ensure that. And we do this on all lanes so this function can be + // used generically for coarse and fine partial derivatives. + Vector += ((TYPE)(LaneIndex * 2)); + + vector Result = DERIVATIVE_FUNC(Vector); + + // For coarse derivatives, all lanes in the quad get the same result. + // But for fine derivatives, each lane gets a different result. To + // keep things generic we only store from lane 3 (lower-right), the + // lane we arbitrarily chose for validation with fine derivatives. + if(LaneIndex == 3) + { + g_OutputVector.Store< vector >(0, Result); + } + } + #endif + + #ifdef FUNC_TEST_QUAD_READ + void TestQuadRead(vector Vector) + { + const uint LaneIndex = WaveGetLaneIndex(); + + // Fill the long vector with something different on SOURCE_LANE_ID. + // We choose the 3rd element arbitrarily because it makes it easy + // to compute expected values CPU side. + [unroll] + for(uint i = 0; i < NUM; ++i) + { + Vector[i] = (LaneIndex == SOURCE_LANE_ID) ? 
Vector[2] : Vector[i]; + } + + #if IS_BINARY_OP + // QuadReadLaneAt + vector Result = QUAD_READ_FUNC(Vector, SOURCE_LANE_ID); + #else + // QuadReadAcross* + vector Result = QUAD_READ_FUNC(Vector); + #endif + + if(LaneIndex == 3) + { + g_OutputVector.Store< vector >(0, Result); + } + } + #endif + + #ifdef NUMTHREADS_XYZ + #define NUMTHREADS_ATTR [numthreads(NUMTHREADS_XYZ)] #else #define NUMTHREADS_ATTR [numthreads(1, 1, 1)] #endif @@ -4479,8 +4538,7 @@ void MSMain(uint GID : SV_GroupIndex, #endif vector OutputVector; - #ifdef IS_WAVE_PREFIX_OP - // Wave prefix ops store the output on a specific lane only. + #ifdef OP_STORES_RESULT_ON_SPECIFIC_LANE FUNC(Input1); return; #elif TEST_ARRAY_OPERATOR