From e89f1f3cdfbeddb9672ac764766d6b07c4b1ff0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Mon, 8 Dec 2025 14:38:11 -0800 Subject: [PATCH 01/14] modify test logic to test value changed in different waves and vector positions --- .../clang/unittests/HLSLExec/LongVectors.cpp | 63 ++++++++++++++----- .../unittests/HLSLExec/ShaderOpArith.xml | 21 +++++-- 2 files changed, 63 insertions(+), 21 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 58507d569a..eb64c45139 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1597,9 +1597,11 @@ template struct ExpectedBuilder { static std::vector buildExpected(Op &, const InputSets &, const UINT WaveSize) { - // For this test, the shader arranges it so that lane 0 is different from - // all the other lanes. Besides that all other lines write their result of - // WaveMatch as well. + // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and + // WAVE_SIZE-1 are different from all the other lanes, also those + // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1 + // respectively, if the input vector has enough elements. Besides that all + // other lanes write their result of WaveMatch as well. std::vector Expected; Expected.assign(WaveSize * 4, 0); @@ -1613,21 +1615,52 @@ template struct ExpectedBuilder { const uint64_t HighWaveMask = (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; - const uint64_t LowExpected = ~1ULL & LowWaveMask; - const uint64_t HighExpected = ~0ULL & HighWaveMask; + const UINT MidBit = WaveSize / 2; + const UINT LastBit = WaveSize - 1; - Expected[0] = 1; - Expected[1] = 0; - Expected[2] = 0; - Expected[3] = 0; + uint64_t LowUnchangedLanes = ~1ULL; // Clear bit 0 + uint64_t HighUnchangedLanes = ~0ULL; - // all lanes other than the first one have the same result - for (UINT I = 1; I < WaveSize; ++I) { + if (MidBit < 64) + LowUnchangedLanes &= ~(1ULL << MidBit); + else + HighUnchangedLanes &= ~(1ULL << (MidBit - 64)); + + if (LastBit < 64) + LowUnchangedLanes &= ~(1ULL << LastBit); + else + HighUnchangedLanes &= ~(1ULL << (LastBit - 64)); + + // Removing bits outside the wave size. + LowUnchangedLanes &= LowWaveMask; + HighUnchangedLanes &= HighWaveMask; + + for (UINT I = 0; I < WaveSize; ++I) { const UINT Index = I * 4; - Expected[Index] = static_cast(LowExpected); - Expected[Index + 1] = static_cast(LowExpected >> 32); - Expected[Index + 2] = static_cast(HighExpected); - Expected[Index + 3] = static_cast(HighExpected >> 32); + + if (I == 0 || MidBit == I || LastBit == I) { + uint64_t LowChangedLanes = 0ULL; + uint64_t HighChangedLanes = 0ULL; + + if (I < 64) + LowChangedLanes = (1ULL << I); + else + HighChangedLanes = (1ULL << (I - 64)); + + LowChangedLanes &= LowWaveMask; + HighChangedLanes &= HighWaveMask; + + Expected[Index] = static_cast(LowChangedLanes); + Expected[Index + 1] = static_cast(LowChangedLanes >> 32); + Expected[Index + 2] = static_cast(HighChangedLanes); + Expected[Index + 3] = static_cast(HighChangedLanes >> 32); + continue; + } + + Expected[Index] = static_cast(LowUnchangedLanes); + Expected[Index + 1] = static_cast(LowUnchangedLanes >> 32); + Expected[Index + 2] = static_cast(HighUnchangedLanes); + Expected[Index + 3] = static_cast(HighUnchangedLanes >> 32); } return Expected; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 2cfeb1f225..6956f9c191 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4408,15 +4408,24 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_WAVE_MATCH void TestWaveMatch(vector Vector) { - if(WaveGetLaneIndex() == 0) + uint LaneIndex = WaveGetLaneIndex(); + bool ShouldModify = ( LaneIndex == 0 || + LaneIndex == (WAVE_SIZE / 2) || + LaneIndex == (WAVE_SIZE - 1)); + + if(ShouldModify && LaneIndex < NUM) { - if(Vector[0] == (TYPE)0) - Vector[0] = (TYPE) 1; - else if(Vector[0] == (TYPE)1) - Vector[0] = (TYPE) 0; + if(Vector[LaneIndex] == (TYPE) 0) + Vector[LaneIndex] = (TYPE) 1; + else if(Vector[LaneIndex] == (TYPE) 1) + Vector[LaneIndex] = (TYPE) 0; else - Vector[0] = (TYPE) 1; + Vector[LaneIndex] = (TYPE) 1; } + + // Making sure all lanes finish updating their vectors. + AllMemoryBarrierWithGroupSync(); + uint4 result = WaveMatch(Vector); uint index = WaveGetLaneIndex(); From 343df5a7c5ec79826095bbd367aea8ebed6b17bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Mon, 8 Dec 2025 18:31:59 -0800 Subject: [PATCH 02/14] add Wave struct --- .../clang/unittests/HLSLExec/LongVectors.cpp | 89 ++++++++++--------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index eb64c45139..158e4da395 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1593,6 +1593,38 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; +// Helper struct to build the expected result for WaveMatch tests. +struct Wave { + uint64_t LowWaveMask; + uint64_t HighWaveMask; + + uint64_t LowBits; + uint64_t HighBits; + + Wave(CONST UINT NumWaves, uint64_t LB, uint64_t HB) { + const UINT LowWaves = std::min(64U, NumWaves); + const UINT HighWaves = NumWaves - LowWaves; + LowWaveMask = (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; + HighWaveMask = (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; + LowBits = LB & LowWaveMask; + HighBits = HB & HighWaveMask; + } + + void SetLane(UINT LaneID) { + if (LaneID < 64) + LowBits |= (1ULL << LaneID) & LowWaveMask; + else + HighBits |= (1ULL << (LaneID - 64)) & HighWaveMask; + } + + void ClearLane(UINT LaneID) { + if (LaneID < 64) + LowBits &= ~(1ULL << LaneID) & LowWaveMask; + else + HighBits &= ~(1ULL << (LaneID - 64)) & HighWaveMask; + } +}; + template struct ExpectedBuilder { static std::vector buildExpected(Op &, const InputSets &, @@ -1606,61 +1638,32 @@ template struct ExpectedBuilder { std::vector Expected; Expected.assign(WaveSize * 4, 0); - const UINT LowWaves = std::min(64U, WaveSize); - const UINT HighWaves = WaveSize - LowWaves; - - const uint64_t LowWaveMask = - (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; - - const uint64_t HighWaveMask = - (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; - const UINT MidBit = WaveSize / 2; const UINT LastBit = WaveSize - 1; - uint64_t LowUnchangedLanes = ~1ULL; // Clear bit 0 - uint64_t HighUnchangedLanes = ~0ULL; - - if (MidBit < 64) - LowUnchangedLanes &= ~(1ULL << MidBit); - else - HighUnchangedLanes &= ~(1ULL << (MidBit - 64)); - - if (LastBit < 64) - LowUnchangedLanes &= ~(1ULL << LastBit); - else - HighUnchangedLanes &= ~(1ULL << (LastBit - 64)); - - // Removing bits outside the wave size. - LowUnchangedLanes &= LowWaveMask; - HighUnchangedLanes &= HighWaveMask; + Wave UnchangedLanes(WaveSize, ~0ULL, ~0ULL); + UnchangedLanes.ClearLane(0); + UnchangedLanes.ClearLane(MidBit); + UnchangedLanes.ClearLane(LastBit); for (UINT I = 0; I < WaveSize; ++I) { const UINT Index = I * 4; if (I == 0 || MidBit == I || LastBit == I) { - uint64_t LowChangedLanes = 0ULL; - uint64_t HighChangedLanes = 0ULL; - - if (I < 64) - LowChangedLanes = (1ULL << I); - else - HighChangedLanes = (1ULL << (I - 64)); - - LowChangedLanes &= LowWaveMask; - HighChangedLanes &= HighWaveMask; + Wave ChangedLanes(WaveSize, 0ULL, 0ULL); + ChangedLanes.SetLane(I); - Expected[Index] = static_cast(LowChangedLanes); - Expected[Index + 1] = static_cast(LowChangedLanes >> 32); - Expected[Index + 2] = static_cast(HighChangedLanes); - Expected[Index + 3] = static_cast(HighChangedLanes >> 32); + Expected[Index] = static_cast(ChangedLanes.LowBits); + Expected[Index + 1] = static_cast(ChangedLanes.LowBits >> 32); + Expected[Index + 2] = static_cast(ChangedLanes.HighBits); + Expected[Index + 3] = static_cast(ChangedLanes.HighBits >> 32); continue; } - Expected[Index] = static_cast(LowUnchangedLanes); - Expected[Index + 1] = static_cast(LowUnchangedLanes >> 32); - Expected[Index + 2] = static_cast(HighUnchangedLanes); - Expected[Index + 3] = static_cast(HighUnchangedLanes >> 32); + Expected[Index] = static_cast(UnchangedLanes.LowBits); + Expected[Index + 1] = static_cast(UnchangedLanes.LowBits >> 32); + Expected[Index + 2] = static_cast(UnchangedLanes.HighBits); + Expected[Index + 3] = static_cast(UnchangedLanes.HighBits >> 32); } return Expected; From d1f0d9e2740612089a5160657f0b3d3e299a2ad7 Mon Sep 17 00:00:00 2001 From: Joao Saffran Date: Tue, 9 Dec 2025 11:56:07 -0800 Subject: [PATCH 03/14] address comment from damyan --- .../clang/unittests/HLSLExec/LongVectors.cpp | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 158e4da395..d02315d19a 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1594,20 +1594,22 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; // Helper struct to build the expected result for WaveMatch tests. -struct Wave { +struct WaveMatchResultBuilder { + +private: uint64_t LowWaveMask; uint64_t HighWaveMask; - uint64_t LowBits; uint64_t HighBits; - Wave(CONST UINT NumWaves, uint64_t LB, uint64_t HB) { +public: + WaveMatchResultBuilder(UINT NumWaves) : LowBits(0), HighBits(0) { const UINT LowWaves = std::min(64U, NumWaves); const UINT HighWaves = NumWaves - LowWaves; LowWaveMask = (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; HighWaveMask = (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; - LowBits = LB & LowWaveMask; - HighBits = HB & HighWaveMask; + LowBits &= LowWaveMask; + HighBits &= HighWaveMask; } void SetLane(UINT LaneID) { @@ -1623,6 +1625,18 @@ struct Wave { else HighBits &= ~(1ULL << (LaneID - 64)) & HighWaveMask; } + + void InvertLanes() { + LowBits = ~LowBits & LowWaveMask; + HighBits = ~HighBits & HighWaveMask; + } + + void SetExpected(UINT *Dest) { + Dest[0] = static_cast(LowBits); + Dest[1] = static_cast(LowBits >> 32); + Dest[2] = static_cast(HighBits); + Dest[3] = static_cast(HighBits >> 32); + } }; template struct ExpectedBuilder { @@ -1641,7 +1655,8 @@ template struct ExpectedBuilder { const UINT MidBit = WaveSize / 2; const UINT LastBit = WaveSize - 1; - Wave UnchangedLanes(WaveSize, ~0ULL, ~0ULL); + WaveMatchResultBuilder UnchangedLanes(WaveSize); + UnchangedLanes.InvertLanes(); UnchangedLanes.ClearLane(0); UnchangedLanes.ClearLane(MidBit); UnchangedLanes.ClearLane(LastBit); @@ -1650,20 +1665,14 @@ template struct ExpectedBuilder { const UINT Index = I * 4; if (I == 0 || MidBit == I || LastBit == I) { - Wave ChangedLanes(WaveSize, 0ULL, 0ULL); + WaveMatchResultBuilder ChangedLanes(WaveSize); ChangedLanes.SetLane(I); - Expected[Index] = static_cast(ChangedLanes.LowBits); - Expected[Index + 1] = static_cast(ChangedLanes.LowBits >> 32); - Expected[Index + 2] = static_cast(ChangedLanes.HighBits); - Expected[Index + 3] = static_cast(ChangedLanes.HighBits >> 32); + ChangedLanes.SetExpected(&Expected[Index]); continue; } - Expected[Index] = static_cast(UnchangedLanes.LowBits); - Expected[Index + 1] = static_cast(UnchangedLanes.LowBits >> 32); - Expected[Index + 2] = static_cast(UnchangedLanes.HighBits); - Expected[Index + 3] = static_cast(UnchangedLanes.HighBits >> 32); + UnchangedLanes.SetExpected(&Expected[Index]); } return Expected; @@ -1929,7 +1938,7 @@ class DxilConf_SM69_Vectorized { VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); - WaveSize = WaveOpts.WaveLaneCountMin; + WaveSize = 128; // WaveOpts.WaveLaneCountMin; } DXASSERT_NOMSG(WaveSize > 0); From 64779aa31740ce43cea56bade3bfdb08146d13d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Mon, 5 Jan 2026 15:35:20 -0800 Subject: [PATCH 04/14] address comments from alex --- .../clang/unittests/HLSLExec/LongVectors.cpp | 64 ++++++++++--------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 7b5e4d3ce4..e92f668400 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1614,49 +1614,53 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; +static constexpr UINT ComputeWaveMask(UINT NumWaves) { + return (NumWaves < 64) ? (1ULL << NumWaves) - 1 : ~0ULL; +} + // Helper struct to build the expected result for WaveMatch tests. struct WaveMatchResultBuilder { private: uint64_t LowWaveMask; uint64_t HighWaveMask; - uint64_t LowBits; - uint64_t HighBits; + uint64_t ActiveLanesLow; + uint64_t ActiveLanesHigh; public: - WaveMatchResultBuilder(UINT NumWaves) : LowBits(0), HighBits(0) { + WaveMatchResultBuilder(UINT NumWaves) + : ActiveLanesLow(0), ActiveLanesHigh(0) { + VERIFY_IS_TRUE(NumWaves <= 128); const UINT LowWaves = std::min(64U, NumWaves); const UINT HighWaves = NumWaves - LowWaves; - LowWaveMask = (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; - HighWaveMask = (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; - LowBits &= LowWaveMask; - HighBits &= HighWaveMask; + LowWaveMask = ComputeWaveMask(LowWaves); + HighWaveMask = ComputeWaveMask(HighWaves); } void SetLane(UINT LaneID) { if (LaneID < 64) - LowBits |= (1ULL << LaneID) & LowWaveMask; + ActiveLanesLow |= (1ULL << LaneID) & LowWaveMask; else - HighBits |= (1ULL << (LaneID - 64)) & HighWaveMask; + ActiveLanesHigh |= (1ULL << (LaneID - 64)) & HighWaveMask; } void ClearLane(UINT LaneID) { if (LaneID < 64) - LowBits &= ~(1ULL << LaneID) & LowWaveMask; + ActiveLanesLow &= ~(1ULL << LaneID) & LowWaveMask; else - HighBits &= ~(1ULL << (LaneID - 64)) & HighWaveMask; + ActiveLanesHigh &= ~(1ULL << (LaneID - 64)) & HighWaveMask; } void InvertLanes() { - LowBits = ~LowBits & LowWaveMask; - HighBits = ~HighBits & HighWaveMask; + ActiveLanesLow = ~ActiveLanesLow & LowWaveMask; + ActiveLanesHigh = ~ActiveLanesHigh & HighWaveMask; } - void SetExpected(UINT *Dest) { - Dest[0] = static_cast(LowBits); - Dest[1] = static_cast(LowBits >> 32); - Dest[2] = static_cast(HighBits); - Dest[3] = static_cast(HighBits >> 32); + void ComputeExpected(UINT *Dest) { + Dest[0] = static_cast(ActiveLanesLow); + Dest[1] = static_cast(ActiveLanesLow >> 32); + Dest[2] = static_cast(ActiveLanesHigh); + Dest[3] = static_cast(ActiveLanesHigh >> 32); } }; @@ -1673,27 +1677,25 @@ template struct ExpectedBuilder { std::vector Expected; Expected.assign(WaveSize * 4, 0); - const UINT MidBit = WaveSize / 2; - const UINT LastBit = WaveSize - 1; + const UINT MidLaneID = WaveSize / 2; + const UINT LastLaneID = WaveSize - 1; WaveMatchResultBuilder UnchangedLanes(WaveSize); UnchangedLanes.InvertLanes(); UnchangedLanes.ClearLane(0); - UnchangedLanes.ClearLane(MidBit); - UnchangedLanes.ClearLane(LastBit); + UnchangedLanes.ClearLane(MidLaneID); + UnchangedLanes.ClearLane(LastLaneID); - for (UINT I = 0; I < WaveSize; ++I) { - const UINT Index = I * 4; + for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) { + const UINT Index = LaneID * 4; - if (I == 0 || MidBit == I || LastBit == I) { + if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) { WaveMatchResultBuilder ChangedLanes(WaveSize); - ChangedLanes.SetLane(I); - - ChangedLanes.SetExpected(&Expected[Index]); + ChangedLanes.SetLane(LaneID); + ChangedLanes.ComputeExpected(&Expected[Index]); continue; } - - UnchangedLanes.SetExpected(&Expected[Index]); + UnchangedLanes.ComputeExpected(&Expected[Index]); } return Expected; @@ -1959,7 +1961,7 @@ class DxilConf_SM69_Vectorized { VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); - WaveSize = 128; // WaveOpts.WaveLaneCountMin; + WaveSize = WaveOpts.WaveLaneCountMin; } DXASSERT_NOMSG(WaveSize > 0); From f0d2cfdb75122ef0334f6e0fa1050dfe53174157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Wed, 7 Jan 2026 11:43:54 -0800 Subject: [PATCH 05/14] change code to use bitset --- .../clang/unittests/HLSLExec/LongVectors.cpp | 89 +- .../unittests/HLSLExec/ShaderOpArith.xml | 3841 ++++++++++++++--- 2 files changed, 3155 insertions(+), 775 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index e92f668400..c54857245f 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1,3 +1,5 @@ +#include +#include #ifndef NOMINMAX #define NOMINMAX 1 #endif @@ -1614,88 +1616,59 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; -static constexpr UINT ComputeWaveMask(UINT NumWaves) { - return (NumWaves < 64) ? (1ULL << NumWaves) - 1 : ~0ULL; +uint32_t GetWord(const std::bitset<128> &b, uint32_t wordPos) { + uint32_t v = 0; + for (uint32_t i = 0; i < 32; ++i) + v |= uint32_t(b[wordPos * 32 + i]) << i; + return v; } -// Helper struct to build the expected result for WaveMatch tests. -struct WaveMatchResultBuilder { - -private: - uint64_t LowWaveMask; - uint64_t HighWaveMask; - uint64_t ActiveLanesLow; - uint64_t ActiveLanesHigh; - -public: - WaveMatchResultBuilder(UINT NumWaves) - : ActiveLanesLow(0), ActiveLanesHigh(0) { - VERIFY_IS_TRUE(NumWaves <= 128); - const UINT LowWaves = std::min(64U, NumWaves); - const UINT HighWaves = NumWaves - LowWaves; - LowWaveMask = ComputeWaveMask(LowWaves); - HighWaveMask = ComputeWaveMask(HighWaves); - } - - void SetLane(UINT LaneID) { - if (LaneID < 64) - ActiveLanesLow |= (1ULL << LaneID) & LowWaveMask; - else - ActiveLanesHigh |= (1ULL << (LaneID - 64)) & HighWaveMask; - } - - void ClearLane(UINT LaneID) { - if (LaneID < 64) - ActiveLanesLow &= ~(1ULL << LaneID) & LowWaveMask; - else - ActiveLanesHigh &= ~(1ULL << (LaneID - 64)) & HighWaveMask; - } - - void InvertLanes() { - ActiveLanesLow = ~ActiveLanesLow & LowWaveMask; - ActiveLanesHigh = ~ActiveLanesHigh & HighWaveMask; - } - - void ComputeExpected(UINT *Dest) { - Dest[0] = static_cast(ActiveLanesLow); - Dest[1] = static_cast(ActiveLanesLow >> 32); - Dest[2] = static_cast(ActiveLanesHigh); - Dest[3] = static_cast(ActiveLanesHigh >> 32); - } -}; +void StoreWords(UINT *Dest, std::bitset<128> LanesState) { + Dest[0] = GetWord(LanesState, 0); + Dest[1] = GetWord(LanesState, 1); + Dest[2] = GetWord(LanesState, 2); + Dest[3] = GetWord(LanesState, 3); +} template struct ExpectedBuilder { static std::vector buildExpected(Op &, - const InputSets &, + const InputSets &Inputs, const UINT WaveSize) { // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and // WAVE_SIZE-1 are different from all the other lanes, also those // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1 // respectively, if the input vector has enough elements. Besides that all // other lanes write their result of WaveMatch as well. + DXASSERT_NOMSG(Inputs.size() == 1); std::vector Expected; Expected.assign(WaveSize * 4, 0); + const size_t VectorSize = Inputs[0].size(); + + Expected.assign(WaveSize * 4, 0); + const UINT MidLaneID = WaveSize / 2; const UINT LastLaneID = WaveSize - 1; - WaveMatchResultBuilder UnchangedLanes(WaveSize); - UnchangedLanes.InvertLanes(); - UnchangedLanes.ClearLane(0); - UnchangedLanes.ClearLane(MidLaneID); - UnchangedLanes.ClearLane(LastLaneID); + std::bitset<128> UnchangedLanes(~0ULL); + UnchangedLanes &= (1ULL << WaveSize) - 1; + UnchangedLanes = UnchangedLanes.reset(0).reset(MidLaneID); + + if (LastLaneID < VectorSize) + UnchangedLanes.reset(LastLaneID); for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) { const UINT Index = LaneID * 4; - if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) { - WaveMatchResultBuilder ChangedLanes(WaveSize); - ChangedLanes.SetLane(LaneID); - ChangedLanes.ComputeExpected(&Expected[Index]); + if (LaneID == 0 || LaneID == MidLaneID || + (LastLaneID < VectorSize && LaneID == LastLaneID)) { + std::bitset<128> ChangedLanes(0); + ChangedLanes = ChangedLanes.set(LaneID); + StoreWords(&Expected[Index], ChangedLanes); continue; } - UnchangedLanes.ComputeExpected(&Expected[Index]); + StoreWords(&Expected[Index], UnchangedLanes); } return Expected; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 6956f9c191..16704c08dd 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -1,8 +1,15 @@ - + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1)) + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -11,13 +18,30 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + {.125f, .25f, .5f, 1.0f}, {2.0f, 4.0f, 16.0f, 32.0f}, {32.0f, 64.0f, 128.0f, 256.0f}, {256.0f, 512.0f, 1024.0f, 2048.0f} - + @@ -26,15 +50,23 @@ - + - - + + - + RootFlags(0), UAV(u0) - + @@ -88,13 +129,27 @@ - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2), UAV(u3)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -103,55 +158,128 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + {.125f, .25f, .5f, 1.0f}, {2.0f, 4.0f, 16.0f, 32.0f}, {32.0f, 64.0f, 128.0f, 256.0f}, {256.0f, 512.0f, 1024.0f, 2048.0f} - - - - - + + + + + - - - - + + + + - + - - + + - + - - - - + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2)) - - - - + + + + - - - + + + - + - - + + - + - - - + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -407,48 +610,109 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - - - - + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT), StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -604,56 +882,140 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT), StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -880,45 +1256,106 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - + + + + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) + 1.0f, 0.0f, 100.0f - + 1.0f, 0.5f, 1.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + @@ -1035,10 +1500,14 @@ - + - + @@ -1087,11 +1556,23 @@ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), CBV(b0)) - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + -inf, -1.5f, -denorm, -0, 0, denorm, 1.5f, inf, nan, 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0, @@ -1099,29 +1580,54 @@ 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0 - - + + - + - + - + - + RootFlags(0), UAV(u0) - + @@ -1182,7 +1697,16 @@ RootFlags(0), UAV(u0) - + @@ -1194,7 +1718,16 @@ RootFlags(0), UAV(u0) - + @@ -1206,7 +1739,16 @@ RootFlags(0), UAV(u0) - + @@ -1218,7 +1760,16 @@ RootFlags(0), UAV(u0) - + @@ -1230,7 +1781,16 @@ RootFlags(0), UAV(u0) - + @@ -1243,7 +1803,16 @@ RootFlags(0), UAV(u0) - + @@ -1256,7 +1825,16 @@ RootFlags(0), UAV(u0) - + @@ -1268,7 +1846,16 @@ RootFlags(0), UAV(u0) - + @@ -1281,7 +1868,16 @@ RootFlags(0), UAV(u0) - + @@ -1294,7 +1890,16 @@ RootFlags(0), UAV(u0) - + @@ -1307,7 +1912,16 @@ RootFlags(0), UAV(u0) - + @@ -1320,7 +1934,16 @@ RootFlags(0), UAV(u0) - + @@ -1333,9 +1956,18 @@ RootFlags(0), UAV(u0) - + - + RootFlags(0), UAV(u0) - + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + { { 0.0f, 0.25f , 0.0f }, { 1.0f, 0.0f, 0.0f, 1.0f } }, { { 0.25f, -0.25f , 0.0f }, { 0.0f, 1.0f, 0.0f, 1.0f } }, { { -0.25f, -0.25f , 0.0f }, { 0.0f, 0.0f, 1.0f, 1.0f } } - + - + - - + + @@ -1409,19 +2074,43 @@ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + { { 0.0h, 0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } }, { { 0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } }, { { -0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } } - + - + - - + + @@ -1458,29 +2147,66 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) + 1.25h, 1.75h, 1.25h, 1.875h - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + - + - + - - + + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + - + - + - - + + @@ -1544,12 +2295,22 @@ float4 vColor2 = GetAttributeAtVertex(input.color, 2); return bary.x * vColor0 + bary.y * vColor1 + bary.z * vColor2; } - ]]> + ]]> - + - - + + - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -1599,26 +2425,117 @@ - - - - + + + + > - + - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -1627,10 +2544,36 @@ - - - - + + + + @@ -1638,15 +2581,80 @@ > - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -1655,10 +2663,36 @@ - - - - + + + + @@ -1666,16 +2700,87 @@ > - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -1684,7 +2789,16 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -1693,19 +2807,49 @@ - - - - + + + + - + - + - + @@ -1723,18 +2867,89 @@ - + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -1743,7 +2958,16 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -1752,19 +2976,49 @@ - - - - + + + + - + - + - + @@ -1784,16 +3038,87 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -1802,7 +3127,16 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -1811,19 +3145,49 @@ - - - - + + + + - + - + - + @@ -1844,7 +3208,17 @@ RootFlags(0), UAV(u0) - + @@ -1855,9 +3229,36 @@ RootFlags(0), UAV(u0), UAV(u1), UAV(u2) - - - + + + @@ -1872,13 +3273,27 @@ - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -1887,11 +3302,26 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + - + { 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0, @@ -1903,24 +3333,59 @@ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - + + @@ -1932,23 +3397,46 @@ - + - - + + - + - - - - - - - + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), UAV(u6), UAV(u7), UAV(u8), UAV(u9), UAV(u10), UAV(u11), UAV(u12), UAV(u13), UAV(u14), UAV(u15), UAV(u16), UAV(u17)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -2222,11 +3724,26 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + - + { 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0, @@ -2238,149 +3755,421 @@ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - + + - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - - - + + + + - - + + - - - - - - + + + + + + - - - - - - + + + + + + - + - - + + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -2785,53 +4588,134 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - - + + + + + + - - - - - + + + + + - + - - + + - + - - - - - + + + + + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } }, - - + + - + - + - + - - + + g_testResults : register(u0); - + int ReadAcrossX_DD(int value, bool isLeft) { int d = ddx_fine(value); return isLeft ? value + d : value - d; } - + int ReadAcrossY_DD(int value, bool isTop) { int d = ddy_fine(value); return isTop ? value + d : value - d; } - + int ReadAcrossDiagonal_DD(int value, bool isLeft, bool isTop) { return ReadAcrossY_DD(ReadAcrossX_DD(value, isLeft), isTop); } @@ -3060,23 +4985,23 @@ struct PSInput { float4 pos : SV_POSITION; }; - + PSInput VSMain(float3 pos : POSITION) { PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } uint4 PSMain(PSInput input) : SV_TARGET { bool isLeft = (input.pos.x < 1.0f); bool isTop = (input.pos.y < 1.0f); - + for (int i = 0; i < 2; i++) { int is_helper = IsHelperLane(); int is_helper_accross_X = ReadAcrossX_DD(is_helper, isLeft); int is_helper_accross_Y = ReadAcrossY_DD(is_helper, isTop); int is_helper_accross_Diag = ReadAcrossDiagonal_DD(is_helper, isLeft, isTop); - + if (!isLeft && !isTop) { //bottom right pixel writes results g_testResults[i].is_helper_00 = is_helper_accross_Diag; g_testResults[i].is_helper_10 = is_helper_accross_Y; @@ -3093,39 +5018,131 @@ - - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - + + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } }, - - + + - + - + - + - - - - - - - - - - + + + + + + + + + + g_TestResults : register(u0); - + #define CS_INDEX 0 #define VS_INDEX 0 #define PS_INDEX 1 #define PS_INDEX_AFTER_DISCARD 2 - + HelperLaneWaveTestResult60 RunHelperLaneWaveTests60() { HelperLaneWaveTestResult60 tr; bool is_helper = IsHelperLane(); @@ -3207,7 +5224,7 @@ } #endif tr.waterfallLoopCount = count; - + is_helper = IsHelperLane(); tr.allEqual = WaveActiveAllEqual(is_helper); tr.countBits = WaveActiveCountBits(true); @@ -3224,7 +5241,7 @@ return tr; } - + HelperLaneQuadTestResult RunHelperLaneQuadTests() { HelperLaneQuadTestResult tr; int is_helper = IsHelperLane(); @@ -3233,10 +5250,10 @@ tr.is_helper_across_X = QuadReadAcrossX(is_helper); tr.is_helper_across_Y = QuadReadAcrossY(is_helper); tr.is_helper_across_Diag = QuadReadAcrossDiagonal(is_helper); - + return tr; } - + HelperLaneWaveTestResult65 RunHelperLaneWaveTests65() { HelperLaneWaveTestResult65 tr; uint4 noMaskedBits = (uint4)0xFFFFFFFF; @@ -3251,11 +5268,11 @@ tr.mpBitXor = WaveMultiPrefixBitXor(is_helper ? 1 : 0, noMaskedBits); return tr; } - + struct PSInput { float4 pos : SV_POSITION; }; - + PSInput VSMain(float3 pos : POSITION, uint vid:SV_VertexID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); uint laneID = WaveGetLaneIndex(); @@ -3267,10 +5284,10 @@ g_TestResults[VS_INDEX].sm60_wave = tr60; } PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } - + PSInput VSMain65(float3 pos : POSITION, uint vid:SV_VertexID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); HelperLaneWaveTestResult65 tr65 = RunHelperLaneWaveTests65(); @@ -3284,21 +5301,21 @@ g_TestResults[VS_INDEX].sm65_wave = tr65; } PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } uint4 PSMain(PSInput input) : SV_TARGET { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); HelperLaneQuadTestResult tr60_quad = RunHelperLaneQuadTests(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX].sm60_wave = tr60; g_TestResults[PS_INDEX].sm60_quad = tr60_quad; } if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel discard; - + HelperLaneWaveTestResult60 tr60_disc = RunHelperLaneWaveTests60(); HelperLaneQuadTestResult tr60_quad_disc = RunHelperLaneQuadTests(); @@ -3317,25 +5334,25 @@ tr.sm60_wave = RunHelperLaneWaveTests60(); tr.sm60_quad = RunHelperLaneQuadTests(); tr.sm65_wave = RunHelperLaneWaveTests65(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX] = tr; } if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel discard; - + HelperLaneWaveTestResult tr_disc; tr_disc.sm60_wave = RunHelperLaneWaveTests60(); tr_disc.sm60_quad = RunHelperLaneQuadTests(); tr_disc.sm65_wave = RunHelperLaneWaveTests65(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX_AFTER_DISCARD] = tr_disc; } return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1); } - + [numthreads(3,1,1)] void CSMain(uint3 tid : SV_GroupThreadID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); @@ -3345,7 +5362,7 @@ g_TestResults[CS_INDEX].sm60_quad = tr60_quad; } } - + [numthreads(3,1,1)] void CSMain65() { HelperLaneWaveTestResult tr; @@ -3361,10 +5378,35 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + + + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -3377,24 +5419,32 @@ - + - - + + - + - - - - - - - + + + + + + + Values : register(u0); @@ -3484,57 +5534,165 @@ void MSMain(uint GID : SV_GroupIndex, - - - + + + { 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u} - - + + 10.0 - + 11.0 - + 12.0 - + 13.0 - + 14.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - + 15.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - - + + 20.0 - + 21.0 - - + + 22.0 - + 23.0 - - + + 24.0 0 0 0 - + 25.0 0 0 0 - - + + { -1.0f, 1.0f, 0.0f }, { 1.0f, 1.0f, 0.0f }, { -1.0f, -1.0f, 0.0f }, @@ -3542,62 +5700,197 @@ void MSMain(uint GID : SV_GroupIndex, { 1.0f, 1.0f, 0.0f }, { 1.0f, -1.0f, 0.0f }, - - - - - - + + + + + + - - - - + + + + - - - - - - - - - - + + + + + + + + + + - - - - + + + + - + - + - + - + - - - + + + - + - + @@ -3606,9 +5899,9 @@ void MSMain(uint GID : SV_GroupIndex, { uint idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10; }; - + // globally declare these so that non-fallback won't throw an unknown identifier error - + #ifndef NON_UNIFORM #define INDEXER(idx,ix) idx #else @@ -3620,7 +5913,7 @@ void MSMain(uint GID : SV_GroupIndex, #define ROOT_SIG [RootSignature("RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable( SRV(t0, numDescriptors=6), UAV(u6, numDescriptors=6), CBV(b12), UAV(u13,numDescriptors=3) ), \ DescriptorTable(Sampler(s0, numDescriptors=2), Sampler(s2, numDescriptors=2))")] - + ByteAddressBuffer g_fallback_rawBuf[2] : register(t0); StructuredBuffer g_fallback_structBuf[2] : register(t2); Texture2D g_fallback_tex[2] : register(t4); @@ -3636,11 +5929,11 @@ void MSMain(uint GID : SV_GroupIndex, RWStructuredBuffer g_result : register(u13); RWStructuredBuffer g_resultVS : register(u14); RWStructuredBuffer g_resultPS : register(u15); - + #else // NO FALLBACK #define ROOT_SIG [RootSignature("RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | SAMPLER_HEAP_DIRECTLY_INDEXED | ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)")] - + static const int CBIndex = 12; static ConstantBuffer CB = ResourceDescriptorHeap[CBIndex]; @@ -3648,12 +5941,12 @@ void MSMain(uint GID : SV_GroupIndex, static RWStructuredBuffer g_result = ResourceDescriptorHeap[CBIndex+1]; static RWStructuredBuffer g_resultVS = ResourceDescriptorHeap[CBIndex+2]; static RWStructuredBuffer g_resultPS = ResourceDescriptorHeap[CBIndex+3]; - + #endif // FALLBACK - - + + void TestResources(RWStructuredBuffer result, uint ix) { - + #ifndef FALLBACK ByteAddressBuffer rawBuf = ResourceDescriptorHeap[INDEXER(CB.idx0,ix)]; StructuredBuffer structBuf = ResourceDescriptorHeap[INDEXER(CB.idx2,ix)]; @@ -3661,11 +5954,11 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer rwRawBuf = ResourceDescriptorHeap[INDEXER(CB.idx6,ix)]; RWStructuredBuffer rwStructBuf = ResourceDescriptorHeap[INDEXER(CB.idx8,ix)]; RWTexture1D rwTex = ResourceDescriptorHeap[INDEXER(CB.idx10,ix)]; - + static SamplerState g_samp = SamplerDescriptorHeap[INDEXER(CB.idx0, ix)]; static SamplerComparisonState g_sampCmp = SamplerDescriptorHeap[INDEXER(CB.idx2, ix)]; #endif - + #ifdef FALLBACK ByteAddressBuffer rawBuf = g_fallback_rawBuf[INDEXER(0,ix)]; StructuredBuffer structBuf = g_fallback_structBuf[INDEXER(0,ix)]; @@ -3673,7 +5966,7 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer rwRawBuf = g_fallback_rwRawBuf[INDEXER(0,ix)]; RWStructuredBuffer rwStructBuf = g_fallback_rwStructBuf[INDEXER(0,ix)]; RWTexture1D rwTex = g_fallback_rwTex[INDEXER(0,ix)]; - + static SamplerState g_samp = g_fallback_samp[INDEXER(0, ix)]; static SamplerComparisonState g_sampCmp = g_fallback_sampCmp[INDEXER(0, ix)]; #endif @@ -3693,7 +5986,7 @@ void MSMain(uint GID : SV_GroupIndex, void main(uint ix : SV_GroupIndex) { TestResources(g_result, ix); } - + struct PSInput { float4 position : SV_POSITION; }; @@ -3701,16 +5994,16 @@ void MSMain(uint GID : SV_GroupIndex, ROOT_SIG float4 PSMain(PSInput input) : SV_TARGET { int ix = WaveGetLaneIndex(); - TestResources(g_resultPS, ix); + TestResources(g_resultPS, ix); // This output doesn't actually matter return input.position; } - + ROOT_SIG PSInput VSMain(float3 pos : POSITION, uint ix : SV_VertexID) { TestResources(g_resultVS, ix); PSInput r; - r.position = float4(pos, 1); + r.position = float4(pos, 1); return r; } ]]> @@ -3720,10 +6013,22 @@ void MSMain(uint GID : SV_GroupIndex, UAV(u0), UAV(u1) - - + + @@ -3759,10 +6064,16 @@ void MSMain(uint GID : SV_GroupIndex, SRV(t0), UAV(u1) - - + + @@ -3798,11 +6109,23 @@ void MSMain(uint GID : SV_GroupIndex, DescriptorTable(UAV(u0, numDescriptors=2)) - - + + @@ -3810,9 +6133,18 @@ void MSMain(uint GID : SV_GroupIndex, - - + + @@ -3846,12 +6178,19 @@ void MSMain(uint GID : SV_GroupIndex, - DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) + DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) - - + + @@ -3859,9 +6198,18 @@ void MSMain(uint GID : SV_GroupIndex, - - + + @@ -3894,17 +6242,29 @@ void MSMain(uint GID : SV_GroupIndex, RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - + + - - + + @@ -3939,16 +6299,29 @@ void MSMain(uint GID : SV_GroupIndex, RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - + + - - + + @@ -3958,7 +6331,7 @@ void MSMain(uint GID : SV_GroupIndex, vector data; }; #endif - + [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER @@ -3982,12 +6355,46 @@ void MSMain(uint GID : SV_GroupIndex, UAV(u0), UAV(u1), UAV(u2), UAV(u3) - - - - + + + + @@ -3998,18 +6405,18 @@ void MSMain(uint GID : SV_GroupIndex, TYPE : The type of the input vector, e.g. float, double, int, uint. OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. - + NUM : The number of elements in the vector, e.g. 2, 3, 4, 8, 16, 32, - + FUNC : Used to expand to the HLSL intrinsic being tested. e.g cos, cosh, abs, etc. OR In some cases FUNC is expanded to a function call to handle special logic, e.g. asuint_splitdouble. OR it is intentionally left empty when testing operators like '+', '-', '*', '/', etc. - + OPERATOR : The operator being tested, e.g. '+', '-', '*', '/', etc. - OR for binary operations for an intrinsic it is expanded + OR for binary operations for an intrinsic it is expanded ','/ OR for unary intrinsics it is expanded to ' ' (empty). OR for ternary intrinsics it is always expanded to ','. @@ -4307,7 +6714,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 0 || WaveGetLaneIndex() == 3) { // Clear LSB on lane 0 and lane 3. Lane 0 isn't in the mask so - // shouldn't participate. Lane 3 is the output lane for this prefix + // shouldn't participate. Lane 3 is the output lane for this prefix // op, so we set distinctive bits to verify it doesn't affect its own result. Vector = Vector & ~((OUT_TYPE)0x1); } @@ -4337,7 +6744,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) { - // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create + // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create // predictable OR patterns Vector = Vector & ~((OUT_TYPE)0x2); } @@ -4350,7 +6757,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { - // Lane 3 is the output lane: Set all bits to verify it doesn't + // Lane 3 is the output lane: Set all bits to verify it doesn't // affect its own prefix result (since prefix excludes current lane) Vector = Vector | ~((OUT_TYPE)0x0); } @@ -4409,11 +6816,11 @@ void MSMain(uint GID : SV_GroupIndex, void TestWaveMatch(vector Vector) { uint LaneIndex = WaveGetLaneIndex(); - bool ShouldModify = ( LaneIndex == 0 || - LaneIndex == (WAVE_SIZE / 2) || + bool ShouldModify = ( LaneIndex == 0 || + LaneIndex == (WAVE_SIZE / 2) || LaneIndex == (WAVE_SIZE - 1)); - if(ShouldModify && LaneIndex < NUM) + if(LaneIndex < NUM && ShouldModify) { if(Vector[LaneIndex] == (TYPE) 0) Vector[LaneIndex] = (TYPE) 1; @@ -4425,12 +6832,12 @@ void MSMain(uint GID : SV_GroupIndex, // Making sure all lanes finish updating their vectors. AllMemoryBarrierWithGroupSync(); - + uint4 result = WaveMatch(Vector); uint index = WaveGetLaneIndex(); g_OutputVector.Store(index * sizeof(uint4), result); - } + } #endif #ifdef FUNC_TEST_SELECT @@ -4576,14 +6983,14 @@ void MSMain(uint GID : SV_GroupIndex, // index array from being a compile time constant. const uint IndexCount = 6; const uint IndexList[IndexCount] = { - 0, - OutNum - 1, - 1, - OutNum - 2, - OutNum / 2, + 0, + OutNum - 1, + 1, + OutNum - 2, + OutNum / 2, OutNum / 2 + 1 }; - + OutputVector = 0; uint End = min(OutNum, IndexCount); From 2138c2a360e304b9faa1568765e21a1bda8cd417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Wed, 7 Jan 2026 12:04:58 -0800 Subject: [PATCH 06/14] fix xml editor formating --- .../unittests/HLSLExec/ShaderOpArith.xml | 3854 +++-------------- 1 file changed, 719 insertions(+), 3135 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 16704c08dd..2cfeb1f225 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -1,15 +1,8 @@ - + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1)) + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -18,30 +11,13 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + {.125f, .25f, .5f, 1.0f}, {2.0f, 4.0f, 16.0f, 32.0f}, {32.0f, 64.0f, 128.0f, 256.0f}, {256.0f, 512.0f, 1024.0f, 2048.0f} - + @@ -50,23 +26,15 @@ - + - - + + - + RootFlags(0), UAV(u0) - + @@ -129,27 +88,13 @@ - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2), UAV(u3)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -158,128 +103,55 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + {.125f, .25f, .5f, 1.0f}, {2.0f, 4.0f, 16.0f, 32.0f}, {32.0f, 64.0f, 128.0f, 256.0f}, {256.0f, 512.0f, 1024.0f, 2048.0f} - - - - - + + + + + - - - - + + + + - + - - + + - + - - - - + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2)) - - - - + + + + - - - + + + - + - - + + - + - - - + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -610,109 +407,48 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - - - - + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT), StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -882,140 +604,56 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT), StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -1256,106 +880,45 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - + + + + + - - - + + + - + - - + + - + - - - - + + + + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) + 1.0f, 0.0f, 100.0f - + 1.0f, 0.5f, 1.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + @@ -1500,14 +1035,10 @@ - + - + @@ -1556,23 +1087,11 @@ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), CBV(b0)) - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + -inf, -1.5f, -denorm, -0, 0, denorm, 1.5f, inf, nan, 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0, @@ -1580,54 +1099,29 @@ 0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0 - - + + - + - + - + - + RootFlags(0), UAV(u0) - + @@ -1697,16 +1182,7 @@ RootFlags(0), UAV(u0) - + @@ -1718,16 +1194,7 @@ RootFlags(0), UAV(u0) - + @@ -1739,16 +1206,7 @@ RootFlags(0), UAV(u0) - + @@ -1760,16 +1218,7 @@ RootFlags(0), UAV(u0) - + @@ -1781,16 +1230,7 @@ RootFlags(0), UAV(u0) - + @@ -1803,16 +1243,7 @@ RootFlags(0), UAV(u0) - + @@ -1825,16 +1256,7 @@ RootFlags(0), UAV(u0) - + @@ -1846,16 +1268,7 @@ RootFlags(0), UAV(u0) - + @@ -1868,16 +1281,7 @@ RootFlags(0), UAV(u0) - + @@ -1890,16 +1294,7 @@ RootFlags(0), UAV(u0) - + @@ -1912,16 +1307,7 @@ RootFlags(0), UAV(u0) - + @@ -1934,16 +1320,7 @@ RootFlags(0), UAV(u0) - + @@ -1956,18 +1333,9 @@ RootFlags(0), UAV(u0) - + - + RootFlags(0), UAV(u0) - + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + { { 0.0f, 0.25f , 0.0f }, { 1.0f, 0.0f, 0.0f, 1.0f } }, { { 0.25f, -0.25f , 0.0f }, { 0.0f, 1.0f, 0.0f, 1.0f } }, { { -0.25f, -0.25f , 0.0f }, { 0.0f, 0.0f, 1.0f, 1.0f } } - + - + - - + + @@ -2074,43 +1409,19 @@ RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + { { 0.0h, 0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } }, { { 0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } }, { { -0.25h, -0.25h , 0.0h, 1.0h }, { 1.0h, 1.0h, 1.0h, 1.0h } } - + - + - - + + @@ -2147,66 +1458,29 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2)) + 1.25h, 1.75h, 1.25h, 1.875h - + 1.0f 1.0f 0, 1.0f -1.0f 0.0f, -1.0f -1.0f 0, -1.0f 1.0f 0, 1.0f 1.0f 0.0f, -1.0f -1.0f 0, - + - + - + - - + + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT) - + - + - + - - + + @@ -2295,22 +1544,12 @@ float4 vColor2 = GetAttributeAtVertex(input.color, 2); return bary.x * vColor0 + bary.y * vColor1 + bary.z * vColor2; } - ]]> + ]]> - + - - + + - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -2425,117 +1599,26 @@ - - - - + + + + > - + - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -2544,36 +1627,10 @@ - - - - + + + + @@ -2581,80 +1638,15 @@ > - RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - + RootFlags(0), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + @@ -2663,36 +1655,10 @@ - - - - + + + + @@ -2700,87 +1666,16 @@ > - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -2789,16 +1684,7 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -2807,49 +1693,19 @@ - - - - + + + + - + - + - + @@ -2867,89 +1723,18 @@ - + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -2958,16 +1743,7 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -2976,49 +1752,19 @@ - - - - + + + + - + - + - + @@ -3038,87 +1784,16 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) - - - - - - - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), SRV(t0), SRV(t1), UAV(u0), UAV(u1), DescriptorTable(SRV(t2,numDescriptors=2), UAV(u2,numDescriptors=2)) + + + + + + + + + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f } }, @@ -3127,16 +1802,7 @@ { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } } - + @@ -3145,49 +1811,19 @@ - - - - + + + + - + - + - + @@ -3208,17 +1844,7 @@ RootFlags(0), UAV(u0) - + @@ -3229,36 +1855,9 @@ RootFlags(0), UAV(u0), UAV(u1), UAV(u2) - - - + + + @@ -3273,27 +1872,13 @@ - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -3302,26 +1887,11 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + - + { 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0, @@ -3333,59 +1903,24 @@ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - + + @@ -3397,46 +1932,23 @@ - + - - + + - + - - - - - - - + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4), UAV(u5), UAV(u6), UAV(u7), UAV(u8), UAV(u9), UAV(u10), UAV(u11), UAV(u12), UAV(u13), UAV(u14), UAV(u15), UAV(u16), UAV(u17)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -3724,26 +2222,11 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - + - + { 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0I, 0I, 99999999I, 99999999I, 0I, 0I, 99999999I, 99999999I, 0, 0, 0, 0, @@ -3755,421 +2238,149 @@ 0, 0, 0, 0, 0, 0, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0I, 0, 0, 0, 0, } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - + + - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - - + + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + { 0I, 0I, 99999999I, 99999999I, 0I, 0I, -1I, -1I, 0I, 0I, 0I, 0I, 42I, 42I, 42I, 42I } - + - - - - + + + + - - + + - - - - - - + + + + + + - - - - - - + + + + + + - + - - + + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(UAV(u0), UAV(u1), UAV(u2), UAV(u3), UAV(u4)), StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT) - + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -4588,134 +2785,53 @@ { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } - - - - - - + + + + + + - - - - - + + + + + - + - - + + - + - - - - - + + + + + - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } }, - - + + - + - + - + - - + + g_testResults : register(u0); - + int ReadAcrossX_DD(int value, bool isLeft) { int d = ddx_fine(value); return isLeft ? value + d : value - d; } - + int ReadAcrossY_DD(int value, bool isTop) { int d = ddy_fine(value); return isTop ? value + d : value - d; } - + int ReadAcrossDiagonal_DD(int value, bool isLeft, bool isTop) { return ReadAcrossY_DD(ReadAcrossX_DD(value, isLeft), isTop); } @@ -4985,23 +3060,23 @@ struct PSInput { float4 pos : SV_POSITION; }; - + PSInput VSMain(float3 pos : POSITION) { PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } uint4 PSMain(PSInput input) : SV_TARGET { bool isLeft = (input.pos.x < 1.0f); bool isTop = (input.pos.y < 1.0f); - + for (int i = 0; i < 2; i++) { int is_helper = IsHelperLane(); int is_helper_accross_X = ReadAcrossX_DD(is_helper, isLeft); int is_helper_accross_Y = ReadAcrossY_DD(is_helper, isTop); int is_helper_accross_Diag = ReadAcrossDiagonal_DD(is_helper, isLeft, isTop); - + if (!isLeft && !isTop) { //bottom right pixel writes results g_testResults[i].is_helper_00 = is_helper_accross_Diag; g_testResults[i].is_helper_10 = is_helper_accross_Y; @@ -5018,131 +3093,39 @@ - - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - + + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + { { -1.0f, 1.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f } }, { { 1.0f, -1.0f, 0.0f } }, - - + + - + - + - + - - - - - - - - - - + + + + + + + + + + g_TestResults : register(u0); - + #define CS_INDEX 0 #define VS_INDEX 0 #define PS_INDEX 1 #define PS_INDEX_AFTER_DISCARD 2 - + HelperLaneWaveTestResult60 RunHelperLaneWaveTests60() { HelperLaneWaveTestResult60 tr; bool is_helper = IsHelperLane(); @@ -5224,7 +3207,7 @@ } #endif tr.waterfallLoopCount = count; - + is_helper = IsHelperLane(); tr.allEqual = WaveActiveAllEqual(is_helper); tr.countBits = WaveActiveCountBits(true); @@ -5241,7 +3224,7 @@ return tr; } - + HelperLaneQuadTestResult RunHelperLaneQuadTests() { HelperLaneQuadTestResult tr; int is_helper = IsHelperLane(); @@ -5250,10 +3233,10 @@ tr.is_helper_across_X = QuadReadAcrossX(is_helper); tr.is_helper_across_Y = QuadReadAcrossY(is_helper); tr.is_helper_across_Diag = QuadReadAcrossDiagonal(is_helper); - + return tr; } - + HelperLaneWaveTestResult65 RunHelperLaneWaveTests65() { HelperLaneWaveTestResult65 tr; uint4 noMaskedBits = (uint4)0xFFFFFFFF; @@ -5268,11 +3251,11 @@ tr.mpBitXor = WaveMultiPrefixBitXor(is_helper ? 1 : 0, noMaskedBits); return tr; } - + struct PSInput { float4 pos : SV_POSITION; }; - + PSInput VSMain(float3 pos : POSITION, uint vid:SV_VertexID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); uint laneID = WaveGetLaneIndex(); @@ -5284,10 +3267,10 @@ g_TestResults[VS_INDEX].sm60_wave = tr60; } PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } - + PSInput VSMain65(float3 pos : POSITION, uint vid:SV_VertexID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); HelperLaneWaveTestResult65 tr65 = RunHelperLaneWaveTests65(); @@ -5301,21 +3284,21 @@ g_TestResults[VS_INDEX].sm65_wave = tr65; } PSInput r; - r.pos = float4(pos, 1); + r.pos = float4(pos, 1); return r; } uint4 PSMain(PSInput input) : SV_TARGET { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); HelperLaneQuadTestResult tr60_quad = RunHelperLaneQuadTests(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX].sm60_wave = tr60; g_TestResults[PS_INDEX].sm60_quad = tr60_quad; } if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel discard; - + HelperLaneWaveTestResult60 tr60_disc = RunHelperLaneWaveTests60(); HelperLaneQuadTestResult tr60_quad_disc = RunHelperLaneQuadTests(); @@ -5334,25 +3317,25 @@ tr.sm60_wave = RunHelperLaneWaveTests60(); tr.sm60_quad = RunHelperLaneQuadTests(); tr.sm65_wave = RunHelperLaneWaveTests65(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX] = tr; } if (input.pos.x < 1.0f && input.pos.y < 1.0f) // discard top left pixel discard; - + HelperLaneWaveTestResult tr_disc; tr_disc.sm60_wave = RunHelperLaneWaveTests60(); tr_disc.sm60_quad = RunHelperLaneQuadTests(); tr_disc.sm65_wave = RunHelperLaneWaveTests65(); - + if (input.pos.x > 1.0f && input.pos.y > 1.0f) { // bottom right pixel writes results g_TestResults[PS_INDEX_AFTER_DISCARD] = tr_disc; } return uint4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 1); } - + [numthreads(3,1,1)] void CSMain(uint3 tid : SV_GroupThreadID) { HelperLaneWaveTestResult60 tr60 = RunHelperLaneWaveTests60(); @@ -5362,7 +3345,7 @@ g_TestResults[CS_INDEX].sm60_quad = tr60_quad; } } - + [numthreads(3,1,1)] void CSMain65() { HelperLaneWaveTestResult tr; @@ -5378,35 +3361,10 @@ - RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) - - - + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), UAV(u0) + + + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, @@ -5419,32 +3377,24 @@ - + - - + + - + - - - - - - - + + + + + + + Values : register(u0); @@ -5534,165 +3484,57 @@ void MSMain(uint GID : SV_GroupIndex, - - - + + + { 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u} - - + + 10.0 - + 11.0 - + 12.0 - + 13.0 - + 14.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - + 15.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - - + + 20.0 - + 21.0 - - + + 22.0 - + 23.0 - - + + 24.0 0 0 0 - + 25.0 0 0 0 - - + + { -1.0f, 1.0f, 0.0f }, { 1.0f, 1.0f, 0.0f }, { -1.0f, -1.0f, 0.0f }, @@ -5700,197 +3542,62 @@ void MSMain(uint GID : SV_GroupIndex, { 1.0f, 1.0f, 0.0f }, { 1.0f, -1.0f, 0.0f }, - - - - - - + + + + + + - - - - + + + + - - - - - - - - - - + + + + + + + + + + - - - - + + + + - + - + - + - + - - - + + + - + - + @@ -5899,9 +3606,9 @@ void MSMain(uint GID : SV_GroupIndex, { uint idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10; }; - + // globally declare these so that non-fallback won't throw an unknown identifier error - + #ifndef NON_UNIFORM #define INDEXER(idx,ix) idx #else @@ -5913,7 +3620,7 @@ void MSMain(uint GID : SV_GroupIndex, #define ROOT_SIG [RootSignature("RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable( SRV(t0, numDescriptors=6), UAV(u6, numDescriptors=6), CBV(b12), UAV(u13,numDescriptors=3) ), \ DescriptorTable(Sampler(s0, numDescriptors=2), Sampler(s2, numDescriptors=2))")] - + ByteAddressBuffer g_fallback_rawBuf[2] : register(t0); StructuredBuffer g_fallback_structBuf[2] : register(t2); Texture2D g_fallback_tex[2] : register(t4); @@ -5929,11 +3636,11 @@ void MSMain(uint GID : SV_GroupIndex, RWStructuredBuffer g_result : register(u13); RWStructuredBuffer g_resultVS : register(u14); RWStructuredBuffer g_resultPS : register(u15); - + #else // NO FALLBACK #define ROOT_SIG [RootSignature("RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | SAMPLER_HEAP_DIRECTLY_INDEXED | ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT)")] - + static const int CBIndex = 12; static ConstantBuffer CB = ResourceDescriptorHeap[CBIndex]; @@ -5941,12 +3648,12 @@ void MSMain(uint GID : SV_GroupIndex, static RWStructuredBuffer g_result = ResourceDescriptorHeap[CBIndex+1]; static RWStructuredBuffer g_resultVS = ResourceDescriptorHeap[CBIndex+2]; static RWStructuredBuffer g_resultPS = ResourceDescriptorHeap[CBIndex+3]; - + #endif // FALLBACK - - + + void TestResources(RWStructuredBuffer result, uint ix) { - + #ifndef FALLBACK ByteAddressBuffer rawBuf = ResourceDescriptorHeap[INDEXER(CB.idx0,ix)]; StructuredBuffer structBuf = ResourceDescriptorHeap[INDEXER(CB.idx2,ix)]; @@ -5954,11 +3661,11 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer rwRawBuf = ResourceDescriptorHeap[INDEXER(CB.idx6,ix)]; RWStructuredBuffer rwStructBuf = ResourceDescriptorHeap[INDEXER(CB.idx8,ix)]; RWTexture1D rwTex = ResourceDescriptorHeap[INDEXER(CB.idx10,ix)]; - + static SamplerState g_samp = SamplerDescriptorHeap[INDEXER(CB.idx0, ix)]; static SamplerComparisonState g_sampCmp = SamplerDescriptorHeap[INDEXER(CB.idx2, ix)]; #endif - + #ifdef FALLBACK ByteAddressBuffer rawBuf = g_fallback_rawBuf[INDEXER(0,ix)]; StructuredBuffer structBuf = g_fallback_structBuf[INDEXER(0,ix)]; @@ -5966,7 +3673,7 @@ void MSMain(uint GID : SV_GroupIndex, RWByteAddressBuffer rwRawBuf = g_fallback_rwRawBuf[INDEXER(0,ix)]; RWStructuredBuffer rwStructBuf = g_fallback_rwStructBuf[INDEXER(0,ix)]; RWTexture1D rwTex = g_fallback_rwTex[INDEXER(0,ix)]; - + static SamplerState g_samp = g_fallback_samp[INDEXER(0, ix)]; static SamplerComparisonState g_sampCmp = g_fallback_sampCmp[INDEXER(0, ix)]; #endif @@ -5986,7 +3693,7 @@ void MSMain(uint GID : SV_GroupIndex, void main(uint ix : SV_GroupIndex) { TestResources(g_result, ix); } - + struct PSInput { float4 position : SV_POSITION; }; @@ -5994,16 +3701,16 @@ void MSMain(uint GID : SV_GroupIndex, ROOT_SIG float4 PSMain(PSInput input) : SV_TARGET { int ix = WaveGetLaneIndex(); - TestResources(g_resultPS, ix); + TestResources(g_resultPS, ix); // This output doesn't actually matter return input.position; } - + ROOT_SIG PSInput VSMain(float3 pos : POSITION, uint ix : SV_VertexID) { TestResources(g_resultVS, ix); PSInput r; - r.position = float4(pos, 1); + r.position = float4(pos, 1); return r; } ]]> @@ -6013,22 +3720,10 @@ void MSMain(uint GID : SV_GroupIndex, UAV(u0), UAV(u1) - - + + @@ -6064,16 +3759,10 @@ void MSMain(uint GID : SV_GroupIndex, SRV(t0), UAV(u1) - - + + @@ -6109,23 +3798,11 @@ void MSMain(uint GID : SV_GroupIndex, DescriptorTable(UAV(u0, numDescriptors=2)) - - + + @@ -6133,18 +3810,9 @@ void MSMain(uint GID : SV_GroupIndex, - - + + @@ -6178,19 +3846,12 @@ void MSMain(uint GID : SV_GroupIndex, - DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) + DescriptorTable(SRV(t0, numDescriptors=1), UAV(u0, numDescriptors=1)) - - + + @@ -6198,18 +3859,9 @@ void MSMain(uint GID : SV_GroupIndex, - - + + @@ -6242,29 +3894,17 @@ void MSMain(uint GID : SV_GroupIndex, RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - + + - - + + @@ -6299,29 +3939,16 @@ void MSMain(uint GID : SV_GroupIndex, RootFlags(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) - - + + - - + + @@ -6331,7 +3958,7 @@ void MSMain(uint GID : SV_GroupIndex, vector data; }; #endif - + [numthreads(1,1,1)] void main(uint GI : SV_GroupIndex) { #if USE_STRUCTURED_BUFFER @@ -6355,46 +3982,12 @@ void MSMain(uint GID : SV_GroupIndex, UAV(u0), UAV(u1), UAV(u2), UAV(u3) - - - - + + + + @@ -6405,18 +3998,18 @@ void MSMain(uint GID : SV_GroupIndex, TYPE : The type of the input vector, e.g. float, double, int, uint. OUT_TYPE : The type of the output vector, e.g. float, double, int, uint. In most cases OUT_TYPE == TYPE. - + NUM : The number of elements in the vector, e.g. 2, 3, 4, 8, 16, 32, - + FUNC : Used to expand to the HLSL intrinsic being tested. e.g cos, cosh, abs, etc. OR In some cases FUNC is expanded to a function call to handle special logic, e.g. asuint_splitdouble. OR it is intentionally left empty when testing operators like '+', '-', '*', '/', etc. - + OPERATOR : The operator being tested, e.g. '+', '-', '*', '/', etc. - OR for binary operations for an intrinsic it is expanded + OR for binary operations for an intrinsic it is expanded ','/ OR for unary intrinsics it is expanded to ' ' (empty). OR for ternary intrinsics it is always expanded to ','. @@ -6714,7 +4307,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 0 || WaveGetLaneIndex() == 3) { // Clear LSB on lane 0 and lane 3. Lane 0 isn't in the mask so - // shouldn't participate. Lane 3 is the output lane for this prefix + // shouldn't participate. Lane 3 is the output lane for this prefix // op, so we set distinctive bits to verify it doesn't affect its own result. Vector = Vector & ~((OUT_TYPE)0x1); } @@ -6744,7 +4337,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 1 || WaveGetLaneIndex() == 2 || WaveGetLaneIndex() == 3) { - // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create + // Lanes 1,2,3 (inside the mask): Clear bit 1 (0x2) to create // predictable OR patterns Vector = Vector & ~((OUT_TYPE)0x2); } @@ -6757,7 +4350,7 @@ void MSMain(uint GID : SV_GroupIndex, if(WaveGetLaneIndex() == 3) { - // Lane 3 is the output lane: Set all bits to verify it doesn't + // Lane 3 is the output lane: Set all bits to verify it doesn't // affect its own prefix result (since prefix excludes current lane) Vector = Vector | ~((OUT_TYPE)0x0); } @@ -6815,29 +4408,20 @@ void MSMain(uint GID : SV_GroupIndex, #ifdef FUNC_WAVE_MATCH void TestWaveMatch(vector Vector) { - uint LaneIndex = WaveGetLaneIndex(); - bool ShouldModify = ( LaneIndex == 0 || - LaneIndex == (WAVE_SIZE / 2) || - LaneIndex == (WAVE_SIZE - 1)); - - if(LaneIndex < NUM && ShouldModify) + if(WaveGetLaneIndex() == 0) { - if(Vector[LaneIndex] == (TYPE) 0) - Vector[LaneIndex] = (TYPE) 1; - else if(Vector[LaneIndex] == (TYPE) 1) - Vector[LaneIndex] = (TYPE) 0; + if(Vector[0] == (TYPE)0) + Vector[0] = (TYPE) 1; + else if(Vector[0] == (TYPE)1) + Vector[0] = (TYPE) 0; else - Vector[LaneIndex] = (TYPE) 1; + Vector[0] = (TYPE) 1; } - - // Making sure all lanes finish updating their vectors. - AllMemoryBarrierWithGroupSync(); - uint4 result = WaveMatch(Vector); uint index = WaveGetLaneIndex(); g_OutputVector.Store(index * sizeof(uint4), result); - } + } #endif #ifdef FUNC_TEST_SELECT @@ -6983,14 +4567,14 @@ void MSMain(uint GID : SV_GroupIndex, // index array from being a compile time constant. const uint IndexCount = 6; const uint IndexList[IndexCount] = { - 0, - OutNum - 1, - 1, - OutNum - 2, - OutNum / 2, + 0, + OutNum - 1, + 1, + OutNum - 2, + OutNum / 2, OutNum / 2 + 1 }; - + OutputVector = 0; uint End = min(OutNum, IndexCount); From f958d39ecd2baa78c251d0c67b081d515fb9ef92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Wed, 7 Jan 2026 12:08:08 -0800 Subject: [PATCH 07/14] modify test to account for multiple lanes being modified --- .../unittests/HLSLExec/ShaderOpArith.xml | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 2cfeb1f225..a02e5fb8e8 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4405,23 +4405,32 @@ void MSMain(uint GID : SV_GroupIndex, } #endif - #ifdef FUNC_WAVE_MATCH + #ifdef FUNC_WAVE_MATCH void TestWaveMatch(vector Vector) { - if(WaveGetLaneIndex() == 0) + uint LaneIndex = WaveGetLaneIndex(); + bool ShouldModify = ( LaneIndex == 0 || + LaneIndex == (WAVE_SIZE / 2) || + LaneIndex == (WAVE_SIZE - 1)); + + if(LaneIndex < NUM && ShouldModify) { - if(Vector[0] == (TYPE)0) - Vector[0] = (TYPE) 1; - else if(Vector[0] == (TYPE)1) - Vector[0] = (TYPE) 0; + if(Vector[LaneIndex] == (TYPE) 0) + Vector[LaneIndex] = (TYPE) 1; + else if(Vector[LaneIndex] == (TYPE) 1) + Vector[LaneIndex] = (TYPE) 0; else - Vector[0] = (TYPE) 1; + Vector[LaneIndex] = (TYPE) 1; } + + // Making sure all lanes finish updating their vectors. + AllMemoryBarrierWithGroupSync(); + uint4 result = WaveMatch(Vector); uint index = WaveGetLaneIndex(); g_OutputVector.Store(index * sizeof(uint4), result); - } + } #endif #ifdef FUNC_TEST_SELECT From 4986ef497f9c879b1bb937e8ae15aab93f2c29d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Wed, 7 Jan 2026 12:11:25 -0800 Subject: [PATCH 08/14] fix editor auto messes --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index c54857245f..7da01ad447 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1,5 +1,3 @@ -#include -#include #ifndef NOMINMAX #define NOMINMAX 1 #endif @@ -1617,10 +1615,10 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; uint32_t GetWord(const std::bitset<128> &b, uint32_t wordPos) { - uint32_t v = 0; - for (uint32_t i = 0; i < 32; ++i) - v |= uint32_t(b[wordPos * 32 + i]) << i; - return v; + uint32_t Word = 0; + for (uint32_t I = 0; I < 32; ++I) + Word |= uint32_t(b[wordPos * 32 + I]) << I; + return Word; } void StoreWords(UINT *Dest, std::bitset<128> LanesState) { @@ -1653,7 +1651,7 @@ template struct ExpectedBuilder { std::bitset<128> UnchangedLanes(~0ULL); UnchangedLanes &= (1ULL << WaveSize) - 1; - UnchangedLanes = UnchangedLanes.reset(0).reset(MidLaneID); + UnchangedLanes.reset(0).reset(MidLaneID); if (LastLaneID < VectorSize) UnchangedLanes.reset(LastLaneID); From c7775b7316a8073c4d4f4e499a2a17879418164f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Wed, 7 Jan 2026 12:34:54 -0800 Subject: [PATCH 09/14] fix editor mess --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++-- tools/clang/unittests/HLSLExec/ShaderOpArith.xml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 7da01ad447..96ae691fdb 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1614,10 +1614,10 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; -uint32_t GetWord(const std::bitset<128> &b, uint32_t wordPos) { +uint32_t GetWord(const std::bitset<128> &b, uint32_t WordPos) { uint32_t Word = 0; for (uint32_t I = 0; I < 32; ++I) - Word |= uint32_t(b[wordPos * 32 + I]) << I; + Word |= uint32_t(b[WordPos * 32 + I]) << I; return Word; } diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index a02e5fb8e8..d7c48749a6 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4405,7 +4405,7 @@ void MSMain(uint GID : SV_GroupIndex, } #endif - #ifdef FUNC_WAVE_MATCH + #ifdef FUNC_WAVE_MATCH void TestWaveMatch(vector Vector) { uint LaneIndex = WaveGetLaneIndex(); @@ -4430,7 +4430,7 @@ void MSMain(uint GID : SV_GroupIndex, uint index = WaveGetLaneIndex(); g_OutputVector.Store(index * sizeof(uint4), result); - } + } #endif #ifdef FUNC_TEST_SELECT From a2376ec0b929e13e36aa8bbaa198a073be207842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Thu, 8 Jan 2026 14:26:47 -0800 Subject: [PATCH 10/14] addres alex's comments --- .../clang/unittests/HLSLExec/LongVectors.cpp | 58 ++++++++++--------- .../unittests/HLSLExec/ShaderOpArith.xml | 4 +- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 96ae691fdb..3d5152e885 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1614,18 +1614,26 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; -uint32_t GetWord(const std::bitset<128> &b, uint32_t WordPos) { - uint32_t Word = 0; - for (uint32_t I = 0; I < 32; ++I) - Word |= uint32_t(b[WordPos * 32 + I]) << I; - return Word; +static constexpr std::bitset<128> ComputeWaveMask(UINT NumWaves) { + return (NumWaves < 64) ? (1ULL << NumWaves) - 1 : ~0UL; } -void StoreWords(UINT *Dest, std::bitset<128> LanesState) { - Dest[0] = GetWord(LanesState, 0); - Dest[1] = GetWord(LanesState, 1); - Dest[2] = GetWord(LanesState, 2); - Dest[3] = GetWord(LanesState, 3); +void StoreWords(UINT *Dest, const std::bitset<128> &LanesState, + const UINT WaveSize) { + + const UINT LowWaves = std::min(64U, WaveSize); + const UINT HighWaves = WaveSize - LowWaves; + const std::bitset<128> LowWaveMask = ComputeWaveMask(LowWaves); + const std::bitset<128> HighWaveMask = ComputeWaveMask(HighWaves); + + const uint64_t LowActiveLanes = (LanesState & LowWaveMask).to_ullong(); + const uint64_t HighActiveLanes = + ((LanesState >> 64) & HighWaveMask).to_ullong(); + + Dest[0] = static_cast(LowActiveLanes); + Dest[1] = static_cast(LowActiveLanes << 32); + Dest[2] = static_cast(HighActiveLanes); + Dest[3] = static_cast(HighActiveLanes << 32); } template struct ExpectedBuilder { @@ -1638,35 +1646,29 @@ template struct ExpectedBuilder { // respectively, if the input vector has enough elements. Besides that all // other lanes write their result of WaveMatch as well. DXASSERT_NOMSG(Inputs.size() == 1); - + const UINT VectorSize = static_cast(Inputs[0].size()); std::vector Expected; Expected.assign(WaveSize * 4, 0); - - const size_t VectorSize = Inputs[0].size(); - - Expected.assign(WaveSize * 4, 0); - const UINT MidLaneID = WaveSize / 2; - const UINT LastLaneID = WaveSize - 1; - - std::bitset<128> UnchangedLanes(~0ULL); - UnchangedLanes &= (1ULL << WaveSize) - 1; - UnchangedLanes.reset(0).reset(MidLaneID); + const UINT LastLaneID = std::min(WaveSize - 1, VectorSize - 1); - if (LastLaneID < VectorSize) - UnchangedLanes.reset(LastLaneID); + std::bitset<128> UnchangedLanes; + for (UINT I = 0; I < WaveSize; ++I) + UnchangedLanes.set(I); + UnchangedLanes.reset(0); + UnchangedLanes.reset(MidLaneID); + UnchangedLanes.reset(LastLaneID); for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) { const UINT Index = LaneID * 4; - if (LaneID == 0 || LaneID == MidLaneID || - (LastLaneID < VectorSize && LaneID == LastLaneID)) { + if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) { std::bitset<128> ChangedLanes(0); - ChangedLanes = ChangedLanes.set(LaneID); - StoreWords(&Expected[Index], ChangedLanes); + ChangedLanes.set(LaneID); + StoreWords(&Expected[Index], ChangedLanes, WaveSize); continue; } - StoreWords(&Expected[Index], UnchangedLanes); + StoreWords(&Expected[Index], UnchangedLanes, WaveSize); } return Expected; diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index d7c48749a6..4b95c7580b 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4411,9 +4411,9 @@ void MSMain(uint GID : SV_GroupIndex, uint LaneIndex = WaveGetLaneIndex(); bool ShouldModify = ( LaneIndex == 0 || LaneIndex == (WAVE_SIZE / 2) || - LaneIndex == (WAVE_SIZE - 1)); + LaneIndex == min(WAVE_SIZE - 1, NUM - 1)); - if(LaneIndex < NUM && ShouldModify) + if(ShouldModify) { if(Vector[LaneIndex] == (TYPE) 0) Vector[LaneIndex] = (TYPE) 1; From 4354caae6d6db344899b392ff40d56744b4ddbcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Thu, 8 Jan 2026 14:32:27 -0800 Subject: [PATCH 11/14] capilalize variables in Shadeer --- tools/clang/unittests/HLSLExec/ShaderOpArith.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index 4b95c7580b..5c8fbacdb2 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4426,10 +4426,10 @@ void MSMain(uint GID : SV_GroupIndex, // Making sure all lanes finish updating their vectors. AllMemoryBarrierWithGroupSync(); - uint4 result = WaveMatch(Vector); - uint index = WaveGetLaneIndex(); + uint4 Result = WaveMatch(Vector); + uint Index = WaveGetLaneIndex(); - g_OutputVector.Store(index * sizeof(uint4), result); + g_OutputVector.Store(Index * sizeof(uint4), Result); } #endif From dd8e4e958fee7a571e1a3c07c49353469db27af5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Fri, 9 Jan 2026 11:59:58 -0800 Subject: [PATCH 12/14] fix naming and improve encapsulation --- .../clang/unittests/HLSLExec/LongVectors.cpp | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 3d5152e885..ccf5b26fd0 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1618,23 +1618,34 @@ static constexpr std::bitset<128> ComputeWaveMask(UINT NumWaves) { return (NumWaves < 64) ? (1ULL << NumWaves) - 1 : ~0UL; } -void StoreWords(UINT *Dest, const std::bitset<128> &LanesState, - const UINT WaveSize) { - - const UINT LowWaves = std::min(64U, WaveSize); - const UINT HighWaves = WaveSize - LowWaves; - const std::bitset<128> LowWaveMask = ComputeWaveMask(LowWaves); - const std::bitset<128> HighWaveMask = ComputeWaveMask(HighWaves); - - const uint64_t LowActiveLanes = (LanesState & LowWaveMask).to_ullong(); - const uint64_t HighActiveLanes = - ((LanesState >> 64) & HighWaveMask).to_ullong(); - - Dest[0] = static_cast(LowActiveLanes); - Dest[1] = static_cast(LowActiveLanes << 32); - Dest[2] = static_cast(HighActiveLanes); - Dest[3] = static_cast(HighActiveLanes << 32); -} +struct WaveMatchExpectedResultWritter { +private: + UINT LowWaves; + UINT HighWaves; + std::bitset<128> LowWaveMask; + std::bitset<128> HighWaveMask; + +public: + WaveMatchExpectedResultWritter(UINT WaveSize) { + LowWaves = std::min(64U, WaveSize); + HighWaves = WaveSize - LowWaves; + LowWaveMask = ComputeWaveMask(LowWaves); + HighWaveMask = ComputeWaveMask(HighWaves); + } + + void WriteExpectedValueForLane(UINT *Dest, const UINT Lane, + const std::bitset<128> &LanesState) { + const uint64_t LowActiveLanes = (LanesState & LowWaveMask).to_ullong(); + const uint64_t HighActiveLanes = + ((LanesState >> 64) & HighWaveMask).to_ullong(); + + const UINT LaneIndex = 4 * Lane; + Dest[LaneIndex + 0] = static_cast(LowActiveLanes); + Dest[LaneIndex + 1] = static_cast(LowActiveLanes << 32); + Dest[LaneIndex + 2] = static_cast(HighActiveLanes); + Dest[LaneIndex + 3] = static_cast(HighActiveLanes << 32); + } +}; template struct ExpectedBuilder { static std::vector buildExpected(Op &, @@ -1642,33 +1653,38 @@ template struct ExpectedBuilder { const UINT WaveSize) { // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and // WAVE_SIZE-1 are different from all the other lanes, also those - // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1 - // respectively, if the input vector has enough elements. Besides that all - // other lanes write their result of WaveMatch as well. + // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1. + // Besides that all other lanes write their result of WaveMatch as well. DXASSERT_NOMSG(Inputs.size() == 1); + const UINT VectorSize = static_cast(Inputs[0].size()); std::vector Expected; + WaveMatchExpectedResultWritter ExpectedWritter(WaveSize); Expected.assign(WaveSize * 4, 0); + const UINT MidLaneID = WaveSize / 2; const UINT LastLaneID = std::min(WaveSize - 1, VectorSize - 1); - std::bitset<128> UnchangedLanes; + // Use a std::bitset<128> to represent the uint4 returned by WaveMatch as + // its convenient this way in c++ + std::bitset<128> DefaultExpectedValue; + for (UINT I = 0; I < WaveSize; ++I) - UnchangedLanes.set(I); - UnchangedLanes.reset(0); - UnchangedLanes.reset(MidLaneID); - UnchangedLanes.reset(LastLaneID); + DefaultExpectedValue.set(I); + DefaultExpectedValue.reset(0); + DefaultExpectedValue.reset(MidLaneID); + DefaultExpectedValue.reset(LastLaneID); for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) { - const UINT Index = LaneID * 4; - if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) { - std::bitset<128> ChangedLanes(0); - ChangedLanes.set(LaneID); - StoreWords(&Expected[Index], ChangedLanes, WaveSize); + std::bitset<128> ExpectedValue(0); + ExpectedValue.set(LaneID); + ExpectedWritter.WriteExpectedValueForLane(Expected.data(), LaneID, + ExpectedValue); continue; } - StoreWords(&Expected[Index], UnchangedLanes, WaveSize); + ExpectedWritter.WriteExpectedValueForLane(Expected.data(), LaneID, + DefaultExpectedValue); } return Expected; From 5c17f0cee1f288a7768527355b00fe7a20ac7032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Fri, 9 Jan 2026 15:49:43 -0800 Subject: [PATCH 13/14] fix bug in high wave logic --- .../clang/unittests/HLSLExec/LongVectors.cpp | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index ccf5b26fd0..ae51b7159f 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1620,30 +1620,24 @@ static constexpr std::bitset<128> ComputeWaveMask(UINT NumWaves) { struct WaveMatchExpectedResultWritter { private: - UINT LowWaves; - UINT HighWaves; std::bitset<128> LowWaveMask; - std::bitset<128> HighWaveMask; public: WaveMatchExpectedResultWritter(UINT WaveSize) { - LowWaves = std::min(64U, WaveSize); - HighWaves = WaveSize - LowWaves; + const UINT LowWaves = std::min(64U, WaveSize); LowWaveMask = ComputeWaveMask(LowWaves); - HighWaveMask = ComputeWaveMask(HighWaves); } - void WriteExpectedValueForLane(UINT *Dest, const UINT Lane, - const std::bitset<128> &LanesState) { - const uint64_t LowActiveLanes = (LanesState & LowWaveMask).to_ullong(); - const uint64_t HighActiveLanes = - ((LanesState >> 64) & HighWaveMask).to_ullong(); - - const UINT LaneIndex = 4 * Lane; - Dest[LaneIndex + 0] = static_cast(LowActiveLanes); - Dest[LaneIndex + 1] = static_cast(LowActiveLanes << 32); - Dest[LaneIndex + 2] = static_cast(HighActiveLanes); - Dest[LaneIndex + 3] = static_cast(HighActiveLanes << 32); + void WriteExpectedValueForLane(UINT *Dest, const UINT LaneID, + const std::bitset<128> &ExpectedValue) { + const uint64_t LowActiveLanes = (ExpectedValue & LowWaveMask).to_ullong(); + const uint64_t HighActiveLanes = (ExpectedValue >> 64).to_ullong(); + + const UINT I = 4 * LaneID; + Dest[I + 0] = static_cast(LowActiveLanes); + Dest[I + 1] = static_cast(LowActiveLanes >> 32); + Dest[I + 2] = static_cast(HighActiveLanes); + Dest[I + 3] = static_cast(HighActiveLanes >> 32); } }; @@ -1651,10 +1645,9 @@ template struct ExpectedBuilder { static std::vector buildExpected(Op &, const InputSets &Inputs, const UINT WaveSize) { - // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and - // WAVE_SIZE-1 are different from all the other lanes, also those - // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1. - // Besides that all other lanes write their result of WaveMatch as well. + // This test, sets lanes (0, WAVE_SIZE/2, and min(WAVE_SIZE-1, + // VECTOR_SIZE-1)) to unique values and has them modify the vector at their + // respective indices. Remaining lanes remain unchanged. DXASSERT_NOMSG(Inputs.size() == 1); const UINT VectorSize = static_cast(Inputs[0].size()); From 72f1548a81fcea5286e0eda9d1026969d3d3d83f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Saffran?= Date: Fri, 9 Jan 2026 23:14:56 -0800 Subject: [PATCH 14/14] remove reduntant functions --- tools/clang/unittests/HLSLExec/LongVectors.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index ae51b7159f..f95f43c0f0 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -1614,10 +1614,6 @@ template T waveMultiPrefixProduct(T A, UINT) { template struct Op : StrictValidation {}; -static constexpr std::bitset<128> ComputeWaveMask(UINT NumWaves) { - return (NumWaves < 64) ? (1ULL << NumWaves) - 1 : ~0UL; -} - struct WaveMatchExpectedResultWritter { private: std::bitset<128> LowWaveMask; @@ -1625,19 +1621,19 @@ struct WaveMatchExpectedResultWritter { public: WaveMatchExpectedResultWritter(UINT WaveSize) { const UINT LowWaves = std::min(64U, WaveSize); - LowWaveMask = ComputeWaveMask(LowWaves); + LowWaveMask = (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; } void WriteExpectedValueForLane(UINT *Dest, const UINT LaneID, const std::bitset<128> &ExpectedValue) { - const uint64_t LowActiveLanes = (ExpectedValue & LowWaveMask).to_ullong(); - const uint64_t HighActiveLanes = (ExpectedValue >> 64).to_ullong(); + const uint64_t Lo = (ExpectedValue & LowWaveMask).to_ullong(); + const uint64_t Hi = (ExpectedValue >> 64).to_ullong(); const UINT I = 4 * LaneID; - Dest[I + 0] = static_cast(LowActiveLanes); - Dest[I + 1] = static_cast(LowActiveLanes >> 32); - Dest[I + 2] = static_cast(HighActiveLanes); - Dest[I + 3] = static_cast(HighActiveLanes >> 32); + Dest[I + 0] = static_cast(Lo); + Dest[I + 1] = static_cast(Lo >> 32); + Dest[I + 2] = static_cast(Hi); + Dest[I + 3] = static_cast(Hi >> 32); } };