Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
315 changes: 315 additions & 0 deletions test/WaveOps/WaveActiveMax.fp16.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
#--- source.hlsl
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of these HLSL sources appear to be identical except the base type used. Is there any way to reference a shared source file and use compilation arguments, like -D TYPE=half instead?

// Number of input value sets the shader loops over.
#define VALUE_SETS 2
// Number of thread masks applied to every value set.
#define NUM_MASKS 4
// Threads per group; also the number of per-thread entries in one mask.
#define NUM_THREADS 4

// One thread mask: a non-zero mask[i] means thread i executes the wave ops
// for that mask index.
struct MaskStruct {
int mask[NUM_THREADS];
};

// Per-thread input vectors, indexed by value set, mask index and thread id.
StructuredBuffer<half4> In : register(t0);
RWStructuredBuffer<half> Out1 : register(u1); // test scalar
RWStructuredBuffer<half2> Out2 : register(u2); // test half2
RWStructuredBuffer<half4> Out3 : register(u3); // test half3 (stored in a half4; only .xyz is written)
RWStructuredBuffer<half4> Out4 : register(u4); // test half4
RWStructuredBuffer<half4> Out5 : register(u5); // constant folding
// One MaskStruct per mask index; selects which threads run the wave ops.
StructuredBuffer<MaskStruct> Masks : register(t6);


// Compute entry point, launched with NUM_THREADS threads per group.
//
// For every (value set, mask) pair each thread loads its own half4 from In.
// Threads whose entry in the mask is non-zero then store the WaveActiveMax of
// the scalar, 2-, 3- and 4-component slices of that value into Out1..Out4 at
// the same element index; threads whose entry is zero skip the stores.
// Finally every thread writes the WaveActiveMax of a literal vector to
// Out5[0] to cover the constant-folding path.
//
// tid: thread id within the group; tid.x selects the per-thread input element
//      and the per-thread mask entry.
[numthreads(NUM_THREADS,1,1)]
void main(uint3 tid : SV_GroupThreadID)
{
  for (uint ValueSet = 0; ValueSet < VALUE_SETS; ValueSet++) {
    // Index of the first element that belongs to this value set.
    const uint ValueSetOffset = ValueSet * NUM_MASKS * NUM_THREADS;
    for (uint MaskIdx = 0; MaskIdx < NUM_MASKS; MaskIdx++) {
      // Input and output use the same element layout. ValueSetOffset already
      // contains the ValueSet factor, so it must not be multiplied by
      // ValueSet again (that was only correct for ValueSet 0 and 1).
      const uint ElemIdx = ValueSetOffset + MaskIdx * NUM_THREADS + tid.x;
      half4 v = In[ElemIdx];
      if (Masks[MaskIdx].mask[tid.x]) {
        Out1[ElemIdx] = WaveActiveMax( v.x );
        Out2[ElemIdx].xy = WaveActiveMax( v.xy );
        // Out3 is a half4 buffer; .w is intentionally left untouched.
        Out3[ElemIdx].xyz = WaveActiveMax( v.xyz );
        Out4[ElemIdx] = WaveActiveMax( v );
      }
    }
  }

  // constant folding case
  Out5[0] = WaveActiveMax(half4(1,2,3,4));
}


//--- pipeline.yaml

---
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]
Buffers:
- Name: In
Format: Float16
Stride: 8
# 2 value sets
# For each value set,
# and for each specific one of the 4 thread masks in that value set,
# and for each of the 4 threads in that thread mask,
# there will be a unique set of 4 values, such that
# none of the other threads in that thread mask share any values
Data: [
0x2000, 0x2200, 0x2400, 0x2800, # <-- Value set 0, thread mask 0, thread id 0 will read these In values
0x2A00, 0x2C00, 0x2E00, 0x3000, # <-- Value set 0, thread mask 0, thread id 1 will read these In values
0x3200, 0x3400, 0x3600, 0x3800,
0x3900, 0x3A00, 0x3B00, 0x3BC0,
0x2200, 0x2400, 0x2800, 0x2A00, # <-- Value set 0, thread mask 1, thread id 0 will read these In values
0x2C00, 0x2E00, 0x3000, 0x3200,
0x3400, 0x3600, 0x3800, 0x3900,
0x3A00, 0x3B00, 0x3BC0, 0x2000,
0x2400, 0x2800, 0x2A00, 0x2C00,
0x2E00, 0x3000, 0x3200, 0x3400,
0x3600, 0x3800, 0x3900, 0x3A00,
0x3B00, 0x3BC0, 0x2000, 0x2200,
0x2800, 0x2A00, 0x2C00, 0x2E00,
0x3000, 0x3200, 0x3400, 0x3600,
0x3800, 0x3900, 0x3A00, 0x3B00,
0x3BC0, 0x2000, 0x2200, 0x2400,
0x2800, 0x2400, 0x2200, 0x2000, # <-- Value set 1, thread mask 0, thread id 0 will read these In values
0x3000, 0x2E00, 0x2C00, 0x2A00,
0x3800, 0x3600, 0x3400, 0x3200,
0x3BC0, 0x3B00, 0x3A00, 0x3900,
0x2A00, 0x2800, 0x2400, 0x2200,
0x3200, 0x3000, 0x2E00, 0x2C00,
0x3900, 0x3800, 0x3600, 0x3400,
0x2000, 0x3BC0, 0x3B00, 0x3A00,
0x2C00, 0x2A00, 0x2800, 0x2400,
0x3400, 0x3200, 0x3000, 0x2E00,
0x3A00, 0x3900, 0x3800, 0x3600,
0x2200, 0x2000, 0x3BC0, 0x3B00,
0x2E00, 0x2C00, 0x2A00, 0x2800,
0x3600, 0x3400, 0x3200, 0x3000,
0x3B00, 0x3A00, 0x3900, 0x3800,
0x2400, 0x2200, 0x2000, 0x3BC0 ]

- Name: Out1
Format: Float16
Stride: 2
# 1 half is 2 bytes, * 4 halves for 4 threads, * 4 thread masks, * 2 value sets
ZeroInitSize: 64
- Name: Out2
Format: Float16
Stride: 4
ZeroInitSize: 128
- Name: Out3
Format: Float16
Stride: 8
ZeroInitSize: 256
- Name: Out4
Format: Float16
Stride: 8
ZeroInitSize: 256
- Name: Out5
Format: Float16
Stride: 8
ZeroInitSize: 8
- Name: Masks
Format: Int32
Stride: 16
# 4 active mask sets for threads 0, 1, 2, 3:
# 0 0 0 0
# 1 1 1 1
# 1 0 0 0
# 0 1 1 0
Data: [
0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0]
- Name: ExpectedOut1
Format: Float16
Stride: 8
# 2 value sets, 4 masks per value set, 4 threads per mask, 1 result value per thread
Data: [ 0x0, 0x0, 0x0, 0x0,
0x3A00, 0x3A00, 0x3A00, 0x3A00,
0x2400, 0x0, 0x0, 0x0,
0x0, 0x3800, 0x3800, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3900, 0x3900, 0x3900, 0x3900,
0x2C00, 0x0, 0x0, 0x0,
0x0, 0x3B00, 0x3B00, 0x0 ]
- Name: ExpectedOut2
Format: Float16
Stride: 8
# 2 value sets, 4 masks per value set, 4 threads per mask, 2 result values per thread
Data: [ 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3A00, 0x3B00, 0x3A00, 0x3B00,
0x3A00, 0x3B00, 0x3A00, 0x3B00,
0x2400, 0x2800, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x3800, 0x3900,
0x3800, 0x3900, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3900, 0x3BC0, 0x3900, 0x3BC0,
0x3900, 0x3BC0, 0x3900, 0x3BC0,
0x2C00, 0x2A00, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x3B00, 0x3A00,
0x3B00, 0x3A00, 0x0, 0x0 ]
- Name: ExpectedOut3
Format: Float16
Stride: 8
# 2 value sets, 4 masks per value set, 4 threads per mask, 4 result values per thread
# Note, vecs of 3 must be aligned, so the 3 result values are placed into a 4 element vec
Data: [ 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3A00, 0x3B00, 0x3BC0, 0x0,
0x3A00, 0x3B00, 0x3BC0, 0x0,
0x3A00, 0x3B00, 0x3BC0, 0x0,
0x3A00, 0x3B00, 0x3BC0, 0x0,
0x2400, 0x2800, 0x2A00, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3800, 0x3900, 0x3A00, 0x0,
0x3800, 0x3900, 0x3A00, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3900, 0x3BC0, 0x3B00, 0x0,
0x3900, 0x3BC0, 0x3B00, 0x0,
0x3900, 0x3BC0, 0x3B00, 0x0,
0x3900, 0x3BC0, 0x3B00, 0x0,
0x2C00, 0x2A00, 0x2800, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3B00, 0x3A00, 0x3900, 0x0,
0x3B00, 0x3A00, 0x3900, 0x0,
0x0, 0x0, 0x0, 0x0 ]
- Name: ExpectedOut4
Format: Float16
Stride: 8
Data: [ 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3A00, 0x3B00, 0x3BC0, 0x3900,
0x3A00, 0x3B00, 0x3BC0, 0x3900,
0x3A00, 0x3B00, 0x3BC0, 0x3900,
0x3A00, 0x3B00, 0x3BC0, 0x3900,
0x2400, 0x2800, 0x2A00, 0x2C00,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3800, 0x3900, 0x3A00, 0x3B00,
0x3800, 0x3900, 0x3A00, 0x3B00,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3900, 0x3BC0, 0x3B00, 0x3A00,
0x3900, 0x3BC0, 0x3B00, 0x3A00,
0x3900, 0x3BC0, 0x3B00, 0x3A00,
0x3900, 0x3BC0, 0x3B00, 0x3A00,
0x2C00, 0x2A00, 0x2800, 0x2400,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x3B00, 0x3A00, 0x3900, 0x3800,
0x3B00, 0x3A00, 0x3900, 0x3800,
0x0, 0x0, 0x0, 0x0 ]
- Name: ExpectedOut5
Format: Float16
Stride: 8
Data: [ 0x3C00, 0x4000, 0x4200, 0x4400 ]
Results:
- Result: ExpectedOut1
Rule: BufferExact
Actual: Out1
Expected: ExpectedOut1
- Result: ExpectedOut2
Rule: BufferExact
Actual: Out2
Expected: ExpectedOut2
- Result: ExpectedOut3
Rule: BufferExact
Actual: Out3
Expected: ExpectedOut3
- Result: ExpectedOut4
Rule: BufferExact
Actual: Out4
Expected: ExpectedOut4
- Result: ExpectedOut5
Rule: BufferExact
Actual: Out5
Expected: ExpectedOut5
DescriptorSets:
- Resources:
- Name: In
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
- Name: Out1
Kind: RWStructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: Out2
Kind: RWStructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out3
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
- Name: Out4
Kind: RWStructuredBuffer
DirectXBinding:
Register: 4
Space: 0
VulkanBinding:
Binding: 4
- Name: Out5
Kind: RWStructuredBuffer
DirectXBinding:
Register: 5
Space: 0
VulkanBinding:
Binding: 5
- Name: Masks
Kind: StructuredBuffer
DirectXBinding:
Register: 6
Space: 0
VulkanBinding:
Binding: 6

...
#--- end

# Bug https://github.com/llvm/llvm-project/issues/156775
# XFAIL: Clang

# Bug https://github.com/llvm/offload-test-suite/issues/393
# XFAIL: Metal

# RUN: split-file %s %t
# RUN: %dxc_target -enable-16bit-types -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
Loading
Loading