Conversation
Contributor
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.diff --git a/lib/cl/device.jl b/lib/cl/device.jl
index 64bb1f9..cdfa4d5 100644
--- a/lib/cl/device.jl
+++ b/lib/cl/device.jl
@@ -315,7 +315,7 @@ function sub_group_size(d::Device)
end
end
function sub_group_shuffle_supported_types(d::Device)
- if "cl_khr_subgroup_shuffle" in d.extensions
+ return if "cl_khr_subgroup_shuffle" in d.extensions
res = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32]
"cl_khr_fp16" in d.extensions && push!(res, Float16)
"cl_khr_fp64" in d.extensions && push!(res, Float64)
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index dbef9fd..a3c9b4d 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -20,7 +20,7 @@ const simd_ns = (Sys.iswindows() && ispocl) ? [3, 4] : [2, 3, 4, 8, 16]
@testset "barrier" begin
-# work-group
+ # work-group
@on_device barrier(OpenCL.LOCAL_MEM_FENCE)
@on_device barrier(OpenCL.GLOBAL_MEM_FENCE)
@on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
@@ -39,10 +39,10 @@ const simd_ns = (Sys.iswindows() && ispocl) ? [3, 4] : [2, 3, 4, 8, 16]
cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_all_svm_devices)
@on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_sub_group)
-# sub-group
-@on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE)
-@on_device sub_group_barrier(OpenCL.GLOBAL_MEM_FENCE)
-@on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
+ # sub-group
+ @on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE)
+ @on_device sub_group_barrier(OpenCL.GLOBAL_MEM_FENCE)
+ @on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
end
@testset "mem_fence" begin
@@ -171,102 +171,102 @@ end
@test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
end
-if cl.sub_groups_supported(cl.device())
-
-struct SubgroupData
- sub_group_size::UInt32
- max_sub_group_size::UInt32
- num_sub_groups::UInt32
- sub_group_id::UInt32
- sub_group_local_id::UInt32
-end
-function test_subgroup_kernel(results)
- i = get_global_id(1)
-
- if i <= length(results)
- @inbounds results[i] = SubgroupData(
- get_sub_group_size(),
- get_max_sub_group_size(),
- get_num_sub_groups(),
- get_sub_group_id(),
- get_sub_group_local_id()
- )
- end
- return
-end
-
-@testset "Sub-groups" begin
- sg_size = cl.sub_group_size(cl.device())
-
- @testset "Indexing intrinsics" begin
- # Test with small kernel
- sg_n = 2
- local_size = sg_size * sg_n
- numworkgroups = 2
- N = local_size * numworkgroups
-
- results = CLVector{SubgroupData}(undef, N)
- kernel = @opencl launch = false test_subgroup_kernel(results)
-
- kernel(results; local_size, global_size=N)
-
- host_results = Array(results)
-
- # Verify results make sense
- for (i, sg_data) in enumerate(host_results)
- @test sg_data.sub_group_size == sg_size
- @test sg_data.max_sub_group_size == sg_size
- @test sg_data.num_sub_groups == sg_n
-
- # Group ID should be 1-based
- expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
- @test sg_data.sub_group_id == expected_sub_group
-
- # Local ID should be 1-based within group
- expected_sg_local = ((i - 1) % sg_size) + 1
- @test sg_data.sub_group_local_id == expected_sg_local
- end
- end
-
- @testset "shuffle idx" begin
- function shfl_idx_kernel(d)
- i = get_sub_group_local_id()
- j = get_sub_group_size() - i + 0x1
-
- d[i] = sub_group_shuffle(d[i], j)
-
- return
- end
-
- @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
- a = rand(T, sg_size)
- d_a = CLArray(a)
- @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
- @test Array(d_a) == reverse(a)
- end
- end
- @testset "shuffle xor" begin
- function shfl_xor_kernel(in)
- i = get_sub_group_local_id()
-
- # val = in[i]
- new_val = sub_group_shuffle_xor(in[i], 1)
-
- in[i] = new_val
- return
- end
-
- # tests that each pair of values a get swapped using sub_group_shuffle_xor
- @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
- in = rand(T, sg_size)
- idxs = xor.(0:(sg_size - 1), 1) .+ 1
- d_in = CLArray(in)
- @opencl local_size = sg_size global_size = sg_size shfl_xor_kernel(d_in)
- @test Array(d_in) == in[idxs]
- end
- end
-end
-end # if cl.sub_groups_supported(cl.device())
+ if cl.sub_groups_supported(cl.device())
+
+ struct SubgroupData
+ sub_group_size::UInt32
+ max_sub_group_size::UInt32
+ num_sub_groups::UInt32
+ sub_group_id::UInt32
+ sub_group_local_id::UInt32
+ end
+ function test_subgroup_kernel(results)
+ i = get_global_id(1)
+
+ if i <= length(results)
+ @inbounds results[i] = SubgroupData(
+ get_sub_group_size(),
+ get_max_sub_group_size(),
+ get_num_sub_groups(),
+ get_sub_group_id(),
+ get_sub_group_local_id()
+ )
+ end
+ return
+ end
+
+ @testset "Sub-groups" begin
+ sg_size = cl.sub_group_size(cl.device())
+
+ @testset "Indexing intrinsics" begin
+ # Test with small kernel
+ sg_n = 2
+ local_size = sg_size * sg_n
+ numworkgroups = 2
+ N = local_size * numworkgroups
+
+ results = CLVector{SubgroupData}(undef, N)
+ kernel = @opencl launch = false test_subgroup_kernel(results)
+
+ kernel(results; local_size, global_size = N)
+
+ host_results = Array(results)
+
+ # Verify results make sense
+ for (i, sg_data) in enumerate(host_results)
+ @test sg_data.sub_group_size == sg_size
+ @test sg_data.max_sub_group_size == sg_size
+ @test sg_data.num_sub_groups == sg_n
+
+ # Group ID should be 1-based
+ expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
+ @test sg_data.sub_group_id == expected_sub_group
+
+ # Local ID should be 1-based within group
+ expected_sg_local = ((i - 1) % sg_size) + 1
+ @test sg_data.sub_group_local_id == expected_sg_local
+ end
+ end
+
+ @testset "shuffle idx" begin
+ function shfl_idx_kernel(d)
+ i = get_sub_group_local_id()
+ j = get_sub_group_size() - i + 0x01
+
+ d[i] = sub_group_shuffle(d[i], j)
+
+ return
+ end
+
+ @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
+ a = rand(T, sg_size)
+ d_a = CLArray(a)
+ @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
+ @test Array(d_a) == reverse(a)
+ end
+ end
+ @testset "shuffle xor" begin
+ function shfl_xor_kernel(in)
+ i = get_sub_group_local_id()
+
+ # val = in[i]
+ new_val = sub_group_shuffle_xor(in[i], 1)
+
+ in[i] = new_val
+ return
+ end
+
+ # tests that each pair of values a get swapped using sub_group_shuffle_xor
+ @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
+ in = rand(T, sg_size)
+ idxs = xor.(0:(sg_size - 1), 1) .+ 1
+ d_in = CLArray(in)
+ @opencl local_size = sg_size global_size = sg_size shfl_xor_kernel(d_in)
+ @test Array(d_in) == in[idxs]
+ end
+ end
+ end
+ end # if cl.sub_groups_supported(cl.device())
@testset "SIMD - $N x $T" for N in simd_ns, T in float_types
# codegen emits i48 here, which SPIR-V doesn't support |
Codecov Report✅ All modified and coverable lines are covered by tests. Additional details and impacted files@@ Coverage Diff @@
## master #413 +/- ##
==========================================
+ Coverage 80.84% 81.02% +0.18%
==========================================
Files 12 12
Lines 736 743 +7
==========================================
+ Hits 595 602 +7
Misses 141 141 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
1ffbb85 to
d540763
Compare
d540763 to
aee71aa
Compare
aee71aa to
ef8e5d2
Compare
c58b3e9 to
92f0e6e
Compare
92f0e6e to
922f426
Compare
Member
simeonschaub
left a comment
There was a problem hiding this comment.
Very nice work! I have a couple of comments, but overall looks great!
simeonschaub
approved these changes
Feb 18, 2026
Member
simeonschaub
left a comment
There was a problem hiding this comment.
Thanks, this looks great from my side! Feel free to merge and tag a new version
This comment was marked as outdated.
This comment was marked as outdated.
c51aa73 to
f7ff4e0
Compare
f7ff4e0 to
0155bd1
Compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Depends on #418
Add initial support for subgroups and subgroup shuffles.