[OpenCL] Subgroup support by christiangnrd · Pull Request #413 · JuliaGPU/OpenCL.jl

christiangnrd · 2026-01-11T23:47:33Z

Depends on #418
Add initial support for subgroups and subgroup shuffles.

github-actions · 2026-01-11T23:48:30Z

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.

diff --git a/lib/cl/device.jl b/lib/cl/device.jl
index 64bb1f9..cdfa4d5 100644
--- a/lib/cl/device.jl
+++ b/lib/cl/device.jl
@@ -315,7 +315,7 @@ function sub_group_size(d::Device)
     end
 end
 function sub_group_shuffle_supported_types(d::Device)
-    if "cl_khr_subgroup_shuffle" in d.extensions
+    return if "cl_khr_subgroup_shuffle" in d.extensions
         res = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32]
         "cl_khr_fp16" in d.extensions && push!(res, Float16)
         "cl_khr_fp64" in d.extensions && push!(res, Float64)
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index dbef9fd..a3c9b4d 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -20,7 +20,7 @@ const simd_ns = (Sys.iswindows() && ispocl) ? [3, 4] : [2, 3, 4, 8, 16]
 
 @testset "barrier" begin
 
-# work-group
+        # work-group
 @on_device barrier(OpenCL.LOCAL_MEM_FENCE)
 @on_device barrier(OpenCL.GLOBAL_MEM_FENCE)
 @on_device barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
@@ -39,10 +39,10 @@ const simd_ns = (Sys.iswindows() && ispocl) ? [3, 4] : [2, 3, 4, 8, 16]
 cl.memory_backend() isa cl.SVMBackend && @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_all_svm_devices)
 @on_device work_group_barrier(OpenCL.LOCAL_MEM_FENCE, OpenCL.memory_scope_sub_group)
 
-# sub-group
-@on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE)
-@on_device sub_group_barrier(OpenCL.GLOBAL_MEM_FENCE)
-@on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
+        # sub-group
+        @on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE)
+        @on_device sub_group_barrier(OpenCL.GLOBAL_MEM_FENCE)
+        @on_device sub_group_barrier(OpenCL.LOCAL_MEM_FENCE | OpenCL.GLOBAL_MEM_FENCE)
 end
 
 @testset "mem_fence" begin
@@ -171,102 +171,102 @@ end
     @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
 end
 
-if cl.sub_groups_supported(cl.device())
-
-struct SubgroupData
-    sub_group_size::UInt32
-    max_sub_group_size::UInt32
-    num_sub_groups::UInt32
-    sub_group_id::UInt32
-    sub_group_local_id::UInt32
-end
-function test_subgroup_kernel(results)
-    i = get_global_id(1)
-
-    if i <= length(results)
-        @inbounds results[i] = SubgroupData(
-            get_sub_group_size(),
-            get_max_sub_group_size(),
-            get_num_sub_groups(),
-            get_sub_group_id(),
-            get_sub_group_local_id()
-        )
-    end
-    return
-end
-
-@testset "Sub-groups" begin
-    sg_size = cl.sub_group_size(cl.device())
-
-    @testset "Indexing intrinsics" begin
-        # Test with small kernel
-        sg_n = 2
-        local_size = sg_size * sg_n
-        numworkgroups = 2
-        N = local_size * numworkgroups
-
-        results = CLVector{SubgroupData}(undef, N)
-        kernel = @opencl launch = false test_subgroup_kernel(results)
-
-        kernel(results; local_size, global_size=N)
-
-        host_results = Array(results)
-
-        # Verify results make sense
-        for (i, sg_data) in enumerate(host_results)
-            @test sg_data.sub_group_size == sg_size
-            @test sg_data.max_sub_group_size == sg_size
-            @test sg_data.num_sub_groups == sg_n
-
-            # Group ID should be 1-based
-            expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
-            @test sg_data.sub_group_id == expected_sub_group
-
-            # Local ID should be 1-based within group
-            expected_sg_local = ((i - 1) % sg_size) + 1
-            @test sg_data.sub_group_local_id == expected_sg_local
-        end
-    end
-
-    @testset "shuffle idx" begin
-        function shfl_idx_kernel(d)
-            i = get_sub_group_local_id()
-            j = get_sub_group_size() - i + 0x1
-
-            d[i] = sub_group_shuffle(d[i], j)
-
-            return
-        end
-
-        @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
-            a = rand(T, sg_size)
-            d_a = CLArray(a)
-            @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
-            @test Array(d_a) == reverse(a)
-        end
-    end
-    @testset "shuffle xor" begin
-        function shfl_xor_kernel(in)
-            i = get_sub_group_local_id()
-
-            # val = in[i]
-            new_val = sub_group_shuffle_xor(in[i], 1)
-
-            in[i] = new_val
-            return
-        end
-
-        # tests that each pair of values a get swapped using sub_group_shuffle_xor
-        @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
-            in = rand(T, sg_size)
-            idxs = xor.(0:(sg_size - 1), 1) .+ 1
-            d_in = CLArray(in)
-            @opencl local_size = sg_size global_size = sg_size shfl_xor_kernel(d_in)
-            @test Array(d_in) == in[idxs]
-        end
-    end
-end
-end # if cl.sub_groups_supported(cl.device())
+        if cl.sub_groups_supported(cl.device())
+
+            struct SubgroupData
+                sub_group_size::UInt32
+                max_sub_group_size::UInt32
+                num_sub_groups::UInt32
+                sub_group_id::UInt32
+                sub_group_local_id::UInt32
+            end
+            function test_subgroup_kernel(results)
+                i = get_global_id(1)
+
+                if i <= length(results)
+                    @inbounds results[i] = SubgroupData(
+                        get_sub_group_size(),
+                        get_max_sub_group_size(),
+                        get_num_sub_groups(),
+                        get_sub_group_id(),
+                        get_sub_group_local_id()
+                    )
+                end
+                return
+            end
+
+            @testset "Sub-groups" begin
+                sg_size = cl.sub_group_size(cl.device())
+
+                @testset "Indexing intrinsics" begin
+                    # Test with small kernel
+                    sg_n = 2
+                    local_size = sg_size * sg_n
+                    numworkgroups = 2
+                    N = local_size * numworkgroups
+
+                    results = CLVector{SubgroupData}(undef, N)
+                    kernel = @opencl launch = false test_subgroup_kernel(results)
+
+                    kernel(results; local_size, global_size = N)
+
+                    host_results = Array(results)
+
+                    # Verify results make sense
+                    for (i, sg_data) in enumerate(host_results)
+                        @test sg_data.sub_group_size == sg_size
+                        @test sg_data.max_sub_group_size == sg_size
+                        @test sg_data.num_sub_groups == sg_n
+
+                        # Group ID should be 1-based
+                        expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
+                        @test sg_data.sub_group_id == expected_sub_group
+
+                        # Local ID should be 1-based within group
+                        expected_sg_local = ((i - 1) % sg_size) + 1
+                        @test sg_data.sub_group_local_id == expected_sg_local
+                    end
+                end
+
+                @testset "shuffle idx" begin
+                    function shfl_idx_kernel(d)
+                        i = get_sub_group_local_id()
+                        j = get_sub_group_size() - i + 0x01
+
+                        d[i] = sub_group_shuffle(d[i], j)
+
+                        return
+                    end
+
+                    @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
+                        a = rand(T, sg_size)
+                        d_a = CLArray(a)
+                        @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
+                        @test Array(d_a) == reverse(a)
+                    end
+                end
+                @testset "shuffle xor" begin
+                    function shfl_xor_kernel(in)
+                        i = get_sub_group_local_id()
+
+                        # val = in[i]
+                        new_val = sub_group_shuffle_xor(in[i], 1)
+
+                        in[i] = new_val
+                        return
+                    end
+
+                    # tests that each pair of values a get swapped using sub_group_shuffle_xor
+                    @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
+                        in = rand(T, sg_size)
+                        idxs = xor.(0:(sg_size - 1), 1) .+ 1
+                        d_in = CLArray(in)
+                        @opencl local_size = sg_size global_size = sg_size shfl_xor_kernel(d_in)
+                        @test Array(d_in) == in[idxs]
+                    end
+                end
+            end
+        end # if cl.sub_groups_supported(cl.device())
 
 @testset "SIMD - $N x $T" for N in simd_ns, T in float_types
     # codegen emits i48 here, which SPIR-V doesn't support

codecov · 2026-01-12T00:03:45Z

Codecov Report

✅ All modified and coverable lines are covered by tests.
✅ Project coverage is 81.02%. Comparing base (d476f50) to head (0155bd1).
⚠️ Report is 1 commit behind head on master.

Additional details and impacted files

@@            Coverage Diff             @@
##           master     #413      +/-   ##
==========================================
+ Coverage   80.84%   81.02%   +0.18%     
==========================================
  Files          12       12              
  Lines         736      743       +7     
==========================================
+ Hits          595      602       +7     
  Misses        141      141

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:

❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

simeonschaub

Very nice work! I have a couple of comments, but overall looks great!

src/compiler/compilation.jl

lib/intrinsics/src/shuffle.jl

lib/intrinsics/src/synchronization.jl

lib/intrinsics/Project.toml

simeonschaub

Thanks, this looks great from my side! Feel free to merge and tag a new version

test/intrinsics.jl

christiangnrd marked this pull request as draft January 15, 2026 20:54

christiangnrd force-pushed the clsubgroups branch from 1ffbb85 to d540763 Compare January 16, 2026 01:55

christiangnrd marked this pull request as ready for review January 16, 2026 03:00

VarLad mentioned this pull request Jan 18, 2026

Support subgroups #284

Open

christiangnrd force-pushed the clsubgroups branch from d540763 to aee71aa Compare January 18, 2026 19:53

christiangnrd requested a review from simeonschaub January 19, 2026 01:47

christiangnrd force-pushed the clsubgroups branch from aee71aa to ef8e5d2 Compare January 22, 2026 14:52

christiangnrd force-pushed the clsubgroups branch 2 times, most recently from c58b3e9 to 92f0e6e Compare February 1, 2026 00:44

christiangnrd force-pushed the clsubgroups branch from 92f0e6e to 922f426 Compare February 13, 2026 23:00

christiangnrd changed the title ~~Subgroup support~~ [OpenCL] Subgroup support Feb 13, 2026

christiangnrd mentioned this pull request Feb 13, 2026

[SPIRVIntrinsics] Subgroup shuffle and barrier intrinsics #418

Closed

simeonschaub reviewed Feb 16, 2026

View reviewed changes

src/compiler/compilation.jl Outdated Show resolved Hide resolved

lib/intrinsics/src/shuffle.jl Show resolved Hide resolved

lib/intrinsics/src/synchronization.jl Show resolved Hide resolved

lib/intrinsics/Project.toml Show resolved Hide resolved

christiangnrd requested a review from simeonschaub February 18, 2026 15:17

simeonschaub approved these changes Feb 18, 2026

View reviewed changes

test/intrinsics.jl Outdated Show resolved Hide resolved

This comment was marked as outdated.

Sign in to view

christiangnrd force-pushed the clsubgroups branch from c51aa73 to f7ff4e0 Compare February 18, 2026 18:33

christiangnrd enabled auto-merge (rebase) February 18, 2026 18:33

christiangnrd mentioned this pull request Feb 18, 2026

[SPIRVIntrinsics] Sub-group shuffle & barrier #420

Merged

[OpenCL] Sub-group shuffle

0155bd1

christiangnrd force-pushed the clsubgroups branch from f7ff4e0 to 0155bd1 Compare February 18, 2026 21:47

christiangnrd disabled auto-merge February 18, 2026 21:54

christiangnrd merged commit bea5703 into master Feb 18, 2026
37 of 39 checks passed

christiangnrd deleted the clsubgroups branch February 18, 2026 22:17

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[OpenCL] Subgroup support #413

[OpenCL] Subgroup support #413
christiangnrd merged 1 commit into master from clsubgroups

christiangnrd commented Jan 11, 2026 •

edited

Loading

Uh oh!

github-actions bot commented Jan 11, 2026 •

edited

Loading

Uh oh!

codecov bot commented Jan 12, 2026 •

edited

Loading

Uh oh!

simeonschaub left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

simeonschaub left a comment

Uh oh!

Uh oh!

This comment was marked as outdated.

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Comments

Conversation

christiangnrd commented Jan 11, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Jan 11, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

codecov bot commented Jan 12, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Codecov Report

Uh oh!

simeonschaub left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

simeonschaub left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

This comment was marked as outdated.

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Comments

christiangnrd commented Jan 11, 2026 •

edited

Loading

github-actions bot commented Jan 11, 2026 •

edited

Loading

codecov bot commented Jan 12, 2026 •

edited

Loading