From cb5d65b30e560eb020177bd5cc875455bd525eb6 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sun, 18 Jan 2026 15:52:19 -0400 Subject: [PATCH 1/2] Sub-group shuffle Co-Authored-By: Simeon David Schaub --- lib/intrinsics/src/SPIRVIntrinsics.jl | 1 + lib/intrinsics/src/shuffle.jl | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 lib/intrinsics/src/shuffle.jl diff --git a/lib/intrinsics/src/SPIRVIntrinsics.jl b/lib/intrinsics/src/SPIRVIntrinsics.jl index bd15fdd9..b2bca59d 100644 --- a/lib/intrinsics/src/SPIRVIntrinsics.jl +++ b/lib/intrinsics/src/SPIRVIntrinsics.jl @@ -23,6 +23,7 @@ include("printf.jl") include("math.jl") include("integer.jl") include("atomic.jl") +include("shuffle.jl") # helper macro to import all names from this package, even non-exported ones. macro import_all() diff --git a/lib/intrinsics/src/shuffle.jl b/lib/intrinsics/src/shuffle.jl new file mode 100644 index 00000000..4f8a0aa2 --- /dev/null +++ b/lib/intrinsics/src/shuffle.jl @@ -0,0 +1,11 @@ +export sub_group_shuffle, sub_group_shuffle_xor + +const gentypes = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float16, Float32, Float64] + +for gentype in gentypes + @eval begin + # cl_khr_subgroup_shuffle extension operations, one method pair per entry of `gentypes`: the shuffle builtin receives `i % Int32` minus one (presumably mapping a 1-based Julia lane index to the 0-based sub-group local id; TODO confirm), while the xor builtin receives `mask % Int32` with no offset + @device_function sub_group_shuffle(x::$gentype, i::Integer) = @builtin_ccall("sub_group_shuffle", $gentype, ($gentype, Int32), x, i % Int32 - 1i32) + @device_function sub_group_shuffle_xor(x::$gentype, mask::Integer) = @builtin_ccall("sub_group_shuffle_xor", $gentype, ($gentype, Int32), x, mask % Int32) + end +end From 48905ce547f1340fe028b69595e01f2d8f9246ee Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sun, 18 Jan 2026 15:52:49 -0400 Subject: [PATCH 2/2] Sub-group barrier --- [Reviewer note: sub_group_barrier repeats the work_group_barrier body with Scope.Subgroup as the first control_barrier argument and memory_scope_sub_group as the default `scope`; barrier(flags) still forwards to work_group_barrier. Project.toml version goes from 0.5.6 to 0.5.7.] lib/intrinsics/Project.toml | 2 +- lib/intrinsics/src/synchronization.jl | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml index 338bb329..5f2dafaa 100644 --- a/lib/intrinsics/Project.toml +++ b/lib/intrinsics/Project.toml @@ -1,7 +1,7 @@ name = "SPIRVIntrinsics" uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8" authors = ["Tim Besard "] -version = "0.5.6" +version = "0.5.7" [deps] ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04" diff --git a/lib/intrinsics/src/synchronization.jl b/lib/intrinsics/src/synchronization.jl index eab1349a..7d88b69f 100644 --- a/lib/intrinsics/src/synchronization.jl +++ b/lib/intrinsics/src/synchronization.jl @@ -149,10 +149,14 @@ write_mem_fence(flags) = atomic_work_item_fence(flags, memory_order_release, mem ## OpenCL execution barriers -export barrier, work_group_barrier +export barrier, work_group_barrier, sub_group_barrier @inline work_group_barrier(flags, scope = memory_scope_work_group) = control_barrier(Scope.Workgroup, cl_scope_to_spirv(scope), MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags)) +@inline sub_group_barrier(flags, scope = memory_scope_sub_group) = + control_barrier(Scope.Subgroup, cl_scope_to_spirv(scope), + MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags)) + barrier(flags) = work_group_barrier(flags)