From f296f53baf9e1bf582e46c90109fdd0462fba6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1pai=20D=C3=A9nes?= Date: Fri, 2 Jan 2026 13:06:49 +0100 Subject: [PATCH] Use Int in Int - Int comparison --- problems/p30/p30.mojo | 6 +++--- problems/p31/p31.mojo | 6 +++--- problems/p32/p32.mojo | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/problems/p30/p30.mojo b/problems/p30/p30.mojo index 63c07e2c..b18956e1 100644 --- a/problems/p30/p30.mojo +++ b/problems/p30/p30.mojo @@ -24,7 +24,7 @@ fn kernel1[ b: LayoutTensor[dtype, layout, ImmutAnyOrigin], size: Int, ): - i = block_dim.x * block_idx.x + thread_idx.x + i = Int(block_dim.x * block_idx.x + thread_idx.x) if i < size: output[i] = a[i] + b[i] @@ -41,7 +41,7 @@ fn kernel2[ b: LayoutTensor[dtype, layout, ImmutAnyOrigin], size: Int, ): - tid = block_idx.x * block_dim.x + thread_idx.x + tid = Int(block_idx.x * block_dim.x + thread_idx.x) stride = 512 i = tid @@ -62,7 +62,7 @@ fn kernel3[ b: LayoutTensor[dtype, layout, ImmutAnyOrigin], size: Int, ): - tid = block_idx.x * block_dim.x + thread_idx.x + tid = Int(block_idx.x * block_dim.x + thread_idx.x) total_threads = (SIZE // 1024) * 1024 for step in range(0, size, total_threads): diff --git a/problems/p31/p31.mojo b/problems/p31/p31.mojo index 01455356..b155d930 100644 --- a/problems/p31/p31.mojo +++ b/problems/p31/p31.mojo @@ -24,7 +24,7 @@ fn minimal_kernel[ size: Int, ): """Minimal SAXPY kernel - simple and register-light for high occupancy.""" - i = block_dim.x * block_idx.x + thread_idx.x + i = Int(block_dim.x * block_idx.x + thread_idx.x) if i < size: # Direct computation: y[i] = alpha * x[i] + y[i] # Uses minimal registers (~8), no shared memory @@ -53,7 +53,7 @@ fn sophisticated_kernel[ address_space = AddressSpace.SHARED, ].stack_allocation() # 48KB - i = block_dim.x * block_idx.x + thread_idx.x + i = Int(block_dim.x * block_idx.x + thread_idx.x) local_i = thread_idx.x if i < size: @@ -150,7 +150,7 @@ fn balanced_kernel[ address_space = AddressSpace.SHARED, ].stack_allocation() # 16KB total - i = block_dim.x * block_idx.x + thread_idx.x + i = Int(block_dim.x * block_idx.x + thread_idx.x) local_i = thread_idx.x if i < size: diff --git a/problems/p32/p32.mojo b/problems/p32/p32.mojo index 1db251e5..452876bb 100644 --- a/problems/p32/p32.mojo +++ b/problems/p32/p32.mojo @@ -36,7 +36,7 @@ fn no_conflict_kernel[ address_space = AddressSpace.SHARED, ].stack_allocation() - global_i = block_dim.x * block_idx.x + thread_idx.x + global_i = Int(block_dim.x * block_idx.x + thread_idx.x) local_i = thread_idx.x # Load from global memory to shared memory - no conflicts @@ -79,7 +79,7 @@ fn two_way_conflict_kernel[ address_space = AddressSpace.SHARED, ].stack_allocation() - global_i = block_dim.x * block_idx.x + thread_idx.x + global_i = Int(block_dim.x * block_idx.x + thread_idx.x) local_i = thread_idx.x # CONFLICT: stride-2 access creates 2-way bank conflicts