diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 639dc50..baa39d2 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -19,4 +19,3 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} ssh: ${{ secrets.DOCUMENTER_KEY }} dispatch: true - changelog: false diff --git a/docs/src/features/bit-arrays.md b/docs/src/features/bit-arrays.md index ec21d39..61519b9 100644 --- a/docs/src/features/bit-arrays.md +++ b/docs/src/features/bit-arrays.md @@ -1,6 +1,6 @@ -# BitVector Support +# BitArray Support -AdaptiveArrayPools.jl includes specialized support for `BitArray` (specifically `BitVector`), enabling **~8x memory savings** for boolean arrays compared to standard `Vector{Bool}`. +AdaptiveArrayPools.jl includes specialized support for `BitArray` (including `BitVector` and N-dimensional `BitArray{N}`), enabling **~8x memory savings** for boolean arrays compared to standard `Vector{Bool}`. ## The `Bit` Sentinel Type @@ -14,31 +14,34 @@ To distinguish between standard boolean arrays (`Vector{Bool}`, 1 byte/element) ## Usage ### 1D Arrays (BitVector) -For 1D arrays, `acquire!` returns a view into a pooled `BitVector`. +For 1D arrays, `acquire!` returns a native `BitVector`. This design choice enables full SIMD optimization, making operations significantly faster (10x~100x) than using views. ```julia @with_pool pool begin # Acquire a BitVector of length 1000 bv = acquire!(pool, Bit, 1000) - + # Use like normal bv .= true bv[1] = false - - # Supports standard operations + + # Supports standard operations with full SIMD acceleration count(bv) end ``` -### N-D Arrays (BitArray / Reshaped) -For multi-dimensional arrays, `acquire!` returns a `ReshapedArray` wrapper around the linear `BitVector`. This maintains zero-allocation efficiency while providing N-D indexing. +### N-D Arrays (BitArray) +For multi-dimensional arrays, `acquire!` returns a `BitArray{N}` (specifically `BitMatrix` for 2D). This preserves the packed memory layout and SIMD benefits while providing N-D indexing. ```julia @with_pool pool begin - # 100x100 bit matrix + # 100x100 bit matrix (returns BitMatrix) mask = zeros!(pool, Bit, 100, 100) - + mask[5, 5] = true + + # 3D BitArray + volume = acquire!(pool, Bit, 10, 10, 10) end ``` @@ -50,29 +53,66 @@ For specific `BitVector` operations, prefer `trues!` and `falses!` which mirror @with_pool pool begin # Filled with false (equivalent to `falses(256)`) mask = falses!(pool, 256) - + # Filled with true (equivalent to `trues(256)`) flags = trues!(pool, 256) - + # Multidimensional grid = trues!(pool, 100, 100) - + # Similar to existing BitArray A = BitVector(undef, 50) B = similar!(pool, A) # Reuses eltype(A) -> Bool - + # To explicit get Bit-packed from pool irrespective of source - C = similar!(pool, A, Bit) + C = similar!(pool, A, Bit) end +``` Note: `zeros!(pool, Bit, ...)` and `ones!(pool, Bit, ...)` are also supported (aliased to `falses!` and `trues!`). + +## Performance & Safety + +### Why Native BitArray? +The pool returns native `BitVector`/`BitArray` types instead of `SubArray` views for **performance**. +Operations like `count()`, `sum()`, and bitwise broadcasting are **10x~100x faster** on native bit arrays because they utilize SIMD instructions on packed 64-bit chunks. 
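+
+The gap is easy to observe with a quick micro-benchmark. The snippet below is a minimal sketch (it is not part of the test suite and assumes BenchmarkTools.jl is installed); the exact speedup depends on the machine and array size, but it illustrates why the pool hands back a packed `BitVector` rather than a byte-per-element view:
+
+```julia
+using BenchmarkTools, AdaptiveArrayPools
+
+pool = AdaptiveArrayPool()
+
+@with_pool pool begin
+    bv = trues!(pool, 1_000_000)           # packed BitVector from the pool (1 bit/element)
+    vb = acquire!(pool, Bool, 1_000_000)   # Vector{Bool}-backed view (1 byte/element)
+    fill!(vb, true)
+
+    @btime count($bv)   # SIMD count over packed 64-bit chunks
+    @btime count($vb)   # byte-wise count, for comparison
+end
+```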
+ +### N-D Caching & Zero Allocation + +The pool uses an N-way associative cache to efficiently reuse `BitArray{N}` instances: + +| Scenario | Allocation | +|----------|------------| +| First call with new dims | ~944 bytes (new `BitArray{N}` created) | +| Subsequent call with same dims | **0 bytes** (cached instance reused) | +| Same ndims, different dims | **0 bytes** (dims/len fields modified in-place) | +| Different ndims | ~944 bytes (new `BitArray{N}` created and cached) | + +Unlike regular `Array` where dimensions are immutable, `BitArray` allows in-place modification of its `dims` and `len` fields. The pool exploits this to achieve **zero allocation** on repeated calls with matching dimensionality. + +```julia +@with_pool pool begin + # First call: allocates BitMatrix wrapper (~944 bytes) + m1 = acquire!(pool, Bit, 100, 100) + + # Rewind to reuse the same slot + rewind!(pool) + + # Same dims: 0 allocation (exact cache hit) + m2 = acquire!(pool, Bit, 100, 100) + + rewind!(pool) + + # Different dims but same ndims: 0 allocation (dims modified in-place) + m3 = acquire!(pool, Bit, 50, 200) +end ``` -## How It Works +### ⚠️ Important: Do Not Resize + +While the returned arrays are standard `BitVector` types, they share their underlying memory chunks with the pool. -The pool maintains a separate `BitTypedPool` specifically for `BitVector` storage. -- **Sentinel**: `acquire!(..., Bit, ...)` dispatches to this special pool. -- **Views**: 1D returns `SubArray{Bool, 1, BitVector, ...}`. -- **Reshaping**: N-D returns `ReshapedArray{Bool, N, SubArray{...}}`. +!!! warning "Do Not Resize" + **NEVER** resize (`push!`, `pop!`, `resize!`) a pooled `BitVector` or `BitArray`. -This ensures that even for complex shapes, the underlying storage is always a compact `BitVector` reused from the pool. + The underlying memory is owned and managed by the pool. Resizing it will detach it from the pool or potentially corrupt the shared state. Treat these arrays as **fixed-size** scratch buffers only. diff --git a/src/AdaptiveArrayPools.jl b/src/AdaptiveArrayPools.jl index 61f691a..7092822 100644 --- a/src/AdaptiveArrayPools.jl +++ b/src/AdaptiveArrayPools.jl @@ -28,6 +28,9 @@ include("utils.jl") # Acquisition operations: get_view!, acquire!, unsafe_acquire!, aliases include("acquire.jl") +# BitArray-specific acquisition (SIMD-optimized BitVector operations) +include("bitarray.jl") + # Convenience functions: zeros!, ones!, similar! include("convenience.jl") diff --git a/src/acquire.jl b/src/acquire.jl index b8ddcf6..428738b 100644 --- a/src/acquire.jl +++ b/src/acquire.jl @@ -6,29 +6,12 @@ @inline allocate_vector(::AbstractTypedPool{T,Vector{T}}, n::Int) where {T} = Vector{T}(undef, n) -# BitTypedPool allocates BitVector (used when acquiring with Bit type) -@inline allocate_vector(::BitTypedPool, n::Int) = BitVector(undef, n) - -# Bit type returns Bool element type for fill operations (zero/one) -@inline Base.zero(::Type{Bit}) = false -@inline Base.one(::Type{Bit}) = true - # Wrap flat view into N-D array (dispatch point for extensions) @inline function wrap_array(::AbstractTypedPool{T,Vector{T}}, flat_view, dims::NTuple{N,Int}) where {T,N} unsafe_wrap(Array{T,N}, pointer(flat_view), dims) end -# BitTypedPool cannot use unsafe_wrap - throw clear error -# Called from _unsafe_acquire_impl! dispatches for Bit type -@noinline function _throw_bit_unsafe_error() - throw(ArgumentError( - "unsafe_acquire!(pool, Bit, ...) is not supported. 
" * - "BitArray stores data in immutable chunks::Vector{UInt64} that cannot be wrapped with unsafe_wrap. " * - "Use acquire!(pool, Bit, ...) instead, which returns a view." - )) -end - # ============================================================================== # Helper: Overflow-Safe Product # ============================================================================== @@ -245,11 +228,6 @@ end # Similar-style @inline _unsafe_acquire_impl!(pool::AbstractArrayPool, x::AbstractArray) = _unsafe_acquire_impl!(pool, eltype(x), size(x)) -# Bit type: unsafe_acquire! not supported (throw clear error early) -@inline _unsafe_acquire_impl!(::AbstractArrayPool, ::Type{Bit}, ::Int) = _throw_bit_unsafe_error() -@inline _unsafe_acquire_impl!(::AbstractArrayPool, ::Type{Bit}, ::Vararg{Int,N}) where {N} = _throw_bit_unsafe_error() -@inline _unsafe_acquire_impl!(::AbstractArrayPool, ::Type{Bit}, ::NTuple{N,Int}) where {N} = _throw_bit_unsafe_error() - # ============================================================================== # Acquisition API (User-facing with untracked marking) # ============================================================================== @@ -450,11 +428,6 @@ const _acquire_array_impl! = _unsafe_acquire_impl! @inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims) @inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x) -# --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) -@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) - # --- Generic DisabledPool fallbacks (unknown backend → error) --- @inline acquire!(::DisabledPool{B}, _args...) where {B} = _throw_backend_not_loaded(B) @inline unsafe_acquire!(::DisabledPool{B}, _args...) where {B} = _throw_backend_not_loaded(B) diff --git a/src/bitarray.jl b/src/bitarray.jl new file mode 100644 index 0000000..eb9addf --- /dev/null +++ b/src/bitarray.jl @@ -0,0 +1,220 @@ +# ============================================================================== +# BitArray Acquisition (N-D Cached BitArray API) +# ============================================================================== +# +# This file contains BitArray-specific pool operations, separated from the +# generic Array acquisition code in acquire.jl for maintainability. +# +# Key components: +# - Base.zero/one(::Type{Bit}) - Fill value dispatch for Bit sentinel type +# - get_bitarray! - N-D BitArray with shared chunks and N-way caching +# - _acquire_impl! for Bit - Delegates to _unsafe_acquire_impl! for performance +# - _unsafe_acquire_impl! for Bit - Raw BitArray acquisition with caching +# - DisabledPool fallbacks for Bit type +# +# Design Decision: Unified BitArray Return Type +# ============================================= +# Unlike regular types where acquire! returns SubArray and unsafe_acquire! +# returns Array, for Bit type BOTH return BitArray{N}. This design choice is +# intentional for several reasons: +# +# 1. **SIMD Performance**: BitArray operations like `count()`, `sum()`, and +# bitwise operations are ~(10x ~ 100x) faster than their SubArray equivalents +# because they use SIMD-optimized chunked algorithms. +# +# 2. **API Simplicity**: Users always get BitArray regardless of which API +# they call. 
No need to remember "use unsafe_acquire! for performance". +# +# 3. **N-D Caching**: BitArray{N} can be reused by modifying dims/len fields +# when ndims matches, achieving 0 allocation on repeated calls. This is +# unique to BitArray - regular Array cannot modify dims in place. +# +# 4. **Backwards Compatibility**: Code using trues!/falses! just works with +# optimal performance - these convenience functions return BitVector. +# +# Implementation: +# - _acquire_impl!(pool, Bit, ...) delegates to _unsafe_acquire_impl! +# - get_bitarray! creates BitArray shells sharing pool's chunks +# - N-way cache stores BitArray{N} entries, reused via dims modification +# ============================================================================== + +# ============================================================================== +# Fill Value Dispatch (BitArray-specific) +# ============================================================================== + +# Bit type returns Bool element type for fill operations (zero/one) +@inline Base.zero(::Type{Bit}) = false +@inline Base.one(::Type{Bit}) = true + +# ============================================================================== +# BitArray Acquisition (N-D caching with chunks sharing) +# ============================================================================== + +""" + get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) -> BitArray{N} + +Get a BitArray{N} that shares `chunks` with the pooled BitVector. + +Uses N-way cache for BitArray reuse. Unlike Array which requires unsafe_wrap +for each shape, BitArray can reuse cached entries by modifying `dims`/`len` +fields when ndims matches (0 bytes allocation). + +## Cache Strategy +- **Exact match**: Return cached BitArray directly (0 bytes) +- **Same ndims**: Modify dims/len/chunks of cached entry (0 bytes) +- **Different ndims**: Create new BitArray{N} and cache it (~944 bytes) + +## Implementation Notes +- BitVector (N=1): `size()` uses `len` field, `dims` is ignored +- BitArray{N>1}: `size()` uses `dims` field +- All BitArrays share `chunks` with the pool's backing BitVector + +## Safety +The returned BitArray is only valid within the `@with_pool` scope. +Do NOT use after the scope ends (use-after-free risk). +""" +function get_bitarray!(tp::BitTypedPool, dims::NTuple{N,Int}) where {N} + total_len = safe_prod(dims) + tp.n_active += 1 + idx = tp.n_active + + # 1. Pool expansion needed (new slot) + if idx > length(tp.vectors) + pool_bv = BitVector(undef, total_len) + push!(tp.vectors, pool_bv) + + # Create BitArray sharing chunks + ba = BitArray{N}(undef, dims) + ba.chunks = pool_bv.chunks + + # Expand N-way cache (CACHE_WAYS entries per slot) + for _ in 1:CACHE_WAYS + push!(tp.nd_arrays, nothing) + push!(tp.nd_dims, nothing) + push!(tp.nd_ptrs, UInt(0)) + end + push!(tp.nd_next_way, 0) + + # Cache in first way + base = (idx - 1) * CACHE_WAYS + 1 + @inbounds tp.nd_arrays[base] = ba + @inbounds tp.nd_dims[base] = dims + @inbounds tp.nd_ptrs[base] = UInt(pointer(pool_bv.chunks)) + + # Warn at powers of 2 (possible missing rewind!) + if idx >= 512 && (idx & (idx - 1)) == 0 + total_bytes = sum(_vector_bytes, tp.vectors) + @warn "BitTypedPool growing large ($idx arrays, ~$(Base.format_bytes(total_bytes))). Missing rewind!()?" + end + + return ba + end + + # 2. Ensure pool_bv has correct size + @inbounds pool_bv = tp.vectors[idx] + if length(pool_bv) != total_len + resize!(pool_bv, total_len) + end + current_ptr = UInt(pointer(pool_bv.chunks)) + base = (idx - 1) * CACHE_WAYS + + # 3. 
Check N-way cache for hit + for k in 1:CACHE_WAYS + cache_idx = base + k + @inbounds cached_dims = tp.nd_dims[cache_idx] + @inbounds cached_ptr = tp.nd_ptrs[cache_idx] + + # Must check isa FIRST for type stability (avoids boxing in == comparison) + if cached_dims isa NTuple{N,Int} && cached_ptr == current_ptr + if cached_dims == dims + # Exact match - return cached BitArray directly (0 alloc) + return @inbounds tp.nd_arrays[cache_idx]::BitArray{N} + else + # Same ndims but different dims - reuse by modifying fields (0 alloc!) + ba = @inbounds tp.nd_arrays[cache_idx]::BitArray{N} + ba.len = total_len + ba.dims = dims + ba.chunks = pool_bv.chunks + # Update cache metadata + @inbounds tp.nd_dims[cache_idx] = dims + return ba + end + end + end + + # 4. Cache miss - create new BitArray{N} + ba = BitArray{N}(undef, dims) + ba.chunks = pool_bv.chunks + + # Round-robin replacement + @inbounds way_offset = tp.nd_next_way[idx] + target_idx = base + way_offset + 1 + @inbounds tp.nd_arrays[target_idx] = ba + @inbounds tp.nd_dims[target_idx] = dims + @inbounds tp.nd_ptrs[target_idx] = current_ptr + @inbounds tp.nd_next_way[idx] = (way_offset + 1) % CACHE_WAYS + + return ba +end + +# Convenience: 1D case wraps to tuple +@inline get_bitarray!(tp::BitTypedPool, n::Int) = get_bitarray!(tp, (n,)) + +# ============================================================================== +# Acquire Implementation (Bit type → delegates to unsafe_acquire for performance) +# ============================================================================== +# +# Unlike other types where acquire! returns SubArray (view-based) and +# unsafe_acquire! returns Array (raw), Bit type always returns BitArray{N}. +# This is because BitArray's SIMD-optimized operations (count, sum, etc.) +# are ~(10x ~ 100x) faster than SubArray equivalents. +# +# The delegation is transparent: users calling acquire!(pool, Bit, dims...) get +# BitArray{N} without needing to know about unsafe_acquire!. + +# Bit type: delegates to _unsafe_acquire_impl! for SIMD performance +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, n::Int) + return _unsafe_acquire_impl!(pool, Bit, n) +end + +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} + return _unsafe_acquire_impl!(pool, Bit, dims...) +end + +@inline function _acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} + return _unsafe_acquire_impl!(pool, Bit, dims...) 
+end + +# ============================================================================== +# Unsafe Acquire Implementation (Bit type) +# ============================================================================== + +# Bit type: returns BitArray{N} with shared chunks (SIMD optimized, N-D cached) +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, n::Int) + tp = get_typed_pool!(pool, Bit)::BitTypedPool + return get_bitarray!(tp, n) +end + +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::Vararg{Int,N}) where {N} + tp = get_typed_pool!(pool, Bit)::BitTypedPool + return get_bitarray!(tp, dims) +end + +@inline function _unsafe_acquire_impl!(pool::AbstractArrayPool, ::Type{Bit}, dims::NTuple{N,Int}) where {N} + tp = get_typed_pool!(pool, Bit)::BitTypedPool + return get_bitarray!(tp, dims) +end + +# ============================================================================== +# DisabledPool Fallbacks (Bit type) +# ============================================================================== + +# --- acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) +@inline acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) + +# --- unsafe_acquire! for DisabledPool{:cpu} with Bit type (returns BitArray) --- +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, n::Int) = BitVector(undef, n) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::Vararg{Int,N}) where {N} = BitArray{N}(undef, dims) +@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{Bit}, dims::NTuple{N,Int}) where {N} = BitArray{N}(undef, dims) diff --git a/src/state.jl b/src/state.jl index 9cb09ab..c9b2a66 100644 --- a/src/state.jl +++ b/src/state.jl @@ -206,12 +206,34 @@ end # ============================================================================== """ - empty!(tp::AbstractTypedPool) + empty!(tp::BitTypedPool) -Clear all internal storage, releasing all memory. +Clear all internal storage for BitTypedPool, releasing all memory. Restores sentinel values for 1-based sentinel pattern. """ -function Base.empty!(tp::AbstractTypedPool) +function Base.empty!(tp::BitTypedPool) + empty!(tp.vectors) + # Clear N-way wrapper cache + empty!(tp.nd_arrays) + empty!(tp.nd_dims) + empty!(tp.nd_ptrs) + empty!(tp.nd_next_way) + tp.n_active = 0 + # Restore sentinel values (1-based sentinel pattern) + empty!(tp._checkpoint_n_active) + push!(tp._checkpoint_n_active, 0) # Sentinel: n_active=0 at depth=0 + empty!(tp._checkpoint_depths) + push!(tp._checkpoint_depths, 0) # Sentinel: depth=0 = no checkpoint + return tp +end + +""" + empty!(tp::TypedPool) + +Clear all internal storage for TypedPool, releasing all memory. +Restores sentinel values for 1-based sentinel pattern. +""" +function Base.empty!(tp::TypedPool) empty!(tp.vectors) empty!(tp.views) empty!(tp.view_lengths) diff --git a/src/types.jl b/src/types.jl index 2b1a070..0b6f62f 100644 --- a/src/types.jl +++ b/src/types.jl @@ -225,28 +225,45 @@ bit-packed arrays (1 bit per element vs 1 byte for `Vector{Bool}`). 
## Usage ```julia @with_pool pool begin - # BitVector view (1 bit per element, ~8x memory savings) + # BitVector (1 bit per element, ~8x memory savings) bv = acquire!(pool, Bit, 1000) # vs Vector{Bool} (1 byte per element) vb = acquire!(pool, Bool, 1000) # Convenience functions work too - mask = zeros!(pool, Bit, 100) # BitVector filled with false - flags = ones!(pool, Bit, 100) # BitVector filled with true + mask = falses!(pool, 100) # BitVector filled with false + flags = trues!(pool, 100) # BitVector filled with true end ``` -## Return Types -- **1D**: `SubArray{Bool,1,BitVector,...}` -- **N-D**: `ReshapedArray{Bool,N,...}` (reshaped view of 1D BitVector) +## Return Types (Unified for Performance) +Unlike other types, `Bit` always returns native `BitVector`/`BitArray`: +- **1D**: `BitVector` (both `acquire!` and `unsafe_acquire!`) +- **N-D**: `BitArray{N}` (reshaped, preserves SIMD optimization) -## Limitation -`unsafe_acquire!(pool, Bit, ...)` is **not supported** because Julia's -`BitArray` stores data in immutable `chunks::Vector{UInt64}` that cannot -be wrapped with `unsafe_wrap`. +This design ensures users always get SIMD-optimized performance without +needing to remember which API to use. -See also: [`acquire!`](@ref), [`BitTypedPool`](@ref) +## Performance +`BitVector` operations like `count()`, `sum()`, and bitwise operations are +~(10x ~ 100x) faster than equivalent operations on `SubArray{Bool}` because they +use SIMD-optimized algorithms on packed 64-bit chunks. + +```julia +@with_pool pool begin + bv = acquire!(pool, Bit, 10000) + fill!(bv, true) + count(bv) # Uses fast SIMD path automatically +end +``` + +## Memory Safety +The returned `BitVector` shares its internal `chunks` array with the pool. +It is only valid within the `@with_pool` scope - using it after the scope +ends leads to undefined behavior (use-after-free risk). + +See also: [`trues!`](@ref), [`falses!`](@ref), [`BitTypedPool`](@ref) """ struct Bit end @@ -262,46 +279,58 @@ Specialized pool for `BitVector` arrays with memory reuse. Unlike `TypedPool{Bool}` which stores `Vector{Bool}` (1 byte per element), this pool stores `BitVector` (1 bit per element, ~8x memory efficiency). -## Important Limitation -**`unsafe_acquire!` is NOT supported for BitArray** because Julia's `BitArray` -stores data in a `chunks::Vector{UInt64}` field that cannot be wrapped with -`unsafe_wrap`. Only view-based acquisition via `acquire!(pool, Bit, ...)` is available. +## Unified API (Always Returns BitVector) +Unlike other types, both `acquire!` and `unsafe_acquire!` return `BitVector` +for the `Bit` type. This design ensures users always get SIMD-optimized +performance without needing to choose between APIs. 
+ +- `acquire!(pool, Bit, n)` → `BitVector` (SIMD optimized) +- `unsafe_acquire!(pool, Bit, n)` → `BitVector` (same behavior) +- `trues!(pool, n)` → `BitVector` filled with `true` +- `falses!(pool, n)` → `BitVector` filled with `false` ## Fields - `vectors`: Backing `BitVector` storage -- `views`: Cached `SubArray` views for zero-allocation 1D access -- `view_lengths`: Cached lengths for fast comparison -- `nd_*`: Empty N-D cache fields (for `empty!` compatibility, unused) +- `nd_arrays`: Cached wrapper BitVectors (chunks sharing) +- `nd_dims`: Cached lengths for wrapper cache validation +- `nd_ptrs`: Cached chunk pointers for invalidation detection +- `nd_next_way`: Round-robin counter for N-way cache - `n_active`: Count of currently active arrays - `_checkpoint_*`: State management stacks (1-based sentinel pattern) ## Usage ```julia @with_pool pool begin - bv = acquire!(pool, Bit, 100) # SubArray{Bool,1,BitVector,...} - ba = acquire!(pool, Bit, 10, 10) # ReshapedArray{Bool,2,...} - t = trues!(pool, 50) # Filled with true - f = falses!(pool, 50) # Filled with false + # All return BitVector with SIMD performance + bv = acquire!(pool, Bit, 100) # BitVector + count(bv) # Fast SIMD path + + # Convenience functions + t = trues!(pool, 50) # BitVector filled with true + f = falses!(pool, 50) # BitVector filled with false end ``` -See also: [`trues!`](@ref), [`falses!`](@ref) +## Performance +Operations like `count()`, `sum()`, and bitwise operations are ~(10x ~ 100x) faster +than equivalent operations on `SubArray{Bool}` because `BitVector` uses +SIMD-optimized algorithms on packed 64-bit chunks. + +See also: [`trues!`](@ref), [`falses!`](@ref), [`Bit`](@ref) """ mutable struct BitTypedPool <: AbstractTypedPool{Bool, BitVector} # --- Storage --- vectors::Vector{BitVector} - # --- 1D Cache (1:1 mapping) --- - views::Vector{SubArray{Bool, 1, BitVector, Tuple{UnitRange{Int64}}, true}} - view_lengths::Vector{Int} - - # --- N-D Array Cache (empty, for empty! compatibility) --- - # BitArray cannot use unsafe_wrap, so no N-D caching is possible. - # These fields exist only for compatibility with empty!(::AbstractTypedPool). - nd_arrays::Vector{Any} - nd_dims::Vector{Any} - nd_ptrs::Vector{UInt} - nd_next_way::Vector{Int} + # --- N-D BitArray Cache (N-way set associative) --- + # Unlike TypedPool which uses views for 1D and nd_* for N-D, + # BitTypedPool uses nd_* for ALL dimensions (1D, 2D, 3D, etc.). + # No views needed since we always return BitArray{N}, not SubArray. + # BitArray.dims is mutable, enabling 0-alloc reuse for same-ndims requests. 
+ nd_arrays::Vector{Any} # Cached BitArray{N} instances + nd_dims::Vector{Any} # Cached dims (NTuple{N,Int}) + nd_ptrs::Vector{UInt} # pointer validation + nd_next_way::Vector{Int} # round-robin counter per slot # --- State Management (1-based sentinel pattern) --- n_active::Int @@ -312,10 +341,7 @@ end BitTypedPool() = BitTypedPool( # Storage BitVector[], - # 1D Cache - SubArray{Bool, 1, BitVector, Tuple{UnitRange{Int64}}, true}[], - Int[], - # N-D Array Cache (empty, for compatibility) + # 1D BitVector Wrapper Cache (N-way) Any[], Any[], UInt[], diff --git a/src/utils.jl b/src/utils.jl index 5950744..f252aaa 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -79,14 +79,24 @@ _validate_pool_return(val, ::DisabledPool) = nothing # Statistics & Pretty Printing # ============================================================================== +# --- Helper functions for pool_stats (type-specific behavior) --- +_default_type_name(::TypedPool{T}) where {T} = string(T) +_default_type_name(::BitTypedPool) = "Bit" + +_vector_bytes(v::Vector) = Base.summarysize(v) +_vector_bytes(v::BitVector) = sizeof(v.chunks) + +_count_label(::TypedPool) = "elements" +_count_label(::BitTypedPool) = "bits" + """ - pool_stats(tp::TypedPool{T}; io::IO=stdout, indent::Int=0, name::String="") + pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::String="") -Print statistics for a single TypedPool. +Print statistics for a TypedPool or BitTypedPool. """ -function pool_stats(tp::TypedPool{T}; io::IO=stdout, indent::Int=0, name::String="") where {T} +function pool_stats(tp::AbstractTypedPool; io::IO=stdout, indent::Int=0, name::String="") prefix = " "^indent - type_name = isempty(name) ? string(T) : name + type_name = isempty(name) ? _default_type_name(tp) : name n_arrays = length(tp.vectors) if n_arrays == 0 @@ -95,8 +105,8 @@ function pool_stats(tp::TypedPool{T}; io::IO=stdout, indent::Int=0, name::String return end - total_elements = sum(length(v) for v in tp.vectors) - total_bytes = sum(Base.summarysize(v) for v in tp.vectors) + total_count = sum(length(v) for v in tp.vectors) + total_bytes = sum(_vector_bytes(v) for v in tp.vectors) bytes_str = Base.format_bytes(total_bytes) # Header @@ -110,8 +120,8 @@ function pool_stats(tp::TypedPool{T}; io::IO=stdout, indent::Int=0, name::String printstyled(io, tp.n_active, color=:blue) printstyled(io, ")\n", color=:dark_gray) - printstyled(io, prefix, " elements: ", color=:dark_gray) - printstyled(io, total_elements, color=:blue) + printstyled(io, prefix, " ", _count_label(tp), ": ", color=:dark_gray) + printstyled(io, total_count, color=:blue) printstyled(io, " ($bytes_str)\n", color=:dark_gray) return nothing end @@ -141,8 +151,8 @@ function pool_stats(pool::AdaptiveArrayPool; io::IO=stdout) foreach_fixed_slot(pool) do tp if !isempty(tp.vectors) has_content = true - T = typeof(tp).parameters[1] # Extract T from TypedPool{T} - pool_stats(tp; io, indent=2, name="$T (fixed)") + name = _default_type_name(tp) * " (fixed)" + pool_stats(tp; io, indent=2, name) end end @@ -175,10 +185,7 @@ function pool_stats(; io::IO=stdout) pool_stats(:cpu; io) # Show CUDA pools if extension is loaded and pools exist try - pools = get_task_local_cuda_pools() - for pool in values(pools) - pool_stats(pool; io) - end + pool_stats(Val(:cuda); io) catch e e isa MethodError || rethrow() # CUDA extension not loaded - silently skip @@ -212,20 +219,26 @@ end # Base.show (delegates to pool_stats) # ============================================================================== -# Compact one-line 
show for TypedPool -function Base.show(io::IO, tp::TypedPool{T}) where {T} +# --- Helper for Base.show (full type name for display) --- +_show_type_name(::TypedPool{T}) where {T} = "TypedPool{$T}" +_show_type_name(::BitTypedPool) = "BitTypedPool" + +# Compact one-line show for all AbstractTypedPool +function Base.show(io::IO, tp::AbstractTypedPool) + name = _show_type_name(tp) n_vectors = length(tp.vectors) if n_vectors == 0 - print(io, "TypedPool{$T}(empty)") + print(io, "$name(empty)") else total = sum(length(v) for v in tp.vectors) - print(io, "TypedPool{$T}(slots=$n_vectors, active=$(tp.n_active), elements=$total)") + label = _count_label(tp) + print(io, "$name(slots=$n_vectors, active=$(tp.n_active), $label=$total)") end end -# Multi-line show for TypedPool -function Base.show(io::IO, ::MIME"text/plain", tp::TypedPool{T}) where {T} - pool_stats(tp; io, name="TypedPool{$T}") +# Multi-line show for all AbstractTypedPool +function Base.show(io::IO, ::MIME"text/plain", tp::AbstractTypedPool) + pool_stats(tp; io, name=_show_type_name(tp)) end # Compact one-line show for AdaptiveArrayPool diff --git a/test/runtests.jl b/test/runtests.jl index c4417de..2525187 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ else include("test_convenience.jl") include("test_bitarray.jl") include("test_coverage.jl") + include("test_allocation.jl") # CUDA extension tests (auto-detect, skip with TEST_CUDA=false) if get(ENV, "TEST_CUDA", "true") != "false" diff --git a/test/test_allocation.jl b/test/test_allocation.jl new file mode 100644 index 0000000..270fb8f --- /dev/null +++ b/test/test_allocation.jl @@ -0,0 +1,30 @@ +@with_pool pool function foo() + float64_vec = acquire!(pool, Float64, 10) + float32_vec = acquire!(pool, Float32, 10) + + float64_mat = acquire!(pool, Float64, 10, 10) + float32_mat = acquire!(pool, Float32, 10, 10) + + bv = acquire!(pool, Bit, 100) + ba2 = acquire!(pool, Bit, 10, 10) + ba3 = acquire!(pool, Bit, 5, 5, 4) + + tt1 = trues!(pool, 256) + tt2 = ones!(pool, Bit, 10, 20) + ff1 = falses!(pool, 100, 5) + ff2 = zeros!(pool, Bit, 100) + + C = similar!(pool, tt1) +end + + +@testset "zero allocation on reuse" begin + + alloc1 = @allocated foo() + alloc2 = @allocated foo() + alloc3 = @allocated foo() + + @test alloc1 > 0 # First call allocates + @test alloc2 == 0 # Subsequent calls reuse cached arrays + @test alloc3 == 0 # Further calls also zero allocation +end \ No newline at end of file diff --git a/test/test_bitarray.jl b/test/test_bitarray.jl index a5dbeca..12a2e5b 100644 --- a/test/test_bitarray.jl +++ b/test/test_bitarray.jl @@ -26,13 +26,14 @@ @test isempty(pool.bits.vectors) end - @testset "acquire!(pool, Bit, n) - 1D" begin + @testset "acquire!(pool, Bit, n) - 1D (returns BitVector for SIMD performance)" begin pool = AdaptiveArrayPool() bv = acquire!(pool, Bit, 100) @test length(bv) == 100 @test eltype(bv) == Bool - @test bv isa SubArray{Bool, 1, BitVector} + # Returns BitVector (not SubArray) for SIMD-optimized operations + @test bv isa BitVector @test pool.bits.n_active == 1 # Write and read back @@ -45,6 +46,7 @@ # Second acquire bv2 = acquire!(pool, Bit, 50) @test length(bv2) == 50 + @test bv2 isa BitVector @test pool.bits.n_active == 2 # Independent values @@ -53,14 +55,15 @@ @test count(bv) == 99 # bv unchanged end - @testset "acquire!(pool, Bit, dims...) - N-D" begin + @testset "acquire!(pool, Bit, dims...) 
- N-D (returns BitArray for SIMD performance)" begin pool = AdaptiveArrayPool() - # 2D + # 2D - returns BitMatrix (Julia's reshape(BitVector, dims) returns BitArray) ba2 = acquire!(pool, Bit, 10, 10) @test size(ba2) == (10, 10) @test eltype(ba2) == Bool - @test ba2 isa Base.ReshapedArray + # Note: reshape(BitVector, dims) returns BitArray{N}, not ReshapedArray + @test ba2 isa BitMatrix @test pool.bits.n_active == 1 # Test indexing @@ -75,108 +78,126 @@ # 3D ba3 = acquire!(pool, Bit, 4, 5, 3) @test size(ba3) == (4, 5, 3) + @test ba3 isa BitArray{3} @test pool.bits.n_active == 2 # Tuple form ba_tuple = acquire!(pool, Bit, (3, 4, 2)) @test size(ba_tuple) == (3, 4, 2) + @test ba_tuple isa BitArray{3} @test pool.bits.n_active == 3 end - @testset "ones!(pool, Bit, dims...) - filled with true" begin + @testset "ones!(pool, Bit, dims...) - BitVector filled with true" begin pool = AdaptiveArrayPool() - # 1D + # 1D - returns BitVector t1 = ones!(pool, Bit, 100) @test length(t1) == 100 @test all(t1) + @test t1 isa BitVector @test pool.bits.n_active == 1 - # 2D + # 2D - returns BitMatrix (reshape of BitVector) t2 = ones!(pool, Bit, 10, 10) @test size(t2) == (10, 10) @test all(t2) @test count(t2) == 100 + @test t2 isa BitMatrix # Tuple form t3 = ones!(pool, Bit, (5, 5, 4)) @test size(t3) == (5, 5, 4) @test all(t3) + @test t3 isa BitArray{3} end - @testset "zeros!(pool, Bit, dims...) - filled with false" begin + @testset "zeros!(pool, Bit, dims...) - BitVector filled with false" begin pool = AdaptiveArrayPool() - # 1D + # 1D - returns BitVector f1 = zeros!(pool, Bit, 100) @test length(f1) == 100 @test !any(f1) + @test f1 isa BitVector @test pool.bits.n_active == 1 - # 2D + # 2D - returns BitMatrix (reshape of BitVector) f2 = zeros!(pool, Bit, 10, 10) @test size(f2) == (10, 10) @test !any(f2) @test count(f2) == 0 + @test f2 isa BitMatrix # Tuple form f3 = zeros!(pool, Bit, (5, 5, 4)) @test size(f3) == (5, 5, 4) @test !any(f3) + @test f3 isa BitArray{3} end - @testset "trues!(pool, dims...) - convenience for BitArray filled with true" begin + @testset "trues!(pool, dims...) - BitVector filled with true (SIMD optimized)" begin pool = AdaptiveArrayPool() - # 1D + # 1D - returns BitVector t1 = trues!(pool, 100) @test length(t1) == 100 @test all(t1) @test eltype(t1) == Bool + @test t1 isa BitVector @test pool.bits.n_active == 1 - # 2D + # 2D - returns BitMatrix (reshape of BitVector) t2 = trues!(pool, 10, 10) @test size(t2) == (10, 10) @test all(t2) @test count(t2) == 100 + @test t2 isa BitMatrix # Tuple form t3 = trues!(pool, (5, 5, 4)) @test size(t3) == (5, 5, 4) @test all(t3) + @test t3 isa BitArray{3} # Equivalent to ones!(pool, Bit, ...) t4 = trues!(pool, 50) t5 = ones!(pool, Bit, 50) @test all(t4 .== t5) + @test t4 isa BitVector + @test t5 isa BitVector end - @testset "falses!(pool, dims...) - convenience for BitArray filled with false" begin + @testset "falses!(pool, dims...) - BitVector filled with false (SIMD optimized)" begin pool = AdaptiveArrayPool() - # 1D + # 1D - returns BitVector f1 = falses!(pool, 100) @test length(f1) == 100 @test !any(f1) @test eltype(f1) == Bool + @test f1 isa BitVector @test pool.bits.n_active == 1 - # 2D + # 2D - returns BitMatrix (reshape of BitVector) f2 = falses!(pool, 10, 10) @test size(f2) == (10, 10) @test !any(f2) @test count(f2) == 0 + @test f2 isa BitMatrix # Tuple form f3 = falses!(pool, (5, 5, 4)) @test size(f3) == (5, 5, 4) @test !any(f3) + @test f3 isa BitArray{3} # Equivalent to zeros!(pool, Bit, ...) 
f4 = falses!(pool, 50) f5 = zeros!(pool, Bit, 50) @test all(f4 .== f5) + @test f4 isa BitVector + @test f5 isa BitVector end @testset "State management" begin @@ -237,7 +258,7 @@ end @testset "DisabledPool fallback" begin - # acquire! with Bit + # --- acquire! with Bit --- bv = acquire!(DISABLED_CPU, Bit, 100) @test bv isa BitVector @test length(bv) == 100 @@ -252,6 +273,21 @@ @test ba_tuple isa BitArray{2} @test size(ba_tuple) == (5, 5) + # --- unsafe_acquire! with Bit (covers bitarray.jl:206-208) --- + ubv = unsafe_acquire!(DISABLED_CPU, Bit, 100) + @test ubv isa BitVector + @test length(ubv) == 100 + + # N-D + uba = unsafe_acquire!(DISABLED_CPU, Bit, 10, 10) + @test uba isa BitArray{2} + @test size(uba) == (10, 10) + + # Tuple form + uba_tuple = unsafe_acquire!(DISABLED_CPU, Bit, (5, 5)) + @test uba_tuple isa BitArray{2} + @test size(uba_tuple) == (5, 5) + # ones! with Bit (like trues) t = ones!(DISABLED_CPU, Bit, 50) @test t isa BitVector @@ -405,14 +441,14 @@ @testset "Mixed Bool types" begin pool = AdaptiveArrayPool() - # Vector{Bool} via acquire! with Bool + # Vector{Bool} via acquire! with Bool - returns SubArray (view) vb = acquire!(pool, Bool, 100) @test vb isa SubArray{Bool, 1, Vector{Bool}} @test pool.bool.n_active == 1 - # BitVector via acquire! with Bit + # BitVector via acquire! with Bit - returns BitVector (for SIMD) bv = acquire!(pool, Bit, 100) - @test bv isa SubArray{Bool, 1, BitVector} + @test bv isa BitVector # Note: Bit returns BitVector, not SubArray @test pool.bits.n_active == 1 # Both should work independently @@ -445,24 +481,60 @@ @test outer_result == (100, 0) end - @testset "unsafe_acquire! not supported" begin + @testset "unsafe_acquire! returns BitVector with shared chunks" begin pool = AdaptiveArrayPool() - # unsafe_acquire! with Bit should throw a clear error - @test_throws ArgumentError unsafe_acquire!(pool, Bit, 100) - @test_throws ArgumentError unsafe_acquire!(pool, Bit, 10, 10) + # unsafe_acquire! with Bit returns a real BitVector (not SubArray) + bv = unsafe_acquire!(pool, Bit, 100) + @test bv isa BitVector + @test length(bv) == 100 - # Tuple form (covers acquire.jl:251) - @test_throws ArgumentError unsafe_acquire!(pool, Bit, (10, 10)) + # N-D returns BitArray (reshape of BitVector becomes BitArray in Julia) + ba = unsafe_acquire!(pool, Bit, 10, 10) + @test ba isa BitMatrix # reshape(BitVector, dims) → BitArray + @test size(ba) == (10, 10) - # Verify the error message is helpful - try - unsafe_acquire!(pool, Bit, 100) - catch e - @test e isa ArgumentError - @test occursin("unsafe_acquire!", e.msg) - @test occursin("Bit", e.msg) - @test occursin("acquire!", e.msg) # Suggests alternative + # Tuple form + ba_tuple = unsafe_acquire!(pool, Bit, (10, 10)) + @test ba_tuple isa BitMatrix + @test size(ba_tuple) == (10, 10) + + # Verify chunks sharing (key feature!) + @with_pool pool2 begin + bv2 = unsafe_acquire!(pool2, Bit, 100) + pool_bv = pool2.bits.vectors[1] + @test bv2.chunks === pool_bv.chunks # Same chunks object! + + # Verify data is shared + bv2[1] = true + @test pool_bv[1] == true + bv2[1] = false + @test pool_bv[1] == false + end + end + + @testset "Unified BitVector API - both acquire! and unsafe_acquire! return BitVector" begin + # Both acquire! and unsafe_acquire! return BitVector for Bit type + # This is a deliberate design choice for SIMD performance + pool = AdaptiveArrayPool() + + @with_pool pool begin + n = 10000 + + # unsafe_acquire! 
returns BitVector + bv_unsafe = unsafe_acquire!(pool, Bit, n) + fill!(bv_unsafe, true) + @test count(bv_unsafe) == n + @test bv_unsafe isa BitVector + + # acquire! ALSO returns BitVector (not SubArray) + bv_acquire = acquire!(pool, Bit, n) + fill!(bv_acquire, true) + @test count(bv_acquire) == n + @test bv_acquire isa BitVector # Same type as unsafe_acquire! + + # Both benefit from SIMD-optimized count() + # (No performance difference since both return BitVector) end end @@ -481,6 +553,12 @@ @test eltype(v_bool) == Bool @test eltype(v_bit) == Bool + # Note: acquire! returns SubArray for most types, but BitVector for Bit + @test v_f64 isa SubArray + @test v_i32 isa SubArray + @test v_bool isa SubArray + @test v_bit isa BitVector # Special case for SIMD performance + # zeros!/ones! work consistently z_f64 = zeros!(pool, Float64, 10) z_bit = zeros!(pool, Bit, 10) @@ -491,29 +569,37 @@ @test !any(z_bit) @test all(o_f64 .== 1.0) @test all(o_bit) + + # Type consistency for convenience functions + @test z_bit isa BitVector + @test o_bit isa BitVector end - @testset "NTuple form coverage" begin + @testset "NTuple form coverage (all return BitArray types)" begin pool = AdaptiveArrayPool() # Test NTuple forms for trues!/falses! (covers _trues_impl! and _falses_impl! NTuple overloads) t_tuple = trues!(pool, (5, 5)) @test size(t_tuple) == (5, 5) @test all(t_tuple) + @test t_tuple isa BitMatrix f_tuple = falses!(pool, (5, 5)) @test size(f_tuple) == (5, 5) @test !any(f_tuple) + @test f_tuple isa BitMatrix # Test NTuple forms for zeros!/ones! with Bit type # (covers _zeros_impl! and _ones_impl! with Bit NTuple overloads) z_bit_tuple = zeros!(pool, Bit, (4, 4)) @test size(z_bit_tuple) == (4, 4) @test !any(z_bit_tuple) + @test z_bit_tuple isa BitMatrix o_bit_tuple = ones!(pool, Bit, (4, 4)) @test size(o_bit_tuple) == (4, 4) @test all(o_bit_tuple) + @test o_bit_tuple isa BitMatrix end @testset "Generic DisabledPool fallback for unknown backend" begin @@ -564,10 +650,42 @@ z = AdaptiveArrayPools._zeros_impl!(pool, Bit, (3, 3)) @test size(z) == (3, 3) @test !any(z) + @test z isa BitMatrix o = AdaptiveArrayPools._ones_impl!(pool, Bit, (3, 3)) @test size(o) == (3, 3) @test all(o) + @test o isa BitMatrix + + # Test _acquire_impl! 
returns BitVector (not SubArray) + bv = AdaptiveArrayPools._acquire_impl!(pool, Bit, 100) + @test bv isa BitVector + @test length(bv) == 100 + + bv = AdaptiveArrayPools._acquire_impl!(pool, Bit, (10, 10)) + @test bv isa BitMatrix + @test size(bv) == (10, 10) + end + @testset "BitTypedPool growth warning at 512 arrays" begin + # Use a fresh pool to ensure we start from 0 + pool = AdaptiveArrayPool() + + @test pooling_enabled(pool) == true + + # Acquire 511 arrays without rewind - no warning yet + for i in 1:511 + acquire!(pool, Bit, 10) + end + @test pool.bits.n_active == 511 + + # The 512th acquire should trigger a warning + @test_logs (:warn, r"BitTypedPool growing large \(512 arrays") begin + acquire!(pool, Bit, 10) + end + @test pool.bits.n_active == 512 + + # Clean up + empty!(pool) end end # BitArray Support diff --git a/test/test_utils.jl b/test/test_utils.jl index ddbde0c..4efd0d2 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -196,8 +196,8 @@ end rewind!(pool) end - @testset "Base.show for TypedPool" begin - import AdaptiveArrayPools: TypedPool + @testset "Base.show for TypedPool & BitTypedPool" begin + import AdaptiveArrayPools: TypedPool, BitTypedPool # Empty TypedPool - compact show tp_empty = TypedPool{Float64}() @@ -210,6 +210,8 @@ end acquire!(pool, Float64, 100) acquire!(pool, Float64, 50) + acquire!(pool, Bit, 10) + output = sprint(show, pool.float64) @test occursin("TypedPool{Float64}", output) @test occursin("slots=2", output) @@ -222,6 +224,16 @@ end @test occursin("slots:", output) @test occursin("active:", output) + # BitTypedPool - compact show + output = sprint(show, pool.bits) + @test output == "BitTypedPool(slots=1, active=1, bits=10)" + # Multi-line show (MIME"text/plain") + output = sprint(show, MIME("text/plain"), pool.bits) + @test occursin("BitTypedPool", output) + @test occursin("slots:", output) + @test occursin("active:", output) + @test occursin("bits:", output) + rewind!(pool) end @@ -266,6 +278,54 @@ end @test occursin("empty", output) end + @testset "pool_stats for BitTypedPool" begin + import AdaptiveArrayPools: BitTypedPool + + # Empty BitTypedPool + btp = BitTypedPool() + output = @capture_out pool_stats(btp) + @test occursin("Bit", output) + @test occursin("empty", output) + + # BitTypedPool with content (via AdaptiveArrayPool) + pool = AdaptiveArrayPool() + checkpoint!(pool) + + # Acquire some BitVectors + bv1 = acquire!(pool, Bit, 100) + bv2 = acquire!(pool, Bit, 200) + + output = @capture_out pool_stats(pool) + @test occursin("Bit (fixed)", output) + @test occursin("slots: 2", output) + @test occursin("active: 2", output) + @test occursin("bits:", output) # BitTypedPool uses "bits" label, not "elements" + @test occursin("300", output) # Total bits: 100 + 200 + + rewind!(pool) + + # Test direct BitTypedPool stats + btp2 = BitTypedPool() + # Manually add vectors for testing + push!(btp2.vectors, BitVector(undef, 64)) + btp2.n_active = 1 + + output = @capture_out pool_stats(btp2) + @test occursin("Bit", output) + @test occursin("slots: 1", output) + @test occursin("bits: 64", output) + end + + @testset "direct call of internal helpers" begin + import AdaptiveArrayPools: _default_type_name, _vector_bytes, _count_label, TypedPool, BitTypedPool + @test _default_type_name(TypedPool{Float64}()) == "Float64" + @test _default_type_name(BitTypedPool()) == "Bit" + @test _vector_bytes([1, 2, 3]) == Base.summarysize([1, 2, 3]) + @test _vector_bytes(BitVector(undef, 100)) == sizeof(BitVector(undef, 100).chunks) + @test 
_count_label(TypedPool{Int}()) == "elements" + @test _count_label(BitTypedPool()) == "bits" + end + @testset "_validate_pool_return with N-D arrays" begin pool = AdaptiveArrayPool() checkpoint!(pool)