FixedSizeArrays.jl

What Array probably should have been

Mosè Giordano

UCL

Oscar Smith

JuliaHub

Neven Sajko

2025-07-25

[QR code to see this presentation]

Introduction

Motivation

Implementation

struct FixedSizeArray{T,N,Mem<:DenseVector{T}} <: DenseArray{T,N}
    mem::Mem
    size::NTuple{N,Int}
    global function new_fixed_size_array(mem::DenseVector{T}, size::NTuple{N,Int}) where {T,N}
        new{T,N,typeof(mem)}(mem, size)
    end
end
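
As a usage sketch (only constructor forms that appear elsewhere in these slides are used here): construction mirrors Array, except that the resulting length can never change.

using FixedSizeArrays

# Size is an ordinary runtime value, fixed once at construction time:
v = FixedSizeVector{Float64}(undef, 3)

# Converting an existing array copies its data into a fixed-size container:
w = FixedSizeArray([1.0, 2.0, 3.0])

# Elements stay mutable, but the length is frozen:
v[1] = 1.0
length(v)  # always 3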

Comparison with other array types

                 Size set at     Data backend   Growable   Mutable elements
Base.Array       runtime         Memory         ✓          ✓
FixedSizeArray   runtime         Memory         ✗          ✓
MArray           compile time    Tuple          ✗          ✓
SArray           compile time    Tuple          ✗          ✗
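
A quick illustration of the FixedSizeArray row: elements can be mutated in place, but, unlike Vector, the container cannot grow (the growth call is left commented out because it errors).

using FixedSizeArrays

v = FixedSizeArray([1, 2, 3])
v[1] = 10        # fine: elements are mutable
# push!(v, 4)    # errors: the length of a FixedSizeArray can never change

u = [1, 2, 3]
push!(u, 4)      # fine: Vector is growable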

Show off

Fewer allocations

using FixedSizeArrays, BenchmarkTools

for n in (0, 2^0, 2^5, 2^10)
    @info "n = $(n)"
    @btime fill!(Vector{Float64}(undef, $(n)), 0.0)
    @btime fill!(FixedSizeVectorDefault{Float64}(undef, $(n)), 0.0)
end
[ Info: n = 0
  7.752 ns (1 allocation: 32 bytes)
  3.716 ns (0 allocations: 0 bytes)
[ Info: n = 1
  15.289 ns (2 allocations: 64 bytes)
  11.472 ns (1 allocation: 32 bytes)
[ Info: n = 32
  19.104 ns (2 allocations: 320 bytes)
  13.071 ns (1 allocation: 288 bytes)
[ Info: n = 1024
  427.075 ns (3 allocations: 8.07 KiB)
  386.203 ns (2 allocations: 8.04 KiB)
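
A sketch of where the saved allocation comes from (reasoning, not package internals): a Vector is itself a mutable heap object wrapping its Memory buffer, whereas the FixedSizeArray wrapper is an immutable struct, so typically only the Memory buffer needs to be heap-allocated.

using FixedSizeArrays

ismutable(Vector{Float64}(undef, 3))                  # true: the Vector header is a heap object
ismutable(FixedSizeVectorDefault{Float64}(undef, 3))  # false: the wrapper is an immutable struct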

Constprop of length

using FixedSizeArrays, InteractiveUtils  # InteractiveUtils provides code_llvm

@noinline get_len(A::AbstractArray) = length(A)

code_llvm(() -> get_len(Vector{Float64}(undef, 3)), (); debuginfo=:none)
; Function Signature: var"#2"()
define i64 @"julia_#2_6477"() local_unnamed_addr #0 {
top:
  %gcframe1 = alloca [3 x ptr], align 16
  call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
  %thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #11
  %tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
  %tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
  store i64 4, ptr %gcframe1, align 8
  %frame.prev = getelementptr inbounds nuw i8, ptr %gcframe1, i64 8
  %task.gcstack = load ptr, ptr %tls_pgcstack, align 8
  store ptr %task.gcstack, ptr %frame.prev, align 8
  store ptr %gcframe1, ptr %tls_pgcstack, align 8
  %ptls_field = getelementptr inbounds nuw i8, ptr %tls_pgcstack, i64 16
  %ptls_load = load ptr, ptr %ptls_field, align 8
  %"Memory{Float64}[]" = call noalias nonnull align 8 dereferenceable(48) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 456, i32 48, i64 140671111092640) #7
  %"Memory{Float64}[].tag_addr" = getelementptr inbounds i8, ptr %"Memory{Float64}[]", i64 -8
  store atomic i64 140671111092640, ptr %"Memory{Float64}[].tag_addr" unordered, align 8
  %memory_ptr = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 8
  %memory_data = getelementptr inbounds nuw i8, ptr %"Memory{Float64}[]", i64 16
  store ptr %memory_data, ptr %memory_ptr, align 8
  store i64 3, ptr %"Memory{Float64}[]", align 8
  %gc_slot_addr_0 = getelementptr inbounds nuw i8, ptr %gcframe1, i64 16
  store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 8
  %ptls_load11 = load ptr, ptr %ptls_field, align 8
  %"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load11, i32 408, i32 32, i64 140671054949728) #7
  %"new::Array.tag_addr" = getelementptr inbounds i8, ptr %"new::Array", i64 -8
  store atomic i64 140671054949728, ptr %"new::Array.tag_addr" unordered, align 8
  %0 = getelementptr inbounds nuw i8, ptr %"new::Array", i64 8
  store ptr %memory_data, ptr %"new::Array", align 8
  store ptr %"Memory{Float64}[]", ptr %0, align 8
  %"new::Array.size_ptr" = getelementptr inbounds nuw i8, ptr %"new::Array", i64 16
  store i64 3, ptr %"new::Array.size_ptr", align 8
  store ptr %"new::Array", ptr %gc_slot_addr_0, align 8
  %1 = call i64 @j_get_len_6481(ptr nonnull %"new::Array")
  %frame.prev14 = load ptr, ptr %frame.prev, align 8
  store ptr %frame.prev14, ptr %tls_pgcstack, align 8
  ret i64 %1
}

Constprop of length (cont.)

code_llvm(() -> get_len(FixedSizeVector{Float64}(undef, 3)), (); debuginfo=:none)
; Function Signature: var"#5"()
define i64 @"julia_#5_6545"() local_unnamed_addr #0 {
top:
  ret i64 3
}
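
Why this can fold to a constant (a sketch of the reasoning): the dimensions are an ordinary field of the immutable struct from the Implementation slide, so length is just a field load, and the constructor's effects let the compiler propagate the literal 3 and drop the call entirely.

using FixedSizeArrays

v = FixedSizeVector{Float64}(undef, 3)
getfield(v, :size)  # (3,): the field declared on the Implementation slide
size(v)             # (3,)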

Better effects inference

using FixedSizeArrays

@noinline reshape_2_2(v::AbstractVector{Float64}) = reshape(v, 2, 2)

Base.infer_effects(() -> reshape_2_2(Vector{Float64}(undef, 3)), ())
(?c,+e,!n,+t,+s,+m,+u,+o,+r)
Base.infer_effects(() -> reshape_2_2(FixedSizeVector{Float64}(undef, 3)), ())
(+c,+e,!n,+t,+s,+m,+u,+o,+r)
Base.infer_effects(() -> reshape_2_2(Vector{Float64}(undef, 4)), ())
(?c,+e,!n,+t,+s,+m,+u,+o,+r)
Base.infer_effects(() -> reshape_2_2(FixedSizeVector{Float64}(undef, 4)), ())
(?c,+e,+n,+t,+s,+m,+u,+o,+r)
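
Reading the flags: each letter stands for one of Julia's effect bits, with + meaning the effect is proven, ! that it is not, and ? that it holds only conditionally; the first three are c = :consistent, e = :effect_free, n = :nothrow. The point above is that for FixedSizeVector the compiler proves :consistent in the length-3 case and :nothrow in the length-4 case (a length-4 vector always reshapes to 2×2), neither of which it manages for Vector. The bits can also be queried via compiler internals (not public API, shown only as an illustration):

using FixedSizeArrays

@noinline reshape_2_2(v::AbstractVector{Float64}) = reshape(v, 2, 2)

effects = Base.infer_effects(() -> reshape_2_2(FixedSizeVector{Float64}(undef, 4)), ())
Core.Compiler.is_nothrow(effects)  # true: reshaping a length-4 vector to 2×2 cannot throw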

Eliding memory allocations

using FixedSizeArrays, Random

function g(T)
    v = T(undef, 250)
    rand!(v)
    return foldl(+, v)
end

@info "Vector" allocations=@allocated g(Vector{Float64})
@info "FixedSizeVector" allocations=@allocated g(FixedSizeVectorDefault{Float64})
┌ Info: Vector
└   allocations = 2064
┌ Info: FixedSizeVector
└   allocations = 0
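
The zero-allocation result depends on the compiler proving that v never escapes g; the fixed length and the immutable wrapper plausibly make that analysis easier than for Vector. As a counter-example (g_escaping is a hypothetical variant, not from the package), returning the array makes it escape, and the buffer allocation comes back:

using FixedSizeArrays, Random

function g_escaping(T)
    v = T(undef, 250)
    rand!(v)
    return v      # escapes the function, so the buffer must really be heap-allocated
end

@allocated g_escaping(FixedSizeVectorDefault{Float64})  # nonzero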

Faster gradient with Enzyme

julia> using BenchmarkTools, FixedSizeArrays, Enzyme

julia> function g(x::AbstractVector{T}, y::AbstractVector{T}) where {T}
           sum = zero(float(T))
           for idx in eachindex(x, y)
               sum += sin(x[idx]) ^ 2 + cos(y[idx]) ^ 2
           end
           return sum
       end
g (generic function with 1 method)

julia> @btime gradient(Forward, g, x, y) setup=(x = randn(3); y = randn(3));
  753.210 ns (28 allocations: 1.09 KiB)

julia> @btime gradient(Forward, g, x, y) setup=(x = FixedSizeArray(randn(3)); y = FixedSizeArray(randn(3)));
  487.737 ns (20 allocations: 880 bytes)

Conclusions

  • Future improvements…