Skip to content

Do we want fixed-size arrays? #5857

@timholy

Description

@timholy

One of the fun things you can do with Cartesian is easily generate unrolled matrix multiplication algorithms. This might be particularly useful for a fixed-size array type (see also ImmutableArrays; the major difference is that ImmutableArrays generates types called Matrix2x3 etc, whereas here the size is encoded as a parameter).

Here's a short proof-of-concept demo (which takes many shortcuts in the interest of brevity):

module FixedArrays

using Base.Cartesian
import Base: A_mul_B!

type FixedArray{T,N,SZ,L} <: DenseArray{T,N}
    data::NTuple{L,T}
end

function fixedvector{T}(::Type{T}, len::Integer, data)
    length(data) == len || error("The vector length $len is inconsistent with $(length(data)) elements")
    FixedArray{T,1,(int(len),),int(len)}(ntuple(length(x), i->convert(T, data[i])))  # needs to be faster than this
end
fixedvector(data) = fixedvector(typeof(data[1]), length(data), data)

function fixedmatrix{T}(::Type{T}, cols::Integer, rows::Integer, data)
    rows*cols == length(data) || error("The number $(length(data)) of elements supplied is not equal to $(cols*rows)")
    FixedArray{T,2,(int(cols),int(rows)),int(rows)*int(cols)}(ntuple(length(data), i->convert(T, data[i])))
end
fixedmatrix(data::AbstractMatrix) = fixedmatrix(eltype(data), size(data,1), size(data,2), data)

# For now, output is an array to bypass the slow FixedArray constructor
for N=1:4, K=1:4, M=1:4
    @eval begin
        function A_mul_B_FA!{TA,TB}(dest::AbstractMatrix, A::FixedArray{TA,2,($M,$K)}, B::FixedArray{TB,2,($K,$N)})
            TP = typeof(one(TA)*one(TB) + one(TA)*one(TB))
            @nexprs $K d2->(@nexprs $M d1->A_d1_d2 = A.data[(d2-1)*$M+d1])
            @nexprs $N d2->(@nexprs $K d1->B_d1_d2 = B.data[(d2-1)*$K+d1])
            @nexprs $N n->(@nexprs $M m->begin
                tmp = zero(TP)
                @nexprs $K k->(tmp += A_m_k * B_k_n)
                dest[m,n] = tmp
            end)
            dest
        end
    end
end

end

and the results in a session:

julia> AA = rand(2,2)
2x2 Array{Float64,2}:
 0.73037   0.525699
 0.581208  0.973336

julia> BB = rand(2,3)
2x3 Array{Float64,2}:
 0.527632  0.0111126   0.701888
 0.840294  0.00627326  0.450177

julia> include("FixedArrays.jl")

julia> A = FixedArrays.fixedmatrix(AA);

julia> B = FixedArrays.fixedmatrix(BB);

julia> C = Array(Float64, 2, 3);

julia> FixedArrays.A_mul_B_FA!(C, A, B)
2x3 Array{Float64,2}:
 0.827108  0.0114142  0.749295
 1.12455   0.0125647  0.846116

julia> AA*BB
2x3 Array{Float64,2}:
 0.827108  0.0114142  0.749295
 1.12455   0.0125647  0.846116

julia> @time (for i = 1:10^5; FixedArrays.A_mul_B_FA!(C, A, B); end)
elapsed time: 0.004008817 seconds (0 bytes allocated)

julia> @time (for i = 1:10^5; FixedArrays.A_mul_B_FA!(C, A, B); end)
elapsed time: 0.004217504 seconds (0 bytes allocated)

julia> @time (for i = 1:10^5; A_mul_B!(C, AA, BB); end)
elapsed time: 0.099574707 seconds (3856 bytes allocated)

julia> @time (for i = 1:10^5; A_mul_B!(C, AA, BB); end)
elapsed time: 0.098794949 seconds (0 bytes allocated)

Because of the use of globals, this 20-fold improvement may even underestimate the advantage this would confer.

Do we want this? If so, I recommend it as a GSoC project. It would actually be quite a lot of work, especially to get these working in conjunction with more traditional array types---the number of required methods is not small and indeed somewhat scary.

For the record, here's what the generated code looks like for the case of 2x2 multiplication (look Ma, no loops!):

julia> using Base.Cartesian

julia> M = K = N = 2
2

julia> macroexpand(:(function A_mul_B_FA!{TA,TB}(dest::AbstractMatrix, A::FixedArray{TA,2,($M,$K)}, B::FixedArray{TB,2,($K,$N)})
                   TP = typeof(one(TA)*one(TB) + one(TA)*one(TB))
                   @nexprs $K d2->(@nexprs $M d1->A_d1_d2 = A.data[(d2-1)*$M+d1])
                   @nexprs $N d2->(@nexprs $K d1->B_d1_d2 = B.data[(d2-1)*$K+d1])
                   @nexprs $N n->(@nexprs $M m->begin
                       tmp = zero(TP)
                       @nexprs $K k->(tmp += A_m_k * B_k_n)
                       dest[m,n] = tmp
                   end)
                   dest
               end))
:(function A_mul_B_FA!{TA,TB}(dest::AbstractMatrix,A::FixedArray{TA,2,(2,2)},B::FixedArray{TB,2,(2,2)}) # none, line 2:
        TP = typeof(one(TA) * one(TB) + one(TA) * one(TB)) # line 3:
        begin 
            begin 
                A_1_1 = A.data[1]
                A_2_1 = A.data[2]
            end
            begin 
                A_1_2 = A.data[3]
                A_2_2 = A.data[4]
            end
        end # line 4:
        begin 
            begin 
                B_1_1 = B.data[1]
                B_2_1 = B.data[2]
            end
            begin 
                B_1_2 = B.data[3]
                B_2_2 = B.data[4]
            end
        end # line 5:
        begin 
            begin 
                begin  # none, line 6:
                    tmp = zero(TP) # line 7:
                    begin 
                        tmp += A_1_1 * B_1_1
                        tmp += A_1_2 * B_2_1
                    end # line 8:
                    dest[1,1] = tmp
                end
                begin  # none, line 6:
                    tmp = zero(TP) # line 7:
                    begin 
                        tmp += A_2_1 * B_1_1
                        tmp += A_2_2 * B_2_1
                    end # line 8:
                    dest[2,1] = tmp
                end
            end
            begin 
                begin  # none, line 6:
                    tmp = zero(TP) # line 7:
                    begin 
                        tmp += A_1_1 * B_1_2
                        tmp += A_1_2 * B_2_2
                    end # line 8:
                    dest[1,2] = tmp
                end
                begin  # none, line 6:
                    tmp = zero(TP) # line 7:
                    begin 
                        tmp += A_2_1 * B_1_2
                        tmp += A_2_2 * B_2_2
                    end # line 8:
                    dest[2,2] = tmp
                end
            end
        end # line 10:
        dest
    end)

Metadata

Metadata

Assignees

No one assigned

    Labels

    help wantedIndicates that a maintainer wants help on an issue or pull request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions