Labels: compiler:codegen (Generation of LLVM IR and native code), performance (Must go faster)
Description
It seems that for both f(args...) and f(args) (called with a tuple), the function is specialized to take a tuple as its argument. However, in the first case the arguments are always boxed into jl_value_t* before being passed in, while in the second case (at least after the tuple type change) no boxing or GC frame is necessary.
From the output of the example code below, it seems that the function is already specialized on the argument types, although the generic jl_value_t* type is somehow still used for both the input and the output.
Example code
@noinline f1(args...) = args
@noinline f2(args) = args
g1() = f1(1, 2, 3)
g2() = f2((1, 2, 3))
function time_func(f::Function, args...)
println(f)
f(args...)
gc()
@time for i in 1:1000000
f(args...)
end
gc()
end
@code_llvm g1()
@code_llvm f1(1, 2, 3)
@code_llvm g2()
@code_llvm f2((1, 2, 3))
time_func(g1)
time_func(g2)
Output
define [3 x i64] @julia_g1_44267() {
top:
%0 = alloca [5 x %jl_value_t*], align 8
%.sub = getelementptr inbounds [5 x %jl_value_t*]* %0, i64 0, i64 0
%1 = getelementptr [5 x %jl_value_t*]* %0, i64 0, i64 2
%2 = bitcast [5 x %jl_value_t*]* %0 to i64*
store i64 6, i64* %2, align 8
%3 = getelementptr [5 x %jl_value_t*]* %0, i64 0, i64 1
%4 = bitcast %jl_value_t** %3 to %jl_value_t***
%5 = load %jl_value_t*** @jl_pgcstack, align 8
store %jl_value_t** %5, %jl_value_t*** %4, align 8
store %jl_value_t** %.sub, %jl_value_t*** @jl_pgcstack, align 8
%6 = getelementptr [5 x %jl_value_t*]* %0, i64 0, i64 3
%7 = getelementptr [5 x %jl_value_t*]* %0, i64 0, i64 4
store %jl_value_t* inttoptr (i64 139909651832960 to %jl_value_t*), %jl_value_t** %1, align 8
store %jl_value_t* inttoptr (i64 139909651833008 to %jl_value_t*), %jl_value_t** %6, align 8
store %jl_value_t* inttoptr (i64 139909651833056 to %jl_value_t*), %jl_value_t** %7, align 8
%8 = call %jl_value_t* @julia_f1_44268(%jl_value_t* inttoptr (i64 139909684716976 to %jl_value_t*), %jl_value_t** %1, i32 3)
%9 = bitcast %jl_value_t* %8 to [3 x i64]*
%10 = load [3 x i64]* %9, align 8
%11 = load %jl_value_t*** %4, align 8
store %jl_value_t** %11, %jl_value_t*** @jl_pgcstack, align 8
ret [3 x i64] %10
}
define %jl_value_t* @julia_f1_44268(%jl_value_t*, %jl_value_t**, i32) {
top:
%3 = alloca [3 x %jl_value_t*], align 8
%.sub = getelementptr inbounds [3 x %jl_value_t*]* %3, i64 0, i64 0
%4 = getelementptr [3 x %jl_value_t*]* %3, i64 0, i64 2
%5 = bitcast [3 x %jl_value_t*]* %3 to i64*
store i64 2, i64* %5, align 8
%6 = getelementptr [3 x %jl_value_t*]* %3, i64 0, i64 1
%7 = bitcast %jl_value_t** %6 to %jl_value_t***
%8 = load %jl_value_t*** @jl_pgcstack, align 8
store %jl_value_t** %8, %jl_value_t*** %7, align 8
store %jl_value_t** %.sub, %jl_value_t*** @jl_pgcstack, align 8
store %jl_value_t* null, %jl_value_t** %4, align 8
%9 = call %jl_value_t* @jl_f_tuple(%jl_value_t* null, %jl_value_t** %1, i32 %2)
store %jl_value_t* %9, %jl_value_t** %4, align 8
%10 = load %jl_value_t*** %7, align 8
store %jl_value_t** %10, %jl_value_t*** @jl_pgcstack, align 8
ret %jl_value_t* %9
}
define [3 x i64] @julia_g2_44275() {
top:
%0 = call [3 x i64] @julia_f2_44276([3 x i64] [i64 1, i64 2, i64 3])
ret [3 x i64] %0
}
define [3 x i64] @julia_f2_44276([3 x i64]) {
top:
ret [3 x i64] %0
}
g1
elapsed time: 0.193401883 seconds (61 MB allocated, 14.06% gc time in 2 pauses with 0 full sweep)
g2
elapsed time: 0.025389954 seconds (30 MB allocated, 1.64% gc time in 1 pauses with 0 full sweep)
(Related to #11244)
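As a quicker check than the timing loop above, the allocation difference can also be observed per call. This is a minimal sketch, assuming the same Julia version as in the example above (where gc() and @allocated are available); the exact byte counts are not guaranteed, only nonzero vs. zero:
# Minimal sketch (assumption: same Julia version as above, where @allocated is available).
# The splatted call is expected to report a nonzero allocation for the boxed tuple,
# while the call taking an explicit tuple is expected to report zero.
@noinline f1(args...) = args
@noinline f2(args) = args
f1(1, 2, 3); f2((1, 2, 3))         # compile both methods first
println(@allocated f1(1, 2, 3))    # expected: nonzero (boxed tuple + GC frame)
println(@allocated f2((1, 2, 3)))  # expected: 0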