-
Notifications
You must be signed in to change notification settings - Fork 14.5k
Open
Labels
Description
Example from rust-lang/rust#101060:
; Reproducer module: target is x86-64 Linux, as given by the triple below.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @test sums the population counts of the first two i64 elements pointed to by
; %arr and returns that sum. The loop has a constant trip count of 2
; (%iv runs 0, 1; the exit test compares %iv.next against 2).
define i64 @test(ptr %arr) {
entry:
br label %loop
loop:
; Running sum of ctpop results; starts at 0 when entered from %entry.
%accum = phi i64 [ %accum.next, %loop ], [ 0, %entry ]
; Induction variable used as the element index; starts at 0.
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
%iv.next = add nuw i64 %iv, 1
; Load arr[iv] as an i64.
%gep = getelementptr inbounds i64, ptr %arr, i64 %iv
%value = load i64, ptr %gep, align 8
; Count the set bits of the loaded value and add them to the running sum.
%ctpop = tail call i64 @llvm.ctpop.i64(i64 %value)
%accum.next = add i64 %accum, %ctpop
; Leave the loop once the incremented index reaches 2, i.e. after two iterations.
%exitcond = icmp eq i64 %iv.next, 2
br i1 %exitcond, label %exit, label %loop
exit:
; Single-incoming phi carrying the final sum out of the loop.
%lcssa = phi i64 [ %accum.next, %loop ]
ret i64 %lcssa
}
; Population-count intrinsic used in the loop body.
declare i64 @llvm.ctpop.i64(i64)
This two-iteration loop gets vectorized by opt -loop-vectorize -mcpu=znver2
(https://llvm.godbolt.org/z/M8qTdTbfE), because we assign cost 1 to the scalar ctpop and cost 3 to the vector ctpop, so vectorization is nominally "profitable". At least for low iteration counts, this is not actually the case.