[SLP] Include vectorized calls in spill cost #125650

Closed
wants to merge 2 commits

Conversation

lukel97
Contributor

@lukel97 lukel97 commented Feb 4, 2025

Even though an intrinsic may be vectorized, the backend may still end up scalarizing it.

Usually in that case IntrCost >= CallCost, so NoCallIntrinsic already detects the opposite case, where the intrinsic won't be scalarized, and excludes it from the spill cost.
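
For context, NoCallIntrinsic is the helper inside BoUpSLP::getSpillCost that decides whether an intrinsic call will really stay a call. The snippet below is only a paraphrased sketch of that check, not the exact upstream code: it treats an intrinsic as "not a call" when its intrinsic cost is below the cost of an actual call, i.e. when the backend is not expected to lower it to a libcall.

// Paraphrased sketch of the NoCallIntrinsic check referred to above; names
// and details are approximate rather than copied from SLPVectorizer.cpp.
auto NoCallIntrinsic = [&](const CallBase *CB) {
  if (const auto *II = dyn_cast<IntrinsicInst>(CB)) {
    // Assume-like intrinsics never become real calls.
    if (II->isAssumeLikeIntrinsic())
      return true;
    IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
    InstructionCost IntrCost = TTI->getIntrinsicInstrCost(
        ICA, TargetTransformInfo::TCK_RecipThroughput);
    InstructionCost CallCost = TTI->getCallInstrCost(
        nullptr, II->getType(), ICA.getArgTypes(),
        TargetTransformInfo::TCK_RecipThroughput);
    // Cheaper than a real call: assume the backend keeps it inline, so it
    // doesn't force values to be spilled across it.
    return IntrCost < CallCost;
  }
  return false;
};

With this patch, any call that fails that check is counted towards NumCalls, whether or not SLP has vectorized it.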

This fixes examples like

#include <math.h>
void f(double *f) {
  double a = f[0], b = f[1];
  a += 1;
  b += 1;
  a = tanh(a);
  b = tanh(b);
  a += 1;
  b += 1;
  f[0] = a;
  f[1] = b;
}

from being unprofitably vectorized to this after #124984:

vsetivli	zero, 2, e64, m1, ta, ma
vle64.v	v8, (a0)
.Lpcrel_hi0:
auipc	a0, %pcrel_hi(.LCPI0_0)
fld	fs1, %pcrel_lo(.Lpcrel_hi0)(a0)
vfadd.vf	v8, v8, fs1
addi	a0, sp, 16
vs1r.v	v8, (a0)                        # Unknown-size Folded Spill
vslidedown.vi	v8, v8, 1
vfmv.f.s	fa0, v8
call	tanh
fmv.d	fs0, fa0
fld	fa0, 16(sp)                     # 8-byte Folded Reload
call	tanh
vsetivli	zero, 2, e64, m1, ta, ma
vfmv.v.f	v8, fa0
vfslide1down.vf	v8, v8, fs0
vfadd.vf	v8, v8, fs1
vse64.v	v8, (s0)
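
For reference, the spill in the assembly above comes from the scalarization: the backend expands the vectorized tanh into two scalar libcalls, roughly as in the hand-written IR sketch below (illustrative only, not actual pipeline output). Values needed after those calls are live across them, so they get spilled and reloaded, which is why the vector register v8 is saved to the stack before the first call to tanh.

define void @f(ptr %f) {
  %v = load <2 x double>, ptr %f, align 8
  %v.add = fadd <2 x double> %v, <double 1.000000e+00, double 1.000000e+00>
  %a = extractelement <2 x double> %v.add, i64 0
  %b = extractelement <2 x double> %v.add, i64 1
  ; Both libcall results are still needed below, so values stay live across
  ; the calls and must be spilled/reloaded by the register allocator.
  %a.tanh = call double @tanh(double %a)
  %b.tanh = call double @tanh(double %b)
  %v.0 = insertelement <2 x double> poison, double %a.tanh, i64 0
  %v.1 = insertelement <2 x double> %v.0, double %b.tanh, i64 1
  %v.add2 = fadd <2 x double> %v.1, <double 1.000000e+00, double 1.000000e+00>
  store <2 x double> %v.add2, ptr %f, align 8
  ret void
}

declare double @tanh(double)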

@llvmbot
Member

llvmbot commented Feb 4, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-vectorizers

Author: Luke Lau (lukel97)

Changes


Full diff: https://github.com/llvm/llvm-project/pull/125650.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+3-3)
  • (modified) llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (+46)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 539c9227af7e3a..ef8740ec2b451f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12248,10 +12248,10 @@ InstructionCost BoUpSLP::getSpillCost() {
       };
 
       // Debug information does not impact spill cost.
-      // Vectorized calls, represented as vector intrinsics, do not impact spill
-      // cost.
+      // Vectorized calls, represented as vector intrinsics, may still impact
+      // spill cost if scalarized in codegen.
       if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
-          CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
+          CB && !NoCallIntrinsic(CB))
         NumCalls++;
 
       ++PrevInstIt;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index 5bfd776512711f..4a2a6ace5f8b88 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -141,6 +141,52 @@ entry:
   ret <4 x float> %vecins.3
 }
 
+; Don't vectorize @llvm.exp because it will be scalarized in codegen.
+define void @exp_v2f64(ptr %a) {
+; CHECK-LABEL: define void @exp_v2f64
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[X:%.*]] = load double, ptr [[A]], align 8
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT:    [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; CHECK-NEXT:    [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; CHECK-NEXT:    [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; CHECK-NEXT:    [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; CHECK-NEXT:    [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; CHECK-NEXT:    [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; CHECK-NEXT:    store double [[X_ADD2]], ptr [[A]], align 8
+; CHECK-NEXT:    store double [[Y_ADD2]], ptr [[GEP]], align 8
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: define void @exp_v2f64
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT:    [[X:%.*]] = load double, ptr [[A]], align 8
+; DEFAULT-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; DEFAULT-NEXT:    [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; DEFAULT-NEXT:    [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; DEFAULT-NEXT:    [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; DEFAULT-NEXT:    [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; DEFAULT-NEXT:    [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; DEFAULT-NEXT:    [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; DEFAULT-NEXT:    [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; DEFAULT-NEXT:    store double [[X_ADD2]], ptr [[A]], align 8
+; DEFAULT-NEXT:    store double [[Y_ADD2]], ptr [[GEP]], align 8
+; DEFAULT-NEXT:    ret void
+;
+  %x = load double, ptr %a
+  %gep = getelementptr double, ptr %a, i64 1
+  %y = load double, ptr %gep
+  %x.add = fadd double %x, 1.0
+  %y.add = fadd double %y, 1.0
+  %x.exp = call double @llvm.exp(double %x.add)
+  %y.exp = call double @llvm.exp(double %y.add)
+  %x.add2 = fadd double %x.exp, 1.0
+  %y.add2 = fadd double %y.exp, 1.0
+  store double %x.add2, ptr %a
+  store double %y.add2, ptr %gep
+  ret void
+}
+
 declare float @expf(float) readonly nounwind willreturn
 
 ; We can not vectorized exp since RISCV has no such instruction.

@lukel97
Contributor Author

lukel97 commented Feb 4, 2025

Closing as I didn't see #125070, sorry for the noise.

@lukel97 lukel97 closed this Feb 4, 2025