Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2712,7 +2712,12 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
return false;
}

Ops.push_back(&Op->getOperandUse(0));
Use *InsertEltUse = &Op->getOperandUse(0);
// Sink any fpexts since they might be used in a widening fp pattern.
auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
if (isa<FPExtInst>(InsertElt->getOperand(1)))
Ops.push_back(&InsertElt->getOperandUse(1));
Ops.push_back(InsertEltUse);
Ops.push_back(&OpIdx.value());
}
return true;
Expand Down
149 changes: 149 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5741,3 +5741,152 @@ vector.body:
for.cond.cleanup:
ret void
}

; Widening FP add, vector-scalar form. The scalar %f is extended to double and
; splatted in the entry block; the loop adds that splat to a float vector load
; that has been fpext'ed to double. The expected assembly uses vfwadd.vf
; directly on fa0 and contains no scalar conversion instruction.
; NOTE(review): the loop branch tests the literal `i1 32` rather than %3, and
; the store writes through %0 rather than %2, so %2 and %3 are dead. The
; expected assembly accordingly ends in an unconditional jump. Presumably `32`
; is a typo for %3 -- confirm, and regenerate the assertions if it is changed.
define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
; CHECK-LABEL: sink_splat_vfwadd_vf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: li a2, 1020
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB125_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: vfwadd.vf v10, v8, fa0
; CHECK-NEXT: vs2r.v v10, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: j .LBB125_1
entry:
; Scalar fpext followed by the insertelement + shufflevector splat idiom, all
; defined outside the loop.
%f.ext = fpext float %f to double
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr float, ptr %a, i64 %index
%wide.load = load <vscale x 2 x float>, ptr %0
; Both fadd operands are extended from float: the vector here, the scalar in
; the entry block.
%ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
%1 = fadd <vscale x 2 x double> %ext, %broadcast.splat
; %2 is computed but never used; the result is stored back through %0.
%2 = getelementptr double, ptr %b, i64 %index
store <vscale x 2 x double> %1, ptr %0
%index.next = add i64 %index, 4
; %3 is computed but the branch below does not consume it.
%3 = icmp eq i64 %index.next, 1024
br i1 32, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
ret void
}

; Widening FP add, wide-vector/scalar form. The scalar %f is extended to double
; and splatted in the entry block; the loop adds that splat to a vector that is
; loaded as double already, so only the scalar operand comes from a narrower
; type. The expected assembly uses vfwadd.wf directly on fa0.
; NOTE(review): the loop branch tests the literal `i1 32` rather than %3, and
; the store writes through %0 rather than %2, so %2 and %3 are dead. The
; expected assembly accordingly ends in an unconditional jump. Presumably `32`
; is a typo for %3 -- confirm, and regenerate the assertions if it is changed.
define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
; CHECK-LABEL: sink_splat_vfwadd_wf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: li a2, 1020
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB126_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re64.v v8, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: vfwadd.wf v8, v8, fa0
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: j .LBB126_1
entry:
; Scalar fpext followed by the insertelement + shufflevector splat idiom, all
; defined outside the loop.
%f.ext = fpext float %f to double
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr double, ptr %a, i64 %index
; The vector operand is loaded as double; there is no vector fpext in this test.
%wide.load = load <vscale x 2 x double>, ptr %0
%1 = fadd <vscale x 2 x double> %wide.load, %broadcast.splat
; %2 is computed but never used; the result is stored back through %0.
%2 = getelementptr double, ptr %b, i64 %index
store <vscale x 2 x double> %1, ptr %0
%index.next = add i64 %index, 4
; %3 is computed but the branch below does not consume it.
%3 = icmp eq i64 %index.next, 1024
br i1 32, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
ret void
}

; Widening FP multiply, vector-scalar form. The scalar %f is extended to double
; and splatted in the entry block; the loop multiplies that splat with a float
; vector load that has been fpext'ed to double. The expected assembly uses
; vfwmul.vf directly on fa0 and contains no scalar conversion instruction.
; NOTE(review): the loop branch tests the literal `i1 32` rather than %3, and
; the store writes through %0 rather than %2, so %2 and %3 are dead. The
; expected assembly accordingly ends in an unconditional jump. Presumably `32`
; is a typo for %3 -- confirm, and regenerate the assertions if it is changed.
define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
; CHECK-LABEL: sink_splat_vfwmul_vf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: li a2, 1020
; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB127_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: vfwmul.vf v10, v8, fa0
; CHECK-NEXT: vs2r.v v10, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: j .LBB127_1
entry:
; Scalar fpext followed by the insertelement + shufflevector splat idiom, all
; defined outside the loop.
%f.ext = fpext float %f to double
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr float, ptr %a, i64 %index
%wide.load = load <vscale x 2 x float>, ptr %0
; Both fmul operands are extended from float: the vector here, the scalar in
; the entry block.
%ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
%1 = fmul <vscale x 2 x double> %ext, %broadcast.splat
; %2 is computed but never used; the result is stored back through %0.
%2 = getelementptr double, ptr %b, i64 %index
store <vscale x 2 x double> %1, ptr %0
%index.next = add i64 %index, 4
; %3 is computed but the branch below does not consume it.
%3 = icmp eq i64 %index.next, 1024
br i1 32, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
ret void
}

; Even though there's no vfwmul.wf we'll sink the fcvt.d.s. Make sure
; early-machinelicm undoes the sink after isel.
; The expected assembly places fcvt.d.s ahead of the loop label and uses a
; plain vfmul.vf on the converted register (fa5) inside the loop.
; NOTE(review): the loop branch tests the literal `i1 32` rather than %3, and
; the store writes through %0 rather than %2, so %2 and %3 are dead. The
; expected assembly accordingly ends in an unconditional jump. Presumably `32`
; is a typo for %3 -- confirm, and regenerate the assertions if it is changed.
define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
; CHECK-LABEL: sink_splat_vfwmul_wf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: li a2, 1020
; CHECK-NEXT: fcvt.d.s fa5, fa0
; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma
; CHECK-NEXT: .LBB128_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re64.v v8, (a0)
; CHECK-NEXT: addi a1, a1, 4
; CHECK-NEXT: addi a2, a2, -4
; CHECK-NEXT: vfmul.vf v8, v8, fa5
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: j .LBB128_1
entry:
; Scalar fpext followed by the insertelement + shufflevector splat idiom, all
; defined outside the loop.
%f.ext = fpext float %f to double
%broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
%broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
; NOTE(review): this GEP indexes %a as float although the load below reads a
; double vector; sink_splat_vfwadd_wf uses a double GEP here. Possibly a
; copy-paste leftover -- confirm before changing, since the expected pointer
; increment of 16 depends on it.
%0 = getelementptr float, ptr %a, i64 %index
; The vector operand is loaded as double; there is no vector fpext in this test.
%wide.load = load <vscale x 2 x double>, ptr %0
%1 = fmul <vscale x 2 x double> %wide.load, %broadcast.splat
; %2 is computed but never used; the result is stored back through %0.
%2 = getelementptr double, ptr %b, i64 %index
store <vscale x 2 x double> %1, ptr %0
%index.next = add i64 %index, 4
; %3 is computed but the branch below does not consume it.
%3 = icmp eq i64 %index.next, 1024
br i1 32, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body
ret void
}