From c041b10516b2641f5c29f7801a0b96dfd2a708ad Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 5 Feb 2025 12:21:27 +0800 Subject: [PATCH 1/2] Precommit test --- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 156 ++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 735621aa4390e..f38de743c68ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5741,3 +5741,159 @@ vector.body: for.cond.cleanup: ret void } + +define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) { +; CHECK-LABEL: sink_splat_vfwadd_vf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: fcvt.d.s fa5, fa0 +; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: .LBB125_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vl1re32.v v10, (a0) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwadd.wv v12, v8, v10 +; CHECK-NEXT: vs2r.v v12, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: j .LBB125_1 +entry: + %f.ext = fpext float %f to double + %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0 + %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr float, ptr %a, i64 %index + %wide.load = load <vscale x 2 x float>, ptr %0 + %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double> + %1 = fadd <vscale x 2 x double> %ext, %broadcast.splat + %2 = getelementptr double, ptr %b, i64 %index + store <vscale x 2 x double> %1, ptr %0 + %index.next = add i64 %index, 4 + %3 = icmp eq i64 %index.next, 1024 + br i1 32, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body 
+ ret void +} + +define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) { +; CHECK-LABEL: sink_splat_vfwadd_wf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: fcvt.d.s fa5, fa0 +; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; CHECK-NEXT: .LBB126_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vl2re64.v v8, (a0) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: vfadd.vf v8, v8, fa5 +; CHECK-NEXT: vs2r.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: j .LBB126_1 +entry: + %f.ext = fpext float %f to double + %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0 + %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr double, ptr %a, i64 %index + %wide.load = load <vscale x 2 x double>, ptr %0 + %1 = fadd <vscale x 2 x double> %wide.load, %broadcast.splat + %2 = getelementptr double, ptr %b, i64 %index + store <vscale x 2 x double> %1, ptr %0 + %index.next = add i64 %index, 4 + %3 = icmp eq i64 %index.next, 1024 + br i1 32, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) { +; CHECK-LABEL: sink_splat_vfwmul_vf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: fcvt.d.s fa5, fa0 +; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: .LBB127_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vl1re32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vfmul.vf v8, v10, fa5 +; CHECK-NEXT: vs2r.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: j .LBB127_1 
+entry: + %f.ext = fpext float %f to double + %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0 + %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr float, ptr %a, i64 %index + %wide.load = load <vscale x 2 x float>, ptr %0 + %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double> + %1 = fmul <vscale x 2 x double> %ext, %broadcast.splat + %2 = getelementptr double, ptr %b, i64 %index + store <vscale x 2 x double> %1, ptr %0 + %index.next = add i64 %index, 4 + %3 = icmp eq i64 %index.next, 1024 + br i1 32, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +; Even though there's no vfwmul.wf we'll sink the fcvt.d.s. Make sure +; early-machinelicm undoes the sink after isel. +define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) { +; CHECK-LABEL: sink_splat_vfwmul_wf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: fcvt.d.s fa5, fa0 +; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; CHECK-NEXT: .LBB128_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vl2re64.v v8, (a0) +; CHECK-NEXT: addi a1, a1, 4 +; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: vfmul.vf v8, v8, fa5 +; CHECK-NEXT: vs2r.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: j .LBB128_1 +entry: + %f.ext = fpext float %f to double + %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0 + %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr float, ptr %a, i64 %index + %wide.load = load <vscale x 2 x double>, ptr %0 + %1 = fmul <vscale x 2 x double> %wide.load, %broadcast.splat + %2 = getelementptr double, ptr %b, i64 %index + store <vscale x 2 x double> %1, ptr %0 + %index.next = add i64 %index, 4 + %3 = 
icmp eq i64 %index.next, 1024 + br i1 32, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} From 5ba5d33a78009f70a7828dfac81dfb0a706a2b06 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 5 Feb 2025 12:41:31 +0800 Subject: [PATCH 2/2] [RISCV] Sink splatted fpext operands We sink splatted operands in codegenprepare to help match .vx/.vf patterns. This extends it to also sink any fpext so that we can match widening vfwadd.vf/vfwadd.wf patterns too. Some instructions don't have .wf forms so there's no benefit to sinking the fpext. For simplicity this sinks them anyway and lets early-machinelicm hoist them back out. --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 7 ++++- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 27 +++++++------------ 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index fa7c7c57be257..6799130bde130 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2712,7 +2712,12 @@ bool RISCVTTIImpl::isProfitableToSinkOperands( return false; } - Ops.push_back(&Op->getOperandUse(0)); + Use *InsertEltUse = &Op->getOperandUse(0); + // Sink any fpexts since they might be used in a widening fp pattern. 
+ auto *InsertElt = cast<InsertElementInst>(InsertEltUse); + if (isa<FPExtInst>(InsertElt->getOperand(1))) + Ops.push_back(&InsertElt->getOperandUse(1)); + Ops.push_back(InsertEltUse); Ops.push_back(&OpIdx.value()); } return true; diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index f38de743c68ba..1948675ae9cf0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5746,18 +5746,15 @@ define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) ; CHECK-LABEL: sink_splat_vfwadd_vf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: li a2, 1020 -; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB125_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v10, (a0) +; CHECK-NEXT: vl1re32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a2, a2, -4 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwadd.wv v12, v8, v10 -; CHECK-NEXT: vs2r.v v12, (a0) +; CHECK-NEXT: vfwadd.vf v10, v8, fa0 +; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: j .LBB125_1 entry: @@ -5786,15 +5783,14 @@ define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) ; CHECK-LABEL: sink_splat_vfwadd_wf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: li a2, 1020 -; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB126_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl2re64.v v8, (a0) ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a2, a2, -4 -; CHECK-NEXT: vfadd.vf v8, v8, fa5 +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: vs2r.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 32 ; 
CHECK-NEXT: j .LBB126_1 @@ -5823,18 +5819,15 @@ define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) ; CHECK-LABEL: sink_splat_vfwmul_vf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB127_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a2, a2, -4 -; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwcvt.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vfmul.vf v8, v10, fa5 -; CHECK-NEXT: vs2r.v v8, (a0) +; CHECK-NEXT: vfwmul.vf v10, v8, fa0 +; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: j .LBB127_1 entry: @@ -5865,8 +5858,8 @@ define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) ; CHECK-LABEL: sink_splat_vfwmul_wf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: li a2, 1020 +; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma ; CHECK-NEXT: .LBB128_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1