From 3a189cdedaa5003d1482defbf109426145d73a49 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 15 Jul 2024 21:23:07 +0800
Subject: [PATCH 1/2] Precommit tests

---
 .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 12 ++++++++++++
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 13 +++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 41c7d1f5fd64c..80e86384e7397 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -670,3 +670,15 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
   ret <4 x half> %load
 }
+
+define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
+; CHECK-OPT-LABEL: zero_strided_vadd.vx:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-OPT-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-OPT-NEXT:    vadd.vv v8, v8, v10
+; CHECK-OPT-NEXT:    ret
+  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
+  %w = add <4 x i64> %v, %load
+  ret <4 x i64> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 6b8ded4914226..07bef81ef06c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -822,3 +822,16 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
   %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
   ret <vscale x 1 x half> %load
 }
+
+define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-OPT-LABEL: zero_strided_vadd.vx:
+; CHECK-OPT:       # %bb.0:
+; CHECK-OPT-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-OPT-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-OPT-NEXT:    vadd.vv v8, v8, v9
+; CHECK-OPT-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
+  %w = add <vscale x 1 x i64> %v, %load
+  ret <vscale x 1 x i64> %w
+}

From a70190c665a4ce3357cef3bc4cc3a38bdd6676bb Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 15 Jul 2024 21:33:20 +0800
Subject: [PATCH 2/2] [RISCV] Always expand zero strided vp.strided.loads

This patch makes zero strided VP loads always be expanded to a scalar
load and splat, even if +optimized-zero-stride-load is present.
Expanding them allows more .vx splat patterns to be matched, which is
needed to prevent regressions in #98111.

If the feature is present, RISCVISelDAGToDAG will combine the expanded
form back to a zero strided load.

The RV32 test diff also shows how we need to emit a zero strided load
either way after expanding an SEW=64 strided load. We could maybe fix
this in a later patch by not doing the expansion if SEW > XLEN.
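
As a rough before/after illustration using the <4 x i64> test from the
precommit patch (a sketch only: the llvm.experimental.vp.splat form below
is an assumption for illustration, not necessarily the pass's literal
output), the input

  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
  %w = add <4 x i64> %v, %load

is expanded by RISCVCodeGenPrepare into something along the lines of

  ; sketch: the exact splat intrinsic emitted by expandVPStrideLoad may differ
  %scalar = load i64, ptr %ptr
  %splat = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 %scalar, <4 x i1> splat (i1 true), i32 4)
  %w = add <4 x i64> %v, %splat

so isel can fold the splat into vadd.vx on RV64, while subtargets with
optimized-zero-stride-load still recover the zero strided vlse64.v in
RISCVDAGToDAGISel::Select.
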
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp |  6 ++--
 .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 31 ++++++++++++++-----
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 27 ++++++++++++----
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 35c46157c2eb9..b3f3dc6e2256c 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -163,10 +163,10 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
   return true;
 }
 
+// Always expand zero strided loads so we match more .vx splat patterns, even if
+// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
+// it back to a strided load if it's optimized.
 bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
-  if (ST->hasOptimizedZeroStrideLoad())
-    return false;
-
   Value *BasePtr, *VL;
 
   using namespace PatternMatch;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 80e86384e7397..95f853b77f18b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,7 +638,7 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
@@ -657,7 +657,7 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
@@ -672,12 +672,27 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 }
 
 define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_vadd.vx:
-; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-OPT-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-OPT-NEXT:    vadd.vv v8, v8, v10
-; CHECK-OPT-NEXT:    ret
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    lw a1, 4(a0)
+; CHECK-RV32-NEXT:    lw a0, 0(a0)
+; CHECK-RV32-NEXT:    sw a1, 12(sp)
+; CHECK-RV32-NEXT:    sw a0, 8(sp)
+; CHECK-RV32-NEXT:    addi a0, sp, 8
+; CHECK-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v10, (a0), zero
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v10
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    ret
   %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
   %w = add <4 x i64> %v, %load
   ret <4 x i64> %w
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 07bef81ef06c1..563da270272c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -824,12 +824,27 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
 }
 
 define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-OPT-LABEL: zero_strided_vadd.vx:
-; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-OPT-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-OPT-NEXT:    vadd.vv v8, v8, v9
-; CHECK-OPT-NEXT:    ret
+; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -16
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT:    lw a1, 4(a0)
+; CHECK-RV32-NEXT:    lw a0, 0(a0)
+; CHECK-RV32-NEXT:    sw a1, 12(sp)
+; CHECK-RV32-NEXT:    sw a0, 8(sp)
+; CHECK-RV32-NEXT:    addi a0, sp, 8
+; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
+; CHECK-RV32-NEXT:    addi sp, sp, 16
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    ret
   %vscale = call i32 @llvm.vscale()
   %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
   %w = add <vscale x 1 x i64> %v, %load