From 9bba3ddfe0f551ab6590c1019cef1c563a962eb3 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 5 Sep 2023 14:27:15 +0100 Subject: [PATCH 1/3] [RISCV] Add extract_subvector tests for a statically-known VLEN. NFC This is partly a precommit for an upcoming patch, and partly to remove the fixed length LMUL restriction similarly to what was done in https://reviews.llvm.org/D158270, since it's no longer that relevant. --- .../rvv/fixed-vectors-extract-subvector.ll | 513 +++++++++--------- 1 file changed, 265 insertions(+), 248 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index ed84a97c804f6..92b052fcaab83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,UNKNOWNVLEN +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,KNOWNVLEN define void @extract_v2i8_v4i8_0(ptr %x, ptr %y) { ; CHECK-LABEL: extract_v2i8_v4i8_0: @@ -62,22 +62,46 @@ define void @extract_v2i8_v8i8_6(ptr %x, ptr %y) { ret void } +define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) { +; CHECK-LABEL: extract_v1i32_v8i32_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 4) + store <1 x i32> %c, ptr %y + ret void +} + +define void @extract_v1i32_v8i32_5(ptr %x, ptr %y) { +; CHECK-LABEL: extract_v1i32_v8i32_5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 5 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 5) + store <1 x i32> %c, ptr %y + ret void +} + define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i32_v8i32_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i32_v8i32_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i32_v8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, 
e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 0) store <2 x i32> %c, ptr %y @@ -85,52 +109,47 @@ define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) { } define void @extract_v2i32_v8i32_2(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i32_v8i32_2: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i32_v8i32_2: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i32_v8i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2) store <2 x i32> %c, ptr %y ret void } +define void @extract_v2i32_v8i32_4(ptr %x, ptr %y) { +; CHECK-LABEL: extract_v2i32_v8i32_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret + %a = load <8 x i32>, ptr %x + %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 4) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i32_v8i32_6(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i32_v8i32_6: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vle32.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 6 -; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i32_v8i32_6: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vle32.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i32_v8i32_6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 6 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6) store <2 x i32> %c, ptr %y @@ -148,6 +167,33 @@ define void @extract_v2i32_nxv16i32_0( %x, ptr %y) { ret void } + +define void @extract_v2i32_nxv16i32_2( %x, ptr %y) { +; CHECK-LABEL: 
extract_v2i32_nxv16i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 2) + store <2 x i32> %c, ptr %y + ret void +} + +define void @extract_v2i32_nxv16i32_4( %x, ptr %y) { +; CHECK-LABEL: extract_v2i32_nxv16i32_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 4) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i32_nxv16i32_6( %x, ptr %y) { ; CHECK-LABEL: extract_v2i32_nxv16i32_6: ; CHECK: # %bb.0: @@ -161,6 +207,19 @@ define void @extract_v2i32_nxv16i32_6( %x, ptr %y) { ret void } +define void @extract_v2i32_nxv16i32_8( %x, ptr %y) { +; CHECK-LABEL: extract_v2i32_nxv16i32_8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 8) + store <2 x i32> %c, ptr %y + ret void +} + define void @extract_v2i8_nxv2i8_0( %x, ptr %y) { ; CHECK-LABEL: extract_v2i8_nxv2i8_0: ; CHECK: # %bb.0: @@ -185,47 +244,54 @@ define void @extract_v2i8_nxv2i8_2( %x, ptr %y) { ret void } +define void @extract_v2i8_nxv2i8_4( %x, ptr %y) { +; CHECK-LABEL: extract_v2i8_nxv2i8_4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %x, i64 4) + store <2 x i8> %c, ptr %y + ret void +} + +define void @extract_v2i8_nxv2i8_6( %x, ptr %y) { +; CHECK-LABEL: extract_v2i8_nxv2i8_6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 6 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %x, i64 6) + store <2 x i8> %c, ptr %y + ret void +} + define void @extract_v8i32_nxv16i32_8( %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i32_nxv16i32_8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m8, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 8 -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: vse32.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i32_nxv16i32_8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m8, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v16, v8, 8 -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 12 -; LMULMAX1-NEXT: addi a1, a0, 16 -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-NEXT: vse32.v v8, (a1) -; LMULMAX1-NEXT: vse32.v v16, (a0) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i32_nxv16i32_8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret %c = call <8 x i32> @llvm.vector.extract.v8i32.nxv16i32( %x, i64 8) store <8 x i32> %c, ptr %y ret void } define void @extract_v8i1_v64i1_0(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_0: -; 
LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0) store <8 x i1> %c, ptr %y @@ -233,26 +299,16 @@ define void @extract_v8i1_v64i1_0(ptr %x, ptr %y) { } define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_8: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_8: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 1 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8) store <8 x i1> %c, ptr %y @@ -260,26 +316,16 @@ define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) { } define void @extract_v8i1_v64i1_48(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v8i1_v64i1_48: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a0, a0, 4 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v8, (a0) -; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v8i1_v64i1_48: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 6 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v8, (a0) -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v8i1_v64i1_48: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 6 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48) store <8 x i1> %c, ptr %y @@ -334,40 +380,49 @@ define void 
@extract_v8i1_nxv64i1_48( %x, ptr %y) { ret void } +define void @extract_v8i1_nxv64i1_128( %x, ptr %y) { +; CHECK-LABEL: extract_v8i1_nxv64i1_128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v0, 16 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret + %c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1( %x, i64 128) + store <8 x i1> %c, ptr %y + ret void +} + +define void @extract_v8i1_nxv64i1_192( %x, ptr %y) { +; CHECK-LABEL: extract_v8i1_nxv64i1_192: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v0, 24 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: ret + %c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1( %x, i64 192) + store <8 x i1> %c, ptr %y + ret void +} define void @extract_v2i1_v64i1_0(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_0: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i1_v64i1_0: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i1_v64i1_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0) store <2 x i1> %c, ptr %y @@ -375,48 +430,27 @@ define void @extract_v2i1_v64i1_0(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_2: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; 
LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i1_v64i1_2: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i1_v64i1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2) store <2 x i1> %c, ptr %y @@ -424,50 +458,28 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { -; LMULMAX2-LABEL: extract_v2i1_v64i1_42: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi a0, a0, 4 -; LMULMAX2-NEXT: li a2, 32 -; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; LMULMAX2-NEXT: vlm.v v0, (a0) -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, m2, ta, ma -; LMULMAX2-NEXT: vslidedown.vi v8, v8, 10 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX2-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX2-NEXT: vmv.v.i v8, 0 -; LMULMAX2-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmv.v.i v9, 0 -; LMULMAX2-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; LMULMAX2-NEXT: vmv.v.v v9, v8 -; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX2-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX2-NEXT: vsm.v v8, (a1) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-LABEL: extract_v2i1_v64i1_42: -; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a0, a0, 4 -; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-NEXT: vlm.v v0, (a0) -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; LMULMAX1-NEXT: vslidedown.vi v8, v8, 10 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmsne.vi v0, v8, 0 -; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; 
LMULMAX1-NEXT: vmv.v.v v9, v8 -; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 -; LMULMAX1-NEXT: vsm.v v8, (a1) -; LMULMAX1-NEXT: ret +; CHECK-LABEL: extract_v2i1_v64i1_42: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: li a0, 42 +; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vsm.v v8, (a1) +; CHECK-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42) store <2 x i1> %c, ptr %y @@ -640,9 +652,14 @@ declare <8 x i1> @llvm.vector.extract.v8i1.nxv64i1( %vec, i64 declare <2 x i8> @llvm.vector.extract.v2i8.v4i8(<4 x i8> %vec, i64 %idx) declare <2 x i8> @llvm.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx) + +declare <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %vec, i64 %idx) declare <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx) declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %vec, i64 %idx) declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %vec, i64 %idx) declare <8 x i32> @llvm.vector.extract.v8i32.nxv16i32( %vec, i64 %idx) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; KNOWNVLEN: {{.*}} +; UNKNOWNVLEN: {{.*}} From 032a5a67d6574a3c60a76ae4e63e95cbc2cf539d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 5 Sep 2023 17:26:59 +0100 Subject: [PATCH 2/3] [RISCV] Refactor extract_subvector slightly. NFC This patch refactors extract_subvector to lower to extract_subreg directly, and to shortcut whenever the index is 0 when extracting a scalable vector. This doesn't change any of the existing behaviour, but makes an upcoming patch that extends the scalable path slightly easier to read. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ab93f6a800775..54f37c541043a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8622,6 +8622,11 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); } } + + // With an index of 0 this is a cast-like subvector, which can be performed + // with subregister operations. + if (OrigIdx == 0) + return Op; // If the subvector vector is a fixed-length type, we cannot use subregister // manipulation to simplify the codegen; we don't know which register of a @@ -8629,10 +8634,6 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, // register size. Therefore we must slide the vector group down the full // amount. if (SubVecVT.isFixedLengthVector()) { - // With an index of 0 this is a cast-like subvector, which can be performed - // with subregister operations. 
- if (OrigIdx == 0) - return Op; MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -8664,17 +8665,15 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, if (RemIdx == 0) return Op; - // Else we must shift our vector register directly to extract the subvector. - // Do this using VSLIDEDOWN. + // Else SubVecVT is a fractional LMUL and needs to be slid down. + assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); // If the vector type is an LMUL-group type, extract a subvector equal to the - // nearest full vector register type. This should resolve to a EXTRACT_SUBREG - // instruction. + // nearest full vector register type. MVT InterSubVT = VecVT; if (VecVT.bitsGT(getLMUL1VT(VecVT))) { InterSubVT = getLMUL1VT(VecVT); - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, - DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); + Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec); } // Slide this vector register down by the desired number of elements in order From 50b189480cf1a401eef4ce788ef9ffb2df72d67a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 4 Sep 2023 17:42:52 +0100 Subject: [PATCH 3/3] [RISCV] Extract subregister if VLEN is known when lowering extract_subvector If we know VLEN at compile time, then we can workout what subregister an index into a fixed length vector will be at. We can use this information when lowering extract_subvector to perform the vslidedown on a smaller subregister. This allows us to use a smaller LMUL, or if the extract is aligned to a vector register then we can avoid the slide altogether. The logic here is a bit tangled with the scalable path: If people find this too unwieldy, I can separate it out and duplicate it for the fixed case. This technique could be applied to extract_vector_elt, insert_vector_elt and insert_subvector too. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 56 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 5 + .../rvv/fixed-vectors-extract-subvector.ll | 549 ++++++++++++------ 3 files changed, 416 insertions(+), 194 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 54f37c541043a..d8cf831da2d29 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8628,12 +8628,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, if (OrigIdx == 0) return Op; + auto KnownVLen = Subtarget.getRealKnownVLen(); + // If the subvector vector is a fixed-length type, we cannot use subregister - // manipulation to simplify the codegen; we don't know which register of a - // LMUL group contains the specific subvector as we only know the minimum - // register size. Therefore we must slide the vector group down the full - // amount. - if (SubVecVT.isFixedLengthVector()) { + // manipulation to simplify the codegen if we don't know VLEN; we don't know + // which register of a LMUL group contains the specific subvector as we only + // know the minimum register size. Therefore we must slide the vector group + // down the full amount. 
+ if (SubVecVT.isFixedLengthVector() && !KnownVLen) { MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -8654,19 +8656,46 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, return DAG.getBitcast(Op.getValueType(), Slidedown); } + if (VecVT.isFixedLengthVector()) { + VecVT = getContainerForFixedLengthVector(VecVT); + Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget); + } + + // The semantics of extract_subvector are that if the extracted subvector is + // scalable, then the index is scaled by vscale. So if we have a fixed length + // subvector, we need to factor that in before we decompose it to + // subregisters... + MVT ContainerSubVecVT = SubVecVT; + unsigned EffectiveIdx = OrigIdx; + unsigned Vscale = *KnownVLen / RISCV::RVVBitsPerBlock; + if (SubVecVT.isFixedLengthVector()) { + assert(KnownVLen); + ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT); + EffectiveIdx = OrigIdx / Vscale; + } + unsigned SubRegIdx, RemIdx; std::tie(SubRegIdx, RemIdx) = RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( - VecVT, SubVecVT, OrigIdx, TRI); + VecVT, ContainerSubVecVT, EffectiveIdx, TRI); + + // ... and scale the remainder back afterwards. + if (SubVecVT.isFixedLengthVector()) + RemIdx = (RemIdx * Vscale) + (OrigIdx % Vscale); // If the Idx has been completely eliminated then this is a subvector extract // which naturally aligns to a vector register. These can easily be handled // using subregister manipulation. - if (RemIdx == 0) + if (RemIdx == 0) { + if (SubVecVT.isFixedLengthVector()) { + Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec); + return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget); + } return Op; + } // Else SubVecVT is a fractional LMUL and needs to be slid down. - assert(RISCVVType::decodeVLMUL(getLMUL(SubVecVT)).second); + assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second); // If the vector type is an LMUL-group type, extract a subvector equal to the // nearest full vector register type. @@ -8678,10 +8707,17 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, // Slide this vector register down by the desired number of elements in order // to place the desired subvector starting at element 0. - SDValue SlidedownAmt = - DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); + SDValue SlidedownAmt; + if (SubVecVT.isFixedLengthVector()) + SlidedownAmt = DAG.getConstant(RemIdx, DL, Subtarget.getXLenVT()); + else + SlidedownAmt = + DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx)); auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); + if (SubVecVT.isFixedLengthVector()) + VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget); + SDValue Slidedown = getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index cf64dbc21bd8a..86dd9d6861362 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -152,6 +152,11 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { unsigned VLen = getMaxRVVVectorSizeInBits(); return VLen == 0 ? 
65536 : VLen; } + std::optional getRealKnownVLen() const { + if (getRealMinVLen() == getRealMaxVLen()) + return getRealMinVLen(); + return std::nullopt; + } RISCVABI::ABI getTargetABI() const { return TargetABI; } bool isSoftFPABI() const { return TargetABI == RISCVABI::ABI_LP64 || diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index 92b052fcaab83..a89a0065be555 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -63,15 +63,23 @@ define void @extract_v2i8_v8i8_6(ptr %x, ptr %y) { } define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v1i32_v8i32_4: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v1i32_v8i32_4: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v1i32_v8i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 4) store <1 x i32> %c, ptr %y @@ -79,15 +87,25 @@ define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) { } define void @extract_v1i32_v8i32_5(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v1i32_v8i32_5: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v1i32_v8i32_5: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 5 +; UNKNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v1i32_v8i32_5: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 1 +; KNOWNVLEN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <1 x i32> @llvm.vector.extract.v1i32.v8i32(<8 x i32> %a, i64 5) store <1 x i32> %c, ptr %y @@ -109,15 +127,25 @@ define void @extract_v2i32_v8i32_0(ptr %x, ptr %y) { } define void @extract_v2i32_v8i32_2(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_v8i32_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vsetivli 
zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_v8i32_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2) store <2 x i32> %c, ptr %y @@ -125,15 +153,23 @@ define void @extract_v2i32_v8i32_2(ptr %x, ptr %y) { } define void @extract_v2i32_v8i32_4(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_v8i32_4: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_4: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_v8i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 4) store <2 x i32> %c, ptr %y @@ -141,15 +177,25 @@ define void @extract_v2i32_v8i32_4(ptr %x, ptr %y) { } define void @extract_v2i32_v8i32_6(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_v8i32_6: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 6 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_v8i32_6: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vle32.v v8, (a0) +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 6 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_v8i32_6: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vle32.v v8, (a0) +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <8 x i32>, ptr %x %c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6) store <2 x i32> %c, ptr %y @@ -169,52 +215,80 @@ 
define void @extract_v2i32_nxv16i32_0( %x, ptr %y) { define void @extract_v2i32_nxv16i32_2( %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_nxv16i32_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 2) store <2 x i32> %c, ptr %y ret void } define void @extract_v2i32_nxv16i32_4( %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_nxv16i32_4: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_4: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 4 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_4: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v9, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 4) store <2 x i32> %c, ptr %y ret void } define void @extract_v2i32_nxv16i32_6( %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_nxv16i32_6: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 6 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_6: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 6 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_6: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 6) store <2 x i32> %c, ptr %y ret void } define void @extract_v2i32_nxv16i32_8( %x, ptr %y) { -; CHECK-LABEL: extract_v2i32_nxv16i32_8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 8 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i32_nxv16i32_8: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i32_nxv16i32_8: +; 
KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v10, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32( %x, i64 8) store <2 x i32> %c, ptr %y ret void @@ -271,13 +345,19 @@ define void @extract_v2i8_nxv2i8_6( %x, ptr %y) { } define void @extract_v8i32_nxv16i32_8( %x, ptr %y) { -; CHECK-LABEL: extract_v8i32_nxv16i32_8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 8 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v8i32_nxv16i32_8: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; UNKNOWNVLEN-NEXT: vse32.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v8i32_nxv16i32_8: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; KNOWNVLEN-NEXT: vse32.v v10, (a0) +; KNOWNVLEN-NEXT: ret %c = call <8 x i32> @llvm.vector.extract.v8i32.nxv16i32( %x, i64 8) store <8 x i32> %c, ptr %y ret void @@ -430,27 +510,49 @@ define void @extract_v2i1_v64i1_0(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_v64i1_2: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_v64i1_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: li a2, 64 +; UNKNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vlm.v v0, (a0) +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_v64i1_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: li a2, 64 +; KNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; KNOWNVLEN-NEXT: vlm.v v0, (a0) +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; 
KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2) store <2 x i1> %c, ptr %y @@ -458,28 +560,50 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { } define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_v64i1_42: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: li a0, 42 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a1) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_v64i1_42: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: li a2, 64 +; UNKNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vlm.v v0, (a0) +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: li a0, 42 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vx v8, v8, a0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a1) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_v64i1_42: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: li a2, 64 +; KNOWNVLEN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; KNOWNVLEN-NEXT: vlm.v v0, (a0) +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v10, 10 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a1) +; KNOWNVLEN-NEXT: ret %a = load <64 x i1>, ptr %x %c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42) store <2 x i1> %c, ptr %y @@ -550,76 +674,136 @@ define void @extract_v2i1_nxv64i1_0( %x, ptr %y) { } define void @extract_v2i1_nxv64i1_2( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv64i1_2: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, m8, ta, ma -; 
CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv64i1_2: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_nxv64i1_2: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v8, 2 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1( %x, i64 2) store <2 x i1> %c, ptr %y ret void } define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv64i1_42: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: li a1, 42 -; CHECK-NEXT: vsetivli zero, 2, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv64i1_42: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: li a1, 42 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m8, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vx v8, v8, a1 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: 
vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_nxv64i1_42: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v10, 10 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1( %x, i64 42) store <2 x i1> %c, ptr %y ret void } define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { -; CHECK-LABEL: extract_v2i1_nxv32i1_26: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 26 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret +; UNKNOWNVLEN-LABEL: extract_v2i1_nxv32i1_26: +; UNKNOWNVLEN: # %bb.0: +; UNKNOWNVLEN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, m4, ta, ma +; UNKNOWNVLEN-NEXT: vslidedown.vi v8, v8, 26 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; UNKNOWNVLEN-NEXT: vmv.v.i v8, 0 +; UNKNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmv.v.i v9, 0 +; UNKNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; UNKNOWNVLEN-NEXT: vmv.v.v v9, v8 +; UNKNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; UNKNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; UNKNOWNVLEN-NEXT: vsm.v v8, (a0) +; UNKNOWNVLEN-NEXT: ret +; +; KNOWNVLEN-LABEL: extract_v2i1_nxv32i1_26: +; KNOWNVLEN: # %bb.0: +; KNOWNVLEN-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; KNOWNVLEN-NEXT: vslidedown.vi v8, v9, 10 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v0, v8, 0 +; KNOWNVLEN-NEXT: vmv.v.i v8, 0 +; KNOWNVLEN-NEXT: vmerge.vim v8, v8, 1, v0 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmv.v.i v9, 0 +; KNOWNVLEN-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; KNOWNVLEN-NEXT: vmv.v.v v9, v8 +; KNOWNVLEN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; KNOWNVLEN-NEXT: vmsne.vi v8, v9, 0 +; KNOWNVLEN-NEXT: vsm.v v8, (a0) +; KNOWNVLEN-NEXT: ret %c = call <2 x i1> 
@llvm.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %x, i64 26)
   store <2 x i1> %c, ptr %y
   ret void
@@ -660,6 +844,3 @@ declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %vec, i64 %i
 declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
 declare <8 x i32> @llvm.vector.extract.v8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
 
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; KNOWNVLEN: {{.*}}
-; UNKNOWNVLEN: {{.*}}
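
Editor's note (not part of the patch series above): the decomposition that patch 3 performs when VLEN is known can be pictured with plain integer arithmetic. The sketch below is a minimal standalone C++ illustration, assuming a known VLEN and an element width that divides it evenly; the struct and function names are invented for the example and are not LLVM APIs, and it is only a simplified picture of what the EffectiveIdx/RemIdx computation in lowerEXTRACT_SUBVECTOR works out to for the KNOWNVLEN check lines of extract_v2i32_v8i32_6 and extract_v2i32_v8i32_4.

#include <cassert>
#include <cstdio>

// Which LMUL1 subregister of a register group holds a given element index,
// and how far that element must be slid down within that subregister.
struct SubRegSlide {
  unsigned SubRegOffset; // 0 = first register of the group
  unsigned SlideAmount;  // vslidedown amount within that register
};

// Hypothetical helper: with a compile-time-known VLEN, split an element index
// into (subregister, in-register slide). Names are illustrative only.
static SubRegSlide decomposeFixedExtract(unsigned VLEN, unsigned SEW,
                                         unsigned Idx) {
  assert(VLEN % SEW == 0 && "SEW must divide VLEN");
  unsigned ElemsPerReg = VLEN / SEW; // elements per LMUL=1 register
  return {Idx / ElemsPerReg, Idx % ElemsPerReg};
}

int main() {
  // extract_v2i32_v8i32_6 with VLEN=128: <8 x i32> occupies two registers
  // (LMUL=2, 4 x i32 each). Index 6 lands in the second register with a
  // remainder of 2, so the vslidedown can run at LMUL=1 instead of LMUL=2.
  SubRegSlide S = decomposeFixedExtract(128, 32, 6);
  std::printf("offset %u, slide %u\n", S.SubRegOffset, S.SlideAmount); // 1, 2

  // extract_v2i32_v8i32_4: index 4 is register-aligned (remainder 0), so no
  // slide is needed at all and the store can read the subregister directly.
  S = decomposeFixedExtract(128, 32, 4);
  std::printf("offset %u, slide %u\n", S.SubRegOffset, S.SlideAmount); // 1, 0
  return 0;
}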