diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 0883f597dabc1..c71ff209f7401 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -83,210 +83,233 @@ def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value def v3i1 : VTVec<3, i1, 19>; // 3 x i1 vector value def v4i1 : VTVec<4, i1, 20>; // 4 x i1 vector value -def v8i1 : VTVec<8, i1, 21>; // 8 x i1 vector value -def v16i1 : VTVec<16, i1, 22>; // 16 x i1 vector value -def v32i1 : VTVec<32, i1, 23>; // 32 x i1 vector value -def v64i1 : VTVec<64, i1, 24>; // 64 x i1 vector value -def v128i1 : VTVec<128, i1, 25>; // 128 x i1 vector value -def v256i1 : VTVec<256, i1, 26>; // 256 x i1 vector value -def v512i1 : VTVec<512, i1, 27>; // 512 x i1 vector value -def v1024i1 : VTVec<1024, i1, 28>; // 1024 x i1 vector value -def v2048i1 : VTVec<2048, i1, 29>; // 2048 x i1 vector value - -def v128i2 : VTVec<128, i2, 30>; // 128 x i2 vector value -def v256i2 : VTVec<256, i2, 31>; // 256 x i2 vector value - -def v64i4 : VTVec<64, i4, 32>; // 64 x i4 vector value -def v128i4 : VTVec<128, i4, 33>; // 128 x i4 vector value - -def v1i8 : VTVec<1, i8, 34>; // 1 x i8 vector value -def v2i8 : VTVec<2, i8, 35>; // 2 x i8 vector value -def v3i8 : VTVec<3, i8, 36>; // 3 x i8 vector value -def v4i8 : VTVec<4, i8, 37>; // 4 x i8 vector value -def v8i8 : VTVec<8, i8, 38>; // 8 x i8 vector value -def v16i8 : VTVec<16, i8, 39>; // 16 x i8 vector value -def v32i8 : VTVec<32, i8, 40>; // 32 x i8 vector value -def v64i8 : VTVec<64, i8, 41>; // 64 x i8 vector value -def v128i8 : VTVec<128, i8, 42>; // 128 x i8 vector value -def v256i8 : VTVec<256, i8, 43>; // 256 x i8 vector value -def v512i8 : VTVec<512, i8, 44>; // 512 x i8 vector value -def v1024i8 : VTVec<1024, i8, 45>; // 1024 x i8 vector value - -def v1i16 : VTVec<1, i16, 46>; // 1 x i16 vector value -def v2i16 : VTVec<2, i16, 47>; // 2 x i16 vector value -def v3i16 : VTVec<3, i16, 48>; // 3 x i16 vector value -def v4i16 : VTVec<4, i16, 49>; // 4 x i16 vector value -def v8i16 : VTVec<8, i16, 50>; // 8 x i16 vector value -def v16i16 : VTVec<16, i16, 51>; // 16 x i16 vector value -def v32i16 : VTVec<32, i16, 52>; // 32 x i16 vector value -def v64i16 : VTVec<64, i16, 53>; // 64 x i16 vector value -def v128i16 : VTVec<128, i16, 54>; // 128 x i16 vector value -def v256i16 : VTVec<256, i16, 55>; // 256 x i16 vector value -def v512i16 : VTVec<512, i16, 56>; // 512 x i16 vector value - -def v1i32 : VTVec<1, i32, 57>; // 1 x i32 vector value -def v2i32 : VTVec<2, i32, 58>; // 2 x i32 vector value -def v3i32 : VTVec<3, i32, 59>; // 3 x i32 vector value -def v4i32 : VTVec<4, i32, 60>; // 4 x i32 vector value -def v5i32 : VTVec<5, i32, 61>; // 5 x i32 vector value -def v6i32 : VTVec<6, i32, 62>; // 6 x f32 vector value -def v7i32 : VTVec<7, i32, 63>; // 7 x f32 vector value -def v8i32 : VTVec<8, i32, 64>; // 8 x i32 vector value -def v9i32 : VTVec<9, i32, 65>; // 9 x i32 vector value -def v10i32 : VTVec<10, i32, 66>; // 10 x i32 vector value -def v11i32 : VTVec<11, i32, 67>; // 11 x i32 vector value -def v12i32 : VTVec<12, i32, 68>; // 12 x i32 vector value -def v16i32 : VTVec<16, i32, 69>; // 16 x i32 vector value -def v32i32 : VTVec<32, i32, 70>; // 32 x i32 vector value -def v64i32 : VTVec<64, i32, 71>; // 64 x i32 vector value -def v128i32 : VTVec<128, i32, 72>; // 128 x i32 vector value -def v256i32 : VTVec<256, i32, 73>; // 256 x i32 vector value -def v512i32 : VTVec<512, i32, 
74>; // 512 x i32 vector value -def v1024i32 : VTVec<1024, i32, 75>; // 1024 x i32 vector value -def v2048i32 : VTVec<2048, i32, 76>; // 2048 x i32 vector value - -def v1i64 : VTVec<1, i64, 77>; // 1 x i64 vector value -def v2i64 : VTVec<2, i64, 78>; // 2 x i64 vector value -def v3i64 : VTVec<3, i64, 79>; // 3 x i64 vector value -def v4i64 : VTVec<4, i64, 80>; // 4 x i64 vector value -def v8i64 : VTVec<8, i64, 81>; // 8 x i64 vector value -def v16i64 : VTVec<16, i64, 82>; // 16 x i64 vector value -def v32i64 : VTVec<32, i64, 83>; // 32 x i64 vector value -def v64i64 : VTVec<64, i64, 84>; // 64 x i64 vector value -def v128i64 : VTVec<128, i64, 85>; // 128 x i64 vector value -def v256i64 : VTVec<256, i64, 86>; // 256 x i64 vector value - -def v1i128 : VTVec<1, i128, 87>; // 1 x i128 vector value - -def v1f16 : VTVec<1, f16, 88>; // 1 x f16 vector value -def v2f16 : VTVec<2, f16, 89>; // 2 x f16 vector value -def v3f16 : VTVec<3, f16, 90>; // 3 x f16 vector value -def v4f16 : VTVec<4, f16, 91>; // 4 x f16 vector value -def v8f16 : VTVec<8, f16, 92>; // 8 x f16 vector value -def v16f16 : VTVec<16, f16, 93>; // 16 x f16 vector value -def v32f16 : VTVec<32, f16, 94>; // 32 x f16 vector value -def v64f16 : VTVec<64, f16, 95>; // 64 x f16 vector value -def v128f16 : VTVec<128, f16, 96>; // 128 x f16 vector value -def v256f16 : VTVec<256, f16, 97>; // 256 x f16 vector value -def v512f16 : VTVec<512, f16, 98>; // 512 x f16 vector value - -def v2bf16 : VTVec<2, bf16, 99>; // 2 x bf16 vector value -def v3bf16 : VTVec<3, bf16, 100>; // 3 x bf16 vector value -def v4bf16 : VTVec<4, bf16, 101>; // 4 x bf16 vector value -def v8bf16 : VTVec<8, bf16, 102>; // 8 x bf16 vector value -def v16bf16 : VTVec<16, bf16, 103>; // 16 x bf16 vector value -def v32bf16 : VTVec<32, bf16, 104>; // 32 x bf16 vector value -def v64bf16 : VTVec<64, bf16, 105>; // 64 x bf16 vector value -def v128bf16 : VTVec<128, bf16, 106>; // 128 x bf16 vector value - -def v1f32 : VTVec<1, f32, 107>; // 1 x f32 vector value -def v2f32 : VTVec<2, f32, 108>; // 2 x f32 vector value -def v3f32 : VTVec<3, f32, 109>; // 3 x f32 vector value -def v4f32 : VTVec<4, f32, 110>; // 4 x f32 vector value -def v5f32 : VTVec<5, f32, 111>; // 5 x f32 vector value -def v6f32 : VTVec<6, f32, 112>; // 6 x f32 vector value -def v7f32 : VTVec<7, f32, 113>; // 7 x f32 vector value -def v8f32 : VTVec<8, f32, 114>; // 8 x f32 vector value -def v9f32 : VTVec<9, f32, 115>; // 9 x f32 vector value -def v10f32 : VTVec<10, f32, 116>; // 10 x f32 vector value -def v11f32 : VTVec<11, f32, 117>; // 11 x f32 vector value -def v12f32 : VTVec<12, f32, 118>; // 12 x f32 vector value -def v16f32 : VTVec<16, f32, 119>; // 16 x f32 vector value -def v32f32 : VTVec<32, f32, 120>; // 32 x f32 vector value -def v64f32 : VTVec<64, f32, 121>; // 64 x f32 vector value -def v128f32 : VTVec<128, f32, 122>; // 128 x f32 vector value -def v256f32 : VTVec<256, f32, 123>; // 256 x f32 vector value -def v512f32 : VTVec<512, f32, 124>; // 512 x f32 vector value -def v1024f32 : VTVec<1024, f32, 125>; // 1024 x f32 vector value -def v2048f32 : VTVec<2048, f32, 126>; // 2048 x f32 vector value - -def v1f64 : VTVec<1, f64, 127>; // 1 x f64 vector value -def v2f64 : VTVec<2, f64, 128>; // 2 x f64 vector value -def v3f64 : VTVec<3, f64, 129>; // 3 x f64 vector value -def v4f64 : VTVec<4, f64, 130>; // 4 x f64 vector value -def v8f64 : VTVec<8, f64, 131>; // 8 x f64 vector value -def v16f64 : VTVec<16, f64, 132>; // 16 x f64 vector value -def v32f64 : VTVec<32, f64, 133>; // 32 x f64 vector value -def 
v64f64 : VTVec<64, f64, 134>; // 64 x f64 vector value -def v128f64 : VTVec<128, f64, 135>; // 128 x f64 vector value -def v256f64 : VTVec<256, f64, 136>; // 256 x f64 vector value - -def nxv1i1 : VTScalableVec<1, i1, 137>; // n x 1 x i1 vector value -def nxv2i1 : VTScalableVec<2, i1, 138>; // n x 2 x i1 vector value -def nxv4i1 : VTScalableVec<4, i1, 139>; // n x 4 x i1 vector value -def nxv8i1 : VTScalableVec<8, i1, 140>; // n x 8 x i1 vector value -def nxv16i1 : VTScalableVec<16, i1, 141>; // n x 16 x i1 vector value -def nxv32i1 : VTScalableVec<32, i1, 142>; // n x 32 x i1 vector value -def nxv64i1 : VTScalableVec<64, i1, 143>; // n x 64 x i1 vector value - -def nxv1i8 : VTScalableVec<1, i8, 144>; // n x 1 x i8 vector value -def nxv2i8 : VTScalableVec<2, i8, 145>; // n x 2 x i8 vector value -def nxv4i8 : VTScalableVec<4, i8, 146>; // n x 4 x i8 vector value -def nxv8i8 : VTScalableVec<8, i8, 147>; // n x 8 x i8 vector value -def nxv16i8 : VTScalableVec<16, i8, 148>; // n x 16 x i8 vector value -def nxv32i8 : VTScalableVec<32, i8, 149>; // n x 32 x i8 vector value -def nxv64i8 : VTScalableVec<64, i8, 150>; // n x 64 x i8 vector value - -def nxv1i16 : VTScalableVec<1, i16, 151>; // n x 1 x i16 vector value -def nxv2i16 : VTScalableVec<2, i16, 152>; // n x 2 x i16 vector value -def nxv4i16 : VTScalableVec<4, i16, 153>; // n x 4 x i16 vector value -def nxv8i16 : VTScalableVec<8, i16, 154>; // n x 8 x i16 vector value -def nxv16i16 : VTScalableVec<16, i16, 155>; // n x 16 x i16 vector value -def nxv32i16 : VTScalableVec<32, i16, 156>; // n x 32 x i16 vector value - -def nxv1i32 : VTScalableVec<1, i32, 157>; // n x 1 x i32 vector value -def nxv2i32 : VTScalableVec<2, i32, 158>; // n x 2 x i32 vector value -def nxv4i32 : VTScalableVec<4, i32, 159>; // n x 4 x i32 vector value -def nxv8i32 : VTScalableVec<8, i32, 160>; // n x 8 x i32 vector value -def nxv16i32 : VTScalableVec<16, i32, 161>; // n x 16 x i32 vector value -def nxv32i32 : VTScalableVec<32, i32, 162>; // n x 32 x i32 vector value - -def nxv1i64 : VTScalableVec<1, i64, 163>; // n x 1 x i64 vector value -def nxv2i64 : VTScalableVec<2, i64, 164>; // n x 2 x i64 vector value -def nxv4i64 : VTScalableVec<4, i64, 165>; // n x 4 x i64 vector value -def nxv8i64 : VTScalableVec<8, i64, 166>; // n x 8 x i64 vector value -def nxv16i64 : VTScalableVec<16, i64, 167>; // n x 16 x i64 vector value -def nxv32i64 : VTScalableVec<32, i64, 168>; // n x 32 x i64 vector value - -def nxv1f16 : VTScalableVec<1, f16, 169>; // n x 1 x f16 vector value -def nxv2f16 : VTScalableVec<2, f16, 170>; // n x 2 x f16 vector value -def nxv4f16 : VTScalableVec<4, f16, 171>; // n x 4 x f16 vector value -def nxv8f16 : VTScalableVec<8, f16, 172>; // n x 8 x f16 vector value -def nxv16f16 : VTScalableVec<16, f16, 173>; // n x 16 x f16 vector value -def nxv32f16 : VTScalableVec<32, f16, 174>; // n x 32 x f16 vector value - -def nxv1bf16 : VTScalableVec<1, bf16, 175>; // n x 1 x bf16 vector value -def nxv2bf16 : VTScalableVec<2, bf16, 176>; // n x 2 x bf16 vector value -def nxv4bf16 : VTScalableVec<4, bf16, 177>; // n x 4 x bf16 vector value -def nxv8bf16 : VTScalableVec<8, bf16, 178>; // n x 8 x bf16 vector value -def nxv16bf16 : VTScalableVec<16, bf16, 179>; // n x 16 x bf16 vector value -def nxv32bf16 : VTScalableVec<32, bf16, 180>; // n x 32 x bf16 vector value - -def nxv1f32 : VTScalableVec<1, f32, 181>; // n x 1 x f32 vector value -def nxv2f32 : VTScalableVec<2, f32, 182>; // n x 2 x f32 vector value -def nxv4f32 : VTScalableVec<4, f32, 183>; // n x 4 x f32 vector 
value -def nxv8f32 : VTScalableVec<8, f32, 184>; // n x 8 x f32 vector value -def nxv16f32 : VTScalableVec<16, f32, 185>; // n x 16 x f32 vector value - -def nxv1f64 : VTScalableVec<1, f64, 186>; // n x 1 x f64 vector value -def nxv2f64 : VTScalableVec<2, f64, 187>; // n x 2 x f64 vector value -def nxv4f64 : VTScalableVec<4, f64, 188>; // n x 4 x f64 vector value -def nxv8f64 : VTScalableVec<8, f64, 189>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 190>; // X86 MMX value -def Glue : ValueType<0, 191>; // Pre-RA sched glue -def isVoid : ValueType<0, 192>; // Produces no value -def untyped : ValueType<8, 193> { // Produces an untyped value +def v5i1 : VTVec<5, i1, 21>; // 5 x i1 vector value +def v7i1 : VTVec<7, i1, 22>; // 7 x i1 vector value +def v8i1 : VTVec<8, i1, 23>; // 8 x i1 vector value +def v15i1 : VTVec<15, i1, 24>; // 15 x i1 vector value +def v16i1 : VTVec<16, i1, 25>; // 16 x i1 vector value +def v32i1 : VTVec<32, i1, 26>; // 32 x i1 vector value +def v64i1 : VTVec<64, i1, 27>; // 64 x i1 vector value +def v128i1 : VTVec<128, i1, 28>; // 128 x i1 vector value +def v256i1 : VTVec<256, i1, 29>; // 256 x i1 vector value +def v512i1 : VTVec<512, i1, 30>; // 512 x i1 vector value +def v1024i1 : VTVec<1024, i1, 31>; // 1024 x i1 vector value +def v2048i1 : VTVec<2048, i1, 32>; // 2048 x i1 vector value + +def v128i2 : VTVec<128, i2, 33>; // 128 x i2 vector value +def v256i2 : VTVec<256, i2, 34>; // 256 x i2 vector value + +def v64i4 : VTVec<64, i4, 35>; // 64 x i4 vector value +def v128i4 : VTVec<128, i4, 36>; // 128 x i4 vector value + +def v1i8 : VTVec<1, i8, 37>; // 1 x i8 vector value +def v2i8 : VTVec<2, i8, 38>; // 2 x i8 vector value +def v3i8 : VTVec<3, i8, 39>; // 3 x i8 vector value +def v4i8 : VTVec<4, i8, 40>; // 4 x i8 vector value +def v5i8 : VTVec<5, i8, 41>; // 5 x i8 vector value +def v7i8 : VTVec<7, i8, 42>; // 7 x i8 vector value +def v8i8 : VTVec<8, i8, 43>; // 8 x i8 vector value +def v15i8 : VTVec<15, i8, 44>; // 15 x i8 vector value +def v16i8 : VTVec<16, i8, 45>; // 16 x i8 vector value +def v32i8 : VTVec<32, i8, 46>; // 32 x i8 vector value +def v64i8 : VTVec<64, i8, 47>; // 64 x i8 vector value +def v128i8 : VTVec<128, i8, 48>; // 128 x i8 vector value +def v256i8 : VTVec<256, i8, 49>; // 256 x i8 vector value +def v512i8 : VTVec<512, i8, 50>; // 512 x i8 vector value +def v1024i8 : VTVec<1024, i8, 51>; // 1024 x i8 vector value + +def v1i16 : VTVec<1, i16, 52>; // 1 x i16 vector value +def v2i16 : VTVec<2, i16, 53>; // 2 x i16 vector value +def v3i16 : VTVec<3, i16, 54>; // 3 x i16 vector value +def v4i16 : VTVec<4, i16, 55>; // 4 x i16 vector value +def v5i16 : VTVec<5, i16, 56>; // 5 x i16 vector value +def v7i16 : VTVec<7, i16, 57>; // 7 x i16 vector value +def v8i16 : VTVec<8, i16, 58>; // 8 x i16 vector value +def v15i16 : VTVec<15, i16, 59>; // 15 x i16 vector value +def v16i16 : VTVec<16, i16, 60>; // 16 x i16 vector value +def v32i16 : VTVec<32, i16, 61>; // 32 x i16 vector value +def v64i16 : VTVec<64, i16, 62>; // 64 x i16 vector value +def v128i16 : VTVec<128, i16, 63>; // 128 x i16 vector value +def v256i16 : VTVec<256, i16, 64>; // 256 x i16 vector value +def v512i16 : VTVec<512, i16, 65>; // 512 x i16 vector value + +def v1i32 : VTVec<1, i32, 66>; // 1 x i32 vector value +def v2i32 : VTVec<2, i32, 67>; // 2 x i32 vector value +def v3i32 : VTVec<3, i32, 68>; // 3 x i32 vector value +def v4i32 : VTVec<4, i32, 69>; // 4 x i32 vector value +def v5i32 : VTVec<5, i32, 70>; // 5 x i32 vector value +def v6i32 : VTVec<6, i32, 71>; // 6 x 
i32 vector value +def v7i32 : VTVec<7, i32, 72>; // 7 x i32 vector value +def v8i32 : VTVec<8, i32, 73>; // 8 x i32 vector value +def v9i32 : VTVec<9, i32, 74>; // 9 x i32 vector value +def v10i32 : VTVec<10, i32, 75>; // 10 x i32 vector value +def v11i32 : VTVec<11, i32, 76>; // 11 x i32 vector value +def v12i32 : VTVec<12, i32, 77>; // 12 x i32 vector value +def v15i32 : VTVec<15, i32, 78>; // 15 x i32 vector value +def v16i32 : VTVec<16, i32, 79>; // 16 x i32 vector value +def v32i32 : VTVec<32, i32, 80>; // 32 x i32 vector value +def v64i32 : VTVec<64, i32, 81>; // 64 x i32 vector value +def v128i32 : VTVec<128, i32, 82>; // 128 x i32 vector value +def v256i32 : VTVec<256, i32, 83>; // 256 x i32 vector value +def v512i32 : VTVec<512, i32, 84>; // 512 x i32 vector value +def v1024i32 : VTVec<1024, i32, 85>; // 1024 x i32 vector value +def v2048i32 : VTVec<2048, i32, 86>; // 2048 x i32 vector value + +def v1i64 : VTVec<1, i64, 87>; // 1 x i64 vector value +def v2i64 : VTVec<2, i64, 88>; // 2 x i64 vector value +def v3i64 : VTVec<3, i64, 89>; // 3 x i64 vector value +def v4i64 : VTVec<4, i64, 90>; // 4 x i64 vector value +def v5i64 : VTVec<5, i64, 91>; // 5 x i64 vector value +def v7i64 : VTVec<7, i64, 92>; // 7 x i64 vector value +def v8i64 : VTVec<8, i64, 93>; // 8 x i64 vector value +def v15i64 : VTVec<15, i64, 94>; // 15 x i64 vector value +def v16i64 : VTVec<16, i64, 95>; // 16 x i64 vector value +def v32i64 : VTVec<32, i64, 96>; // 32 x i64 vector value +def v64i64 : VTVec<64, i64, 97>; // 64 x i64 vector value +def v128i64 : VTVec<128, i64, 98>; // 128 x i64 vector value +def v256i64 : VTVec<256, i64, 99>; // 256 x i64 vector value + +def v1i128 : VTVec<1, i128, 100>; // 1 x i128 vector value + +def v1f16 : VTVec<1, f16, 101>; // 1 x f16 vector value +def v2f16 : VTVec<2, f16, 102>; // 2 x f16 vector value +def v3f16 : VTVec<3, f16, 103>; // 3 x f16 vector value +def v4f16 : VTVec<4, f16, 104>; // 4 x f16 vector value +def v5f16 : VTVec<5, f16, 105>; // 5 x f16 vector value +def v7f16 : VTVec<7, f16, 106>; // 7 x f16 vector value +def v8f16 : VTVec<8, f16, 107>; // 8 x f16 vector value +def v15f16 : VTVec<15, f16, 108>; // 15 x f16 vector value +def v16f16 : VTVec<16, f16, 109>; // 16 x f16 vector value +def v32f16 : VTVec<32, f16, 110>; // 32 x f16 vector value +def v64f16 : VTVec<64, f16, 111>; // 64 x f16 vector value +def v128f16 : VTVec<128, f16, 112>; // 128 x f16 vector value +def v256f16 : VTVec<256, f16, 113>; // 256 x f16 vector value +def v512f16 : VTVec<512, f16, 114>; // 512 x f16 vector value + +def v2bf16 : VTVec<2, bf16, 115>; // 2 x bf16 vector value +def v3bf16 : VTVec<3, bf16, 116>; // 3 x bf16 vector value +def v4bf16 : VTVec<4, bf16, 117>; // 4 x bf16 vector value +def v8bf16 : VTVec<8, bf16, 118>; // 8 x bf16 vector value +def v15bf16 : VTVec<15, bf16, 119>; // 15 x bf16 vector value +def v16bf16 : VTVec<16, bf16, 120>; // 16 x bf16 vector value +def v32bf16 : VTVec<32, bf16, 121>; // 32 x bf16 vector value +def v64bf16 : VTVec<64, bf16, 122>; // 64 x bf16 vector value +def v128bf16 : VTVec<128, bf16, 123>; // 128 x bf16 vector value + +def v1f32 : VTVec<1, f32, 124>; // 1 x f32 vector value +def v2f32 : VTVec<2, f32, 125>; // 2 x f32 vector value +def v3f32 : VTVec<3, f32, 126>; // 3 x f32 vector value +def v4f32 : VTVec<4, f32, 127>; // 4 x f32 vector value +def v5f32 : VTVec<5, f32, 128>; // 5 x f32 vector value +def v6f32 : VTVec<6, f32, 129>; // 6 x f32 vector value +def v7f32 : VTVec<7, f32, 130>; // 7 x f32 vector value +def v8f32 : VTVec<8, f32, 131>; 
// 8 x f32 vector value +def v9f32 : VTVec<9, f32, 132>; // 9 x f32 vector value +def v10f32 : VTVec<10, f32, 133>; // 10 x f32 vector value +def v11f32 : VTVec<11, f32, 134>; // 11 x f32 vector value +def v12f32 : VTVec<12, f32, 135>; // 12 x f32 vector value +def v15f32 : VTVec<15, f32, 136>; // 15 x f32 vector value +def v16f32 : VTVec<16, f32, 137>; // 16 x f32 vector value +def v32f32 : VTVec<32, f32, 138>; // 32 x f32 vector value +def v64f32 : VTVec<64, f32, 139>; // 64 x f32 vector value +def v128f32 : VTVec<128, f32, 140>; // 128 x f32 vector value +def v256f32 : VTVec<256, f32, 141>; // 256 x f32 vector value +def v512f32 : VTVec<512, f32, 142>; // 512 x f32 vector value +def v1024f32 : VTVec<1024, f32, 143>; // 1024 x f32 vector value +def v2048f32 : VTVec<2048, f32, 144>; // 2048 x f32 vector value + +def v1f64 : VTVec<1, f64, 145>; // 1 x f64 vector value +def v2f64 : VTVec<2, f64, 146>; // 2 x f64 vector value +def v3f64 : VTVec<3, f64, 147>; // 3 x f64 vector value +def v4f64 : VTVec<4, f64, 148>; // 4 x f64 vector value +def v5f64 : VTVec<5, f64, 149>; // 5 x f64 vector value +def v7f64 : VTVec<7, f64, 150>; // 7 x f64 vector value +def v8f64 : VTVec<8, f64, 151>; // 8 x f64 vector value +def v15f64 : VTVec<15, f64, 152>; // 15 x f64 vector value +def v16f64 : VTVec<16, f64, 153>; // 16 x f64 vector value +def v32f64 : VTVec<32, f64, 154>; // 32 x f64 vector value +def v64f64 : VTVec<64, f64, 155>; // 64 x f64 vector value +def v128f64 : VTVec<128, f64, 156>; // 128 x f64 vector value +def v256f64 : VTVec<256, f64, 157>; // 256 x f64 vector value + +def nxv1i1 : VTScalableVec<1, i1, 158>; // n x 1 x i1 vector value +def nxv2i1 : VTScalableVec<2, i1, 159>; // n x 2 x i1 vector value +def nxv4i1 : VTScalableVec<4, i1, 160>; // n x 4 x i1 vector value +def nxv8i1 : VTScalableVec<8, i1, 161>; // n x 8 x i1 vector value +def nxv16i1 : VTScalableVec<16, i1, 162>; // n x 16 x i1 vector value +def nxv32i1 : VTScalableVec<32, i1, 163>; // n x 32 x i1 vector value +def nxv64i1 : VTScalableVec<64, i1, 164>; // n x 64 x i1 vector value + +def nxv1i8 : VTScalableVec<1, i8, 165>; // n x 1 x i8 vector value +def nxv2i8 : VTScalableVec<2, i8, 166>; // n x 2 x i8 vector value +def nxv4i8 : VTScalableVec<4, i8, 167>; // n x 4 x i8 vector value +def nxv8i8 : VTScalableVec<8, i8, 168>; // n x 8 x i8 vector value +def nxv16i8 : VTScalableVec<16, i8, 169>; // n x 16 x i8 vector value +def nxv32i8 : VTScalableVec<32, i8, 170>; // n x 32 x i8 vector value +def nxv64i8 : VTScalableVec<64, i8, 171>; // n x 64 x i8 vector value + +def nxv1i16 : VTScalableVec<1, i16, 172>; // n x 1 x i16 vector value +def nxv2i16 : VTScalableVec<2, i16, 173>; // n x 2 x i16 vector value +def nxv4i16 : VTScalableVec<4, i16, 174>; // n x 4 x i16 vector value +def nxv8i16 : VTScalableVec<8, i16, 175>; // n x 8 x i16 vector value +def nxv16i16 : VTScalableVec<16, i16, 176>; // n x 16 x i16 vector value +def nxv32i16 : VTScalableVec<32, i16, 177>; // n x 32 x i16 vector value + +def nxv1i32 : VTScalableVec<1, i32, 178>; // n x 1 x i32 vector value +def nxv2i32 : VTScalableVec<2, i32, 179>; // n x 2 x i32 vector value +def nxv4i32 : VTScalableVec<4, i32, 180>; // n x 4 x i32 vector value +def nxv8i32 : VTScalableVec<8, i32, 181>; // n x 8 x i32 vector value +def nxv16i32 : VTScalableVec<16, i32, 182>; // n x 16 x i32 vector value +def nxv32i32 : VTScalableVec<32, i32, 183>; // n x 32 x i32 vector value + +def nxv1i64 : VTScalableVec<1, i64, 184>; // n x 1 x i64 vector value +def nxv2i64 : VTScalableVec<2, i64, 185>; // n x 
2 x i64 vector value +def nxv4i64 : VTScalableVec<4, i64, 186>; // n x 4 x i64 vector value +def nxv8i64 : VTScalableVec<8, i64, 187>; // n x 8 x i64 vector value +def nxv16i64 : VTScalableVec<16, i64, 188>; // n x 16 x i64 vector value +def nxv32i64 : VTScalableVec<32, i64, 189>; // n x 32 x i64 vector value + +def nxv1f16 : VTScalableVec<1, f16, 190>; // n x 1 x f16 vector value +def nxv2f16 : VTScalableVec<2, f16, 191>; // n x 2 x f16 vector value +def nxv4f16 : VTScalableVec<4, f16, 192>; // n x 4 x f16 vector value +def nxv8f16 : VTScalableVec<8, f16, 193>; // n x 8 x f16 vector value +def nxv16f16 : VTScalableVec<16, f16, 194>; // n x 16 x f16 vector value +def nxv32f16 : VTScalableVec<32, f16, 195>; // n x 32 x f16 vector value + +def nxv1bf16 : VTScalableVec<1, bf16, 196>; // n x 1 x bf16 vector value +def nxv2bf16 : VTScalableVec<2, bf16, 197>; // n x 2 x bf16 vector value +def nxv4bf16 : VTScalableVec<4, bf16, 198>; // n x 4 x bf16 vector value +def nxv8bf16 : VTScalableVec<8, bf16, 199>; // n x 8 x bf16 vector value +def nxv16bf16 : VTScalableVec<16, bf16, 200>; // n x 16 x bf16 vector value +def nxv32bf16 : VTScalableVec<32, bf16, 201>; // n x 32 x bf16 vector value + +def nxv1f32 : VTScalableVec<1, f32, 202>; // n x 1 x f32 vector value +def nxv2f32 : VTScalableVec<2, f32, 203>; // n x 2 x f32 vector value +def nxv4f32 : VTScalableVec<4, f32, 204>; // n x 4 x f32 vector value +def nxv8f32 : VTScalableVec<8, f32, 205>; // n x 8 x f32 vector value +def nxv16f32 : VTScalableVec<16, f32, 206>; // n x 16 x f32 vector value + +def nxv1f64 : VTScalableVec<1, f64, 207>; // n x 1 x f64 vector value +def nxv2f64 : VTScalableVec<2, f64, 208>; // n x 2 x f64 vector value +def nxv4f64 : VTScalableVec<4, f64, 209>; // n x 4 x f64 vector value +def nxv8f64 : VTScalableVec<8, f64, 210>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 211>; // X86 MMX value +def FlagVT : ValueType<0, 212> { // Pre-RA sched glue + let LLVMName = "Glue"; +} +def isVoid : ValueType<0, 213>; // Produces no value +def untyped : ValueType<8, 214> { // Produces an untyped value let LLVMName = "Untyped"; } -def funcref : ValueType<0, 194>; // WebAssembly's funcref type -def externref : ValueType<0, 195>; // WebAssembly's externref type -def exnref : ValueType<0, 196>; // WebAssembly's exnref type -def x86amx : ValueType<8192, 197>; // X86 AMX value -def i64x8 : ValueType<512, 198>; // 8 Consecutive GPRs (AArch64) +def funcref : ValueType<0, 215>; // WebAssembly's funcref type +def externref : ValueType<0, 216>; // WebAssembly's externref type +def exnref : ValueType<0, 217>; // WebAssembly's exnref type +def x86amx : ValueType<8192, 218>; // X86 AMX value +def i64x8 : ValueType<512, 219>; // 8 Consecutive GPRs (AArch64) def aarch64svcount - : ValueType<16, 199>; // AArch64 predicate-as-counter -def spirvbuiltin : ValueType<0, 200>; // SPIR-V's builtin type + : ValueType<16, 220>; // AArch64 predicate-as-counter +def spirvbuiltin : ValueType<0, 221>; // SPIR-V's builtin type let isNormalValueType = false in { def token : ValueType<0, 504>; // TokenTy diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e57c8f8b7b483..2d63ffee38567 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -352,6 +352,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); setTruncStoreAction(MVT::v3i32, MVT::v3i8, Expand); + 
setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand);
 
   setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
   setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
@@ -1194,7 +1195,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
 
     if (NumRegs == 1) {
       // This argument is not split, so the IR type is the memory type.
-      if (ArgVT.isExtended()) {
+      if (ArgVT.isExtended() || (ArgVT.isVector() && !ArgVT.isPow2VectorType())) {
         // We have an extended type, like i24, so we should just use the
         // register type.
         MemVT = RegisterVT;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 48fe788a81dff..5fbdb022b92e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2259,8 +2259,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       [[maybe_unused]] bool ExactlyVecRegSized =
           Subtarget->expandVScale(SubVecVT.getSizeInBits())
               .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
-      assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
-                               .getKnownMinValue()));
       assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
     }
     MVT ContainerVT = VT;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 670dee2edb1df..7277bac973100 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2584,9 +2584,14 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
     return false;
 
-  // TODO: Perhaps an artificial restriction, but worth having whilst getting
-  // the base fixed length RVV support in place.
-  if (!VT.isPow2VectorType())
+  // Only support non-power-of-2 fixed-length vector types whose element
+  // count is 3, 5, 7, or 15.
+  // In theory we could support any element count, but we only add these
+  // specific types as MVTs, to keep the total number of MVTs from growing
+  // too quickly.
+  unsigned NumElems = VT.getVectorNumElements();
+  if (!VT.isPow2VectorType() && NumElems != 3 && NumElems != 5 &&
+      NumElems != 7 && NumElems != 15)
     return false;
 
   return true;
@@ -2623,10 +2628,14 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
+    unsigned NumVLSElts = VT.getVectorNumElements();
+    if (!isPowerOf2_32(NumVLSElts))
+      NumVLSElts = llvm::NextPowerOf2(NumVLSElts);
+
     unsigned NumElts =
-        (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
+        (NumVLSElts * RISCV::RVVBitsPerBlock) / MinVLen;
     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
-    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
+
     return MVT::getScalableVectorVT(EltVT, NumElts);
   }
   }
@@ -3573,6 +3582,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
     // XLenVT if we're producing a v8i1. This results in more consistent
     // codegen across RV32 and RV64.
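// (Editorial illustration, not part of the patch: a worked example of the
// container selection above, assuming a subtarget with MinVLen = 128 (e.g.
// Zvl128b) and RISCV::RVVBitsPerBlock = 64.
//   v7i32 : NumVLSElts 7 -> NextPowerOf2(7) = 8, NumElts = (8 * 64) / 128 = 4,
//           so the container is nxv4i32 (LMUL = 2) -- the same container that
//           v8i32 gets, which is why rounding the element count up to a power
//           of two is sufficient here.
//   v15i64: NumVLSElts 15 -> 16, NumElts = (16 * 64) / 128 = 8, so the
//           container is nxv8i64 (LMUL = 8), matching the e64/m8 vsetvli in
//           the v15i64 tests below. Similarly, the BUILD_VECTOR path below
//           rounds NumViaIntegerBits for a v15i1 mask from 15 up to 16, goes
//           through MVT::v1i16, and bitcasts to v16i1 before extracting the
//           final v15i1.)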
unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen()); + if (!isPowerOf2_32(NumViaIntegerBits)) + NumViaIntegerBits = llvm::NextPowerOf2 (NumViaIntegerBits); NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen()); // If we have to use more than one INSERT_VECTOR_ELT then this // optimization is likely to increase code size; avoid peforming it in @@ -3616,10 +3627,16 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, // If we're producing a smaller vector than our minimum legal integer // type, bitcast to the equivalent (known-legal) mask type, and extract // our final mask. - assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); - Vec = DAG.getBitcast(MVT::v8i1, Vec); - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, - DAG.getConstant(0, DL, XLenVT)); + if (IntegerViaVecVT == MVT::v1i8){ + assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); + Vec = DAG.getBitcast(MVT::v8i1, Vec); + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, + DAG.getConstant(0, DL, XLenVT)); + } else if (IntegerViaVecVT == MVT::v1i16) { + Vec = DAG.getBitcast(MVT::v16i1, Vec); + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, + DAG.getConstant(0, DL, XLenVT)); + } } else { // Else we must have produced an integer type with the same size as the // mask type; bitcast for the final result. @@ -4873,6 +4890,10 @@ static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, EVT VT = SVN->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); + // We don't handle non-power-of-2 here. + if (!isPowerOf2_32(NumElts)) + return false; + unsigned EltSizeInBits = VT.getScalarSizeInBits(); unsigned NumSubElts; if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 90bedf87e04d3..0d2ff8790d301 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1649,28 +1649,16 @@ declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32) define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1, v0.t @@ -1683,21 +1671,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 
zeroex ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 3 ; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vx v24, v8, a4, v0.t ; RV32-NEXT: vsll.vi v24, v24, 24, v0.t -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsll.vi v16, v24, 8, v0.t @@ -1706,19 +1694,19 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: vand.vx v24, v24, a2, v0.t ; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: vand.vx v24, v24, a4, v0.t @@ -1726,42 +1714,45 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v16, v16, 4, v0.t -; RV32-NEXT: vor.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; 
RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vi v16, v16, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vand.vv v8, v8, v24, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t @@ -1770,7 +1761,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v15i64: @@ -1856,27 +1847,15 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: sw zero, 20(sp) +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a1 @@ -1886,66 +1865,69 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vx v0, v8, a4 ; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsll.vi 
v24, v24, 8 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vx v0, v8, a3 ; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vor.vv v0, v0, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v16, v0, v16 ; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 2 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v15i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 6f2e86097d6ff..c3ead344dc1c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -780,7 +780,7 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a4, a4, 
16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 3 @@ -917,7 +917,7 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index f5e6b92905193..faa155a89967e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1503,28 +1503,6 @@ declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1539,60 +1517,40 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv 
v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64: @@ -1649,24 +1607,6 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -1682,32 +1622,39 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, 
v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64_unmasked: @@ -4139,28 +4086,6 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4175,60 +4100,40 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vnot.v v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; 
RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v15i64: @@ -4285,24 +4190,6 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -4318,32 +4205,39 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vnot.v v8, v8 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: 
addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index e90e52fba642b..06202830cf6d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1119,93 +1119,40 @@ declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32) define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; 
RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v16, v24, v0.t -; RV32-NEXT: vsub.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v15i64: @@ -1248,50 +1195,40 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; 
RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_v15i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index dfad7881066a2..1efb49bb751e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1263,86 +1263,44 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a1, 
209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v15i64: @@ -1389,55 +1347,44 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, 
v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_v15i64_unmasked: @@ -3499,86 +3446,44 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 36(sp) -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: 
vsrl.vi v8, v24, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v24, v8, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: @@ -3625,55 +3530,44 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v16, v0, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetivli zero, 30, e32, m8, 
ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index d309da6df7dc7..7c13116d81100 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -220,11 +220,12 @@ define i64 @extractelt_v3i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: vslidedown.vi v8, v8, 5 -; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: vmv.x.s a1, v10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v3i64: @@ -567,14 +568,12 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: add a1, a1, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vx v10, v8, a1 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: addi a1, a1, 1 ; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; @@ -582,7 +581,6 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vslidedown.vx v8, v8, a1 ; RV64-NEXT: vmv.x.s a0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index a6e224d475a31..d1e03fb3ca5cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1181,79 +1181,33 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-LABEL: 
copysign_neg_trunc_v3f16_v3f32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFH-NEXT: vle32.v v8, (a1) ; ZVFH-NEXT: vle16.v v9, (a0) -; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfncvt.f.f.w v10, v8 -; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-RV32: # %bb.0: -; ZVFHMIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9 -; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-RV32-NEXT: addi a1, sp, 8 -; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: fsh fa5, 4(a0) -; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vse32.v v9, (a0) -; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV32-NEXT: ret -; -; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-RV64: # %bb.0: -; ZVFHMIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; ZVFHMIN-RV64-NEXT: vle64.v v8, (a0) -; ZVFHMIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-RV64-NEXT: vse64.v v8, (a2) -; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2) -; ZVFHMIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9 -; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vfsgnj.vv v8, v10, v8 -; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8 -; ZVFHMIN-RV64-NEXT: addi a1, sp, 8 -; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: fsh fa5, 4(a0) -; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vse32.v v9, (a0) -; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; 
ZVFHMIN-NEXT: vfneg.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfsgnj.vv v8, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8 +; ZVFHMIN-NEXT: vse16.v v9, (a0) +; ZVFHMIN-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index bfcc7017178e3..dcbf0d3d64f10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -109,7 +109,7 @@ define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2si_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -122,43 +122,47 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) { define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-LABEL: fp2si_v3f32_v3i15: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH32-NEXT: vmv.x.s a1, v8 -; ZVFH32-NEXT: slli a2, a1, 17 -; ZVFH32-NEXT: srli a2, a2, 19 -; ZVFH32-NEXT: sh a2, 4(a0) -; ZVFH32-NEXT: vmv.x.s a2, v9 -; ZVFH32-NEXT: lui a3, 8 -; ZVFH32-NEXT: addi a3, a3, -1 -; ZVFH32-NEXT: and a2, a2, a3 -; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 -; ZVFH32-NEXT: vmv.x.s a4, v8 -; ZVFH32-NEXT: and a3, a4, a3 -; ZVFH32-NEXT: slli a3, a3, 15 -; ZVFH32-NEXT: slli a1, a1, 30 +; ZVFH32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFH32-NEXT: vslidedown.vi v9, v8, 1 +; ZVFH32-NEXT: vfmv.f.s fa5, v9 +; ZVFH32-NEXT: fcvt.w.s a1, fa5, rtz +; ZVFH32-NEXT: vfmv.f.s fa5, v8 +; ZVFH32-NEXT: fcvt.w.s a2, fa5, rtz +; ZVFH32-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH32-NEXT: vfmv.f.s fa5, v8 +; ZVFH32-NEXT: fcvt.w.s a3, fa5, rtz +; ZVFH32-NEXT: slli a4, a3, 17 +; ZVFH32-NEXT: srli a4, a4, 19 +; ZVFH32-NEXT: sh a4, 4(a0) +; ZVFH32-NEXT: lui a4, 8 +; ZVFH32-NEXT: addi a4, a4, -1 +; ZVFH32-NEXT: and a2, a2, a4 +; ZVFH32-NEXT: and a1, a1, a4 +; ZVFH32-NEXT: slli a1, a1, 15 +; ZVFH32-NEXT: slli a3, a3, 30 +; ZVFH32-NEXT: or a2, a2, a3 ; ZVFH32-NEXT: or a1, a2, a1 -; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2si_v3f32_v3i15: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH64-NEXT: vmv.x.s a1, v9 -; ZVFH64-NEXT: lui a2, 8 -; ZVFH64-NEXT: addiw a2, a2, -1 -; ZVFH64-NEXT: and a1, a1, a2 -; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: and a2, a3, a2 +; ZVFH64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFH64-NEXT: vslidedown.vi v9, v8, 2 +; ZVFH64-NEXT: vfmv.f.s fa5, v9 +; ZVFH64-NEXT: fcvt.l.s a1, fa5, rtz +; ZVFH64-NEXT: vslidedown.vi v9, v8, 1 +; ZVFH64-NEXT: vfmv.f.s fa5, v9 +; ZVFH64-NEXT: fcvt.l.s a2, fa5, rtz +; ZVFH64-NEXT: vfmv.f.s fa5, v8 +; ZVFH64-NEXT: fcvt.l.s a3, fa5, rtz +; ZVFH64-NEXT: lui a4, 8 +; ZVFH64-NEXT: addiw a4, a4, -1 +; ZVFH64-NEXT: and a3, a3, a4 +; ZVFH64-NEXT: and a2, a2, a4 ; ZVFH64-NEXT: slli a2, a2, 15 -; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: 
slli a3, a3, 30 -; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: slli a1, a1, 30 +; ZVFH64-NEXT: or a1, a3, a1 ; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: sw a1, 0(a0) ; ZVFH64-NEXT: slli a1, a1, 19 @@ -168,43 +172,47 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: slli a2, a1, 17 -; ZVFHMIN32-NEXT: srli a2, a2, 19 -; ZVFHMIN32-NEXT: sh a2, 4(a0) -; ZVFHMIN32-NEXT: vmv.x.s a2, v9 -; ZVFHMIN32-NEXT: lui a3, 8 -; ZVFHMIN32-NEXT: addi a3, a3, -1 -; ZVFHMIN32-NEXT: and a2, a2, a3 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 -; ZVFHMIN32-NEXT: vmv.x.s a4, v8 -; ZVFHMIN32-NEXT: and a3, a4, a3 -; ZVFHMIN32-NEXT: slli a3, a3, 15 -; ZVFHMIN32-NEXT: slli a1, a1, 30 +; ZVFHMIN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v9, v8, 1 +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v9 +; ZVFHMIN32-NEXT: fcvt.w.s a1, fa5, rtz +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN32-NEXT: fcvt.w.s a2, fa5, rtz +; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN32-NEXT: fcvt.w.s a3, fa5, rtz +; ZVFHMIN32-NEXT: slli a4, a3, 17 +; ZVFHMIN32-NEXT: srli a4, a4, 19 +; ZVFHMIN32-NEXT: sh a4, 4(a0) +; ZVFHMIN32-NEXT: lui a4, 8 +; ZVFHMIN32-NEXT: addi a4, a4, -1 +; ZVFHMIN32-NEXT: and a2, a2, a4 +; ZVFHMIN32-NEXT: and a1, a1, a4 +; ZVFHMIN32-NEXT: slli a1, a1, 15 +; ZVFHMIN32-NEXT: slli a3, a3, 30 +; ZVFHMIN32-NEXT: or a2, a2, a3 ; ZVFHMIN32-NEXT: or a1, a2, a1 -; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN64-NEXT: vmv.x.s a1, v9 -; ZVFHMIN64-NEXT: lui a2, 8 -; ZVFHMIN64-NEXT: addiw a2, a2, -1 -; ZVFHMIN64-NEXT: and a1, a1, a2 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: and a2, a3, a2 +; ZVFHMIN64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v9, v8, 2 +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v9 +; ZVFHMIN64-NEXT: fcvt.l.s a1, fa5, rtz +; ZVFHMIN64-NEXT: vslidedown.vi v9, v8, 1 +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v9 +; ZVFHMIN64-NEXT: fcvt.l.s a2, fa5, rtz +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN64-NEXT: fcvt.l.s a3, fa5, rtz +; ZVFHMIN64-NEXT: lui a4, 8 +; ZVFHMIN64-NEXT: addiw a4, a4, -1 +; ZVFHMIN64-NEXT: and a3, a3, a4 +; ZVFHMIN64-NEXT: and a2, a2, a4 ; ZVFHMIN64-NEXT: slli a2, a2, 15 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: slli a3, a3, 30 -; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: slli a1, a1, 30 +; ZVFHMIN64-NEXT: or a1, a3, a1 ; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: sw a1, 0(a0) ; ZVFHMIN64-NEXT: slli a1, a1, 19 @@ -219,94 +227,82 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH32-LABEL: fp2ui_v3f32_v3i15: ; ZVFH32: # %bb.0: -; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH32-NEXT: vmv.x.s a1, v8 -; ZVFH32-NEXT: slli a2, a1, 17 -; ZVFH32-NEXT: srli a2, a2, 19 -; ZVFH32-NEXT: sh a2, 4(a0) -; ZVFH32-NEXT: vmv.x.s a2, v9 -; ZVFH32-NEXT: lui a3, 16 -; ZVFH32-NEXT: addi a3, a3, -1 -; ZVFH32-NEXT: and a2, a2, 
a3 -; ZVFH32-NEXT: vslidedown.vi v8, v9, 1 -; ZVFH32-NEXT: vmv.x.s a4, v8 -; ZVFH32-NEXT: and a3, a4, a3 -; ZVFH32-NEXT: slli a3, a3, 15 -; ZVFH32-NEXT: slli a1, a1, 30 -; ZVFH32-NEXT: or a1, a2, a1 +; ZVFH32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFH32-NEXT: vfmv.f.s fa5, v8 +; ZVFH32-NEXT: fcvt.wu.s a1, fa5, rtz +; ZVFH32-NEXT: vslidedown.vi v9, v8, 1 +; ZVFH32-NEXT: vfmv.f.s fa5, v9 +; ZVFH32-NEXT: fcvt.wu.s a2, fa5, rtz +; ZVFH32-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH32-NEXT: vfmv.f.s fa5, v8 +; ZVFH32-NEXT: fcvt.wu.s a3, fa5, rtz +; ZVFH32-NEXT: srli a4, a3, 2 +; ZVFH32-NEXT: sh a4, 4(a0) +; ZVFH32-NEXT: slli a2, a2, 15 +; ZVFH32-NEXT: or a1, a1, a2 +; ZVFH32-NEXT: slli a3, a3, 30 ; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2ui_v3f32_v3i15: ; ZVFH64: # %bb.0: -; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH64-NEXT: vmv.x.s a1, v9 -; ZVFH64-NEXT: lui a2, 16 -; ZVFH64-NEXT: addiw a2, a2, -1 -; ZVFH64-NEXT: and a1, a1, a2 -; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: and a2, a3, a2 +; ZVFH64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFH64-NEXT: vfmv.f.s fa5, v8 +; ZVFH64-NEXT: fcvt.lu.s a1, fa5, rtz +; ZVFH64-NEXT: vslidedown.vi v9, v8, 1 +; ZVFH64-NEXT: vfmv.f.s fa5, v9 +; ZVFH64-NEXT: fcvt.lu.s a2, fa5, rtz +; ZVFH64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFH64-NEXT: vfmv.f.s fa5, v8 +; ZVFH64-NEXT: fcvt.lu.s a3, fa5, rtz +; ZVFH64-NEXT: srli a4, a3, 2 +; ZVFH64-NEXT: sh a4, 4(a0) ; ZVFH64-NEXT: slli a2, a2, 15 -; ZVFH64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH64-NEXT: vmv.x.s a3, v8 +; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: slli a3, a3, 30 ; ZVFH64-NEXT: or a1, a1, a3 -; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: sw a1, 0(a0) -; ZVFH64-NEXT: slli a1, a1, 19 -; ZVFH64-NEXT: srli a1, a1, 51 -; ZVFH64-NEXT: sh a1, 4(a0) ; ZVFH64-NEXT: ret ; ; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: slli a2, a1, 17 -; ZVFHMIN32-NEXT: srli a2, a2, 19 -; ZVFHMIN32-NEXT: sh a2, 4(a0) -; ZVFHMIN32-NEXT: vmv.x.s a2, v9 -; ZVFHMIN32-NEXT: lui a3, 16 -; ZVFHMIN32-NEXT: addi a3, a3, -1 -; ZVFHMIN32-NEXT: and a2, a2, a3 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1 -; ZVFHMIN32-NEXT: vmv.x.s a4, v8 -; ZVFHMIN32-NEXT: and a3, a4, a3 -; ZVFHMIN32-NEXT: slli a3, a3, 15 -; ZVFHMIN32-NEXT: slli a1, a1, 30 -; ZVFHMIN32-NEXT: or a1, a2, a1 +; ZVFHMIN32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN32-NEXT: fcvt.wu.s a1, fa5, rtz +; ZVFHMIN32-NEXT: vslidedown.vi v9, v8, 1 +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v9 +; ZVFHMIN32-NEXT: fcvt.wu.s a2, fa5, rtz +; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN32-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN32-NEXT: fcvt.wu.s a3, fa5, rtz +; ZVFHMIN32-NEXT: srli a4, a3, 2 +; ZVFHMIN32-NEXT: sh a4, 4(a0) +; ZVFHMIN32-NEXT: slli a2, a2, 15 +; ZVFHMIN32-NEXT: or a1, a1, a2 +; ZVFHMIN32-NEXT: slli a3, a3, 30 ; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN64-NEXT: vmv.x.s a1, v9 -; ZVFHMIN64-NEXT: lui a2, 16 -; ZVFHMIN64-NEXT: addiw a2, a2, -1 -; ZVFHMIN64-NEXT: and a1, a1, a2 -; ZVFHMIN64-NEXT: vslidedown.vi 
v8, v9, 1 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: and a2, a3, a2 +; ZVFHMIN64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN64-NEXT: fcvt.lu.s a1, fa5, rtz +; ZVFHMIN64-NEXT: vslidedown.vi v9, v8, 1 +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v9 +; ZVFHMIN64-NEXT: fcvt.lu.s a2, fa5, rtz +; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 2 +; ZVFHMIN64-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN64-NEXT: fcvt.lu.s a3, fa5, rtz +; ZVFHMIN64-NEXT: srli a4, a3, 2 +; ZVFHMIN64-NEXT: sh a4, 4(a0) ; ZVFHMIN64-NEXT: slli a2, a2, 15 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 +; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: slli a3, a3, 30 ; ZVFHMIN64-NEXT: or a1, a1, a3 -; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: sw a1, 0(a0) -; ZVFHMIN64-NEXT: slli a1, a1, 19 -; ZVFHMIN64-NEXT: srli a1, a1, 51 -; ZVFHMIN64-NEXT: sh a1, 4(a0) ; ZVFHMIN64-NEXT: ret %z = fptoui <3 x float> %x to <3 x i15> ret <3 x i15> %z @@ -315,7 +311,7 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2ui_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 7333067e9205e..1d32b33764bbe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -115,7 +115,7 @@ define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: si2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 ; CHECK-NEXT: vfwcvt.f.x.v v8, v9 @@ -273,7 +273,7 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { define <3 x float> @ui2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: ui2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v9, v8, 1, v0 ; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 776a1e9bab6b2..4f0cdd72e6a01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -217,35 +217,18 @@ define void @insertelt_v4i64_store(ptr %x, i64 %y) { define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) { ; RV32-LABEL: insertelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 3 -; RV32-NEXT: vmv.x.s a2, v9 -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: vmv.x.s a3, v9 -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vmv.x.s a4, v9 -; RV32-NEXT: vmv.x.s a5, v8 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v8, a5 -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a1 
+; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vmv.x.s a1, v9 -; RV64-NEXT: vmv.x.s a2, v8 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmv.v.x v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vslideup.vi v8, v10, 2 ; RV64-NEXT: ret %b = insertelement <3 x i64> %a, i64 %y, i32 2 ret <3 x i64> %b diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index bc3e135a588a6..cb0a10f478b8f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -8,51 +8,11 @@ ; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { -; RV32-LABEL: load_factor2_v3: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV32-NEXT: vle32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vwaddu.vv v8, v10, v9 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: vwmaccu.vx v8, a0, v9 -; RV32-NEXT: vmv.v.i v0, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v9, v9, v9 -; RV32-NEXT: vadd.vi v11, v9, 1 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: load_factor2_v3: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; RV64-NEXT: vle32.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vid.v v8 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: vrgather.vv v9, v10, v8 -; RV64-NEXT: vmv.v.i v0, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v12, v10, 4 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v9, v12, 1, v0.t -; RV64-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v11, v10, 2 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vwaddu.vv v8, v10, v11 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vwmaccu.vx v8, a0, v11 -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV64-NEXT: vrgather.vi v8, v12, 0, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: load_factor2_v3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vlseg2e32.v v8, (a0) +; CHECK-NEXT: ret %interleaved.vec = load <6 x i32>, ptr %ptr %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index 901be442c0012..3724021094b8d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -148,22 +148,7 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: 
vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: call llrintf -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 @@ -179,17 +164,11 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV64-NEXT: fcvt.l.s a0, fa5 ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a1, fa5 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 2 -; RV64-NEXT: vfmv.f.s fa5, v9 -; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a0, fa5 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index a90ee3ebb8766..740a196c60e81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -81,7 +81,7 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; RV32-LABEL: lrint_v3f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vi v9, v8, 1 ; RV32-NEXT: vfmv.f.s fa5, v9 ; RV32-NEXT: fcvt.w.s a0, fa5 @@ -89,11 +89,7 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; RV32-NEXT: fcvt.w.s a1, fa5 ; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: vslide1down.vx v9, v9, a0 -; RV32-NEXT: vslidedown.vi v10, v8, 2 -; RV32-NEXT: vfmv.f.s fa5, v10 -; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: vslide1down.vx v9, v9, a0 -; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vslidedown.vi v8, v8, 2 ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.s a0, fa5 ; RV32-NEXT: vslide1down.vx v8, v9, a0 @@ -101,7 +97,7 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; ; RV64-i32-LABEL: lrint_v3f32: ; RV64-i32: # %bb.0: -; RV64-i32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-i32-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; RV64-i32-NEXT: vslidedown.vi v9, v8, 1 ; RV64-i32-NEXT: vfmv.f.s fa5, v9 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 @@ -109,11 +105,7 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; RV64-i32-NEXT: fcvt.l.s a1, fa5 ; RV64-i32-NEXT: vmv.v.x v9, a1 ; RV64-i32-NEXT: vslide1down.vx v9, v9, a0 -; RV64-i32-NEXT: vslidedown.vi v10, v8, 2 -; RV64-i32-NEXT: vfmv.f.s fa5, v10 -; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: vslide1down.vx v9, v9, a0 -; RV64-i32-NEXT: vslidedown.vi v8, v8, 3 +; RV64-i32-NEXT: vslidedown.vi v8, v8, 2 ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: 
fcvt.l.s a0, fa5 ; RV64-i32-NEXT: vslide1down.vx v8, v9, a0 @@ -127,17 +119,11 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; RV64-i64-NEXT: fcvt.l.s a0, fa5 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a1, fa5 -; RV64-i64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-i64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-i64-NEXT: vmv.v.x v10, a1 ; RV64-i64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64-i64-NEXT: vslidedown.vi v9, v8, 2 -; RV64-i64-NEXT: vfmv.f.s fa5, v9 -; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-i64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-i64-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64-i64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-i64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 ; RV64-i64-NEXT: vsetvli zero, zero, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index 6e5ab436fc02d..0cdd4fd5e9ff2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ -121,10 +121,9 @@ define i32 @reduce_sum_16xi32_prefix2(ptr %p) { define i32 @reduce_sum_16xi32_prefix3(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -160,10 +159,9 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) { define i32 @reduce_sum_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -208,10 +206,9 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) { define i32 @reduce_sum_16xi32_prefix7(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -375,10 +372,9 @@ define i32 @reduce_sum_16xi32_prefix14(ptr %p) { define i32 @reduce_sum_16xi32_prefix15(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix15: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -499,10 +495,9 @@ define i32 @reduce_xor_16xi32_prefix2(ptr %p) { define i32 @reduce_xor_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_xor_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; 
CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -537,12 +532,9 @@ define i32 @reduce_and_16xi32_prefix2(ptr %p) { define i32 @reduce_and_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_and_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -576,11 +568,9 @@ define i32 @reduce_or_16xi32_prefix2(ptr %p) { define i32 @reduce_or_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_or_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -619,12 +609,9 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) { define i32 @reduce_smax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smax_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -658,13 +645,9 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) { define i32 @reduce_smin_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smin_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vredmin.vs v8, v8, v10 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -698,11 +681,9 @@ define i32 @reduce_umax_16xi32_prefix2(ptr %p) { define i32 @reduce_umax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_umax_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -734,27 +715,13 @@ define i32 @reduce_umin_16xi32_prefix2(ptr %p) { } define i32 @reduce_umin_16xi32_prefix5(ptr %p) { -; RV32-LABEL: reduce_umin_16xi32_prefix5: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 5, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v10 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: reduce_umin_16xi32_prefix5: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vmv.s.x v10, 
a0 -; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v10 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: reduce_umin_16xi32_prefix5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredminu.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 %e0 = extractelement <16 x i32> %v, i32 0 %e1 = extractelement <16 x i32> %v, i32 1 @@ -787,11 +754,10 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { define float @reduce_fadd_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xi32_prefix5: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 566c9070eab51..385758d30148a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1304,9 +1304,7 @@ define float @vreduce_fmin_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x @@ -1551,9 +1549,7 @@ define float @vreduce_fmax_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x @@ -1753,20 +1749,15 @@ define float @vreduce_fminimum_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10, v0.t +; CHECK-NEXT: vcpop.m a0, v10 ; CHECK-NEXT: beqz a0, .LBB108_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB108_2: -; CHECK-NEXT: lui a0, 522240 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x @@ -1779,9 +1770,7 @@ define float @vreduce_fminimum_v7f32_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 522240 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x @@ -2506,20 +2495,15 @@ define float @vreduce_fmaximum_v7f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vmfne.vv v10, v8, v8 -; 
CHECK-NEXT: vcpop.m a0, v10, v0.t +; CHECK-NEXT: vcpop.m a0, v10 ; CHECK-NEXT: beqz a0, .LBB136_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB136_2: -; CHECK-NEXT: lui a0, 1046528 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x @@ -2532,9 +2516,7 @@ define float @vreduce_fmaximum_v7f32_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 1046528 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <7 x float>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 56944e2aa5074..1eb45b87f6644 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1766,9 +1766,7 @@ define i8 @vreduce_and_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -2369,8 +2367,7 @@ define i8 @vreduce_or_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -3604,9 +3601,7 @@ define i8 @vreduce_smin_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -4206,9 +4201,7 @@ define i8 @vreduce_smax_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -4808,9 +4801,7 @@ define i8 @vreduce_umin_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -5410,8 +5401,7 @@ define i8 @vreduce_umax_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x @@ -6012,17 +6002,13 @@ define i8 @vreduce_mul_v3i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 1 -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: vsetivli 
zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.x.s a2, v8 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: mul a0, a1, a0 ; CHECK-NEXT: ret %v = load <3 x i8>, ptr %x %red = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 33e9cde4c30ab..8faeb5f834d68 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -22,7 +22,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m, ; ; ZVFHMIN-LABEL: fcmp_oeq_vv_v7f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index e773d93fad479..dcb5ee6ad6f0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -640,45 +640,15 @@ define <8 x double> @reverse_v8f64(<8 x double> %a) { define <3 x i64> @reverse_v3i64(<3 x i64> %a) { -; RV32-LABEL: reverse_v3i64: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI44_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI44_0) -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle16.v v12, (a0) -; RV32-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: reverse_v3i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vi v12, v10, 2 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret -; -; RV32-ZVBB-LABEL: reverse_v3i64: -; RV32-ZVBB: # %bb.0: -; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI44_0) -; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI44_0) -; RV32-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-ZVBB-NEXT: vle16.v v12, (a0) -; RV32-ZVBB-NEXT: vrgatherei16.vv v10, v8, v12 -; RV32-ZVBB-NEXT: vmv.v.v v8, v10 -; RV32-ZVBB-NEXT: ret -; -; RV64-ZVBB-LABEL: reverse_v3i64: -; RV64-ZVBB: # %bb.0: -; RV64-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-ZVBB-NEXT: vid.v v10 -; RV64-ZVBB-NEXT: vrsub.vi v12, v10, 2 -; RV64-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-ZVBB-NEXT: vrgatherei16.vv v10, v8, v12 -; RV64-ZVBB-NEXT: vmv.v.v v8, v10 -; RV64-ZVBB-NEXT: ret +; CHECK-LABEL: reverse_v3i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v12, v10, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %res = shufflevector <3 x i64> %a, <3 x i64> poison, <3 x i32> ret <3 x i64> %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll index e2580c132f65e..877f5da763146 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll @@ -19,7 +19,7 @@ declare <3 x i8> @llvm.experimental.stepvector.v3i8() define <3 x i8> @stepvector_v3i8() { ; CHECK-LABEL: stepvector_v3i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: ret %v = call <3 x i8> @llvm.experimental.stepvector.v3i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll index f023c760f14a7..b076cc074cab7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll @@ -119,12 +119,12 @@ define <3 x half> @vfadd_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfadd_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fadd.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll index 9fb8377d5a5ef..32aa40230aedf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll @@ -119,12 +119,12 @@ define <3 x half> @vfdiv_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfdiv_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fdiv.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index e2e48cee3eacc..6d2aafdf0c7bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -762,7 +762,7 @@ declare <15 x double> @llvm.vp.fma.v15f64(<15 x double>, <15 x double>, <15 x do define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t @@ -775,7 +775,7 @@ define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 
15, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll index 64ce0a12de8cf..2b9d8fbc0b67b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll @@ -119,12 +119,12 @@ define <3 x half> @vfmul_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfmul_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fmul.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index 6c695b43d2718..cf76c1ab9d013 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -526,7 +526,7 @@ declare <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double>, <15 x double>, <15 define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t @@ -539,7 +539,7 @@ define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 15, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll index eb717a851ed46..85c73224f0944 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll @@ -119,12 +119,12 @@ define <3 x half> @vfsub_vv_v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i3 ; ; ZVFHMIN-LABEL: vfsub_vv_v3f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v9, v9, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret %v = call <3 x half> @llvm.vp.fsub.v3f16(<3 x half> %va, <3 x half> %b, <3 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll index 53598c609107b..a703e450071c0 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll @@ -63,18 +63,14 @@ entry: define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind { ; RV32-LABEL: unaligned_memcpy3: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lbu a2, 2(a1) -; RV32-NEXT: sb a2, 2(a0) -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV32-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: unaligned_memcpy3: ; RV64: # %bb.0: # %entry -; RV64-NEXT: lbu a2, 2(a1) -; RV64-NEXT: sb a2, 2(a0) -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v8, (a1) ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret @@ -133,29 +129,15 @@ entry: define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind { ; RV32-LABEL: unaligned_memcpy7: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lbu a2, 6(a1) -; RV32-NEXT: sb a2, 6(a0) -; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RV32-NEXT: vsetivli zero, 7, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: addi a1, a1, 4 -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 4 -; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: unaligned_memcpy7: ; RV64: # %bb.0: # %entry -; RV64-NEXT: lbu a2, 6(a1) -; RV64-NEXT: sb a2, 6(a0) -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: addi a1, a1, 4 -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64-NEXT: vsetivli zero, 7, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; @@ -214,49 +196,23 @@ entry: define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind { ; RV32-LABEL: unaligned_memcpy15: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lbu a2, 14(a1) -; RV32-NEXT: sb a2, 14(a0) -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 15, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: addi a2, a1, 12 -; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV32-NEXT: vle8.v v8, (a2) -; RV32-NEXT: addi a2, a0, 12 -; RV32-NEXT: vse8.v v8, (a2) -; RV32-NEXT: addi a1, a1, 8 -; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: unaligned_memcpy15: ; RV64: # %bb.0: # %entry -; RV64-NEXT: lbu a2, 14(a1) -; RV64-NEXT: sb a2, 14(a0) -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 15, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a1) ; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: addi a2, a1, 12 -; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; RV64-NEXT: vle8.v v8, (a2) -; RV64-NEXT: addi a2, a0, 12 -; RV64-NEXT: vse8.v v8, (a2) -; RV64-NEXT: addi a1, a1, 8 -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: unaligned_memcpy15: ; RV32-FAST: # %bb.0: # %entry ; RV32-FAST-NEXT: lw a2, 11(a1) ; RV32-FAST-NEXT: sw a2, 11(a0) -; RV32-FAST-NEXT: lw a2, 8(a1) -; RV32-FAST-NEXT: sw a2, 8(a0) -; RV32-FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-FAST-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; RV32-FAST-NEXT: vle32.v v8, (a1) ; RV32-FAST-NEXT: vse32.v v8, (a0) ; RV32-FAST-NEXT: ret @@ -770,9 +726,7 @@ define 
void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind { ; RV32-NEXT: sb a2, 14(a0) ; RV32-NEXT: lh a2, 12(a1) ; RV32-NEXT: sh a2, 12(a0) -; RV32-NEXT: lw a2, 8(a1) -; RV32-NEXT: sw a2, 8(a0) -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret @@ -793,9 +747,7 @@ define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST: # %bb.0: # %entry ; RV32-FAST-NEXT: lw a2, 11(a1) ; RV32-FAST-NEXT: sw a2, 11(a0) -; RV32-FAST-NEXT: lw a2, 8(a1) -; RV32-FAST-NEXT: sw a2, 8(a0) -; RV32-FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-FAST-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; RV32-FAST-NEXT: vle32.v v8, (a1) ; RV32-FAST-NEXT: vse32.v v8, (a0) ; RV32-FAST-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index dc27158cfb31f..20948924092e5 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -678,35 +678,37 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vsext.vf4 v12, v11 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32MV-NEXT: vmsne.vv v0, v8, v12 +; RV32MV-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32MV-NEXT: vmv.v.i v8, 0 ; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32MV-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32MV-NEXT: vse32.v v8, (s0) +; RV32MV-NEXT: vmv.x.s a0, v8 +; RV32MV-NEXT: sw a0, 0(s0) ; RV32MV-NEXT: vslidedown.vi v10, v8, 1 ; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: vslidedown.vi v10, v8, 2 -; RV32MV-NEXT: vmv.x.s a1, v10 -; RV32MV-NEXT: slli a2, a1, 1 -; RV32MV-NEXT: sub a2, a2, a0 -; RV32MV-NEXT: sw a2, 4(s0) -; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32MV-NEXT: vslidedown.vi v10, v8, 4 -; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: srli a2, a0, 30 -; RV32MV-NEXT: vslidedown.vi v10, v8, 5 -; RV32MV-NEXT: vmv.x.s a3, v10 -; RV32MV-NEXT: slli a3, a3, 2 -; RV32MV-NEXT: or a2, a3, a2 -; RV32MV-NEXT: andi a2, a2, 7 -; RV32MV-NEXT: sb a2, 12(s0) -; RV32MV-NEXT: srli a1, a1, 31 -; RV32MV-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32MV-NEXT: vslidedown.vi v8, v8, 3 +; RV32MV-NEXT: slli a1, a0, 1 +; RV32MV-NEXT: li a2, 32 +; RV32MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32MV-NEXT: vsrl.vx v12, v8, a2 +; RV32MV-NEXT: vmv.x.s a3, v12 +; RV32MV-NEXT: andi a3, a3, 1 +; RV32MV-NEXT: or a1, a3, a1 +; RV32MV-NEXT: sw a1, 4(s0) +; RV32MV-NEXT: vslidedown.vi v8, v8, 2 +; RV32MV-NEXT: vmv.x.s a1, v8 +; RV32MV-NEXT: srli a3, a1, 30 +; RV32MV-NEXT: vsrl.vx v8, v8, a2 +; RV32MV-NEXT: vmv.x.s a4, v8 +; RV32MV-NEXT: slli a4, a4, 2 +; RV32MV-NEXT: or a3, a4, a3 +; RV32MV-NEXT: andi a3, a3, 7 +; RV32MV-NEXT: sb a3, 12(s0) +; RV32MV-NEXT: srli a0, a0, 31 +; RV32MV-NEXT: vsrl.vx v8, v10, a2 ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 -; RV32MV-NEXT: slli a0, a0, 2 -; RV32MV-NEXT: or a0, a1, a0 +; RV32MV-NEXT: slli a1, a1, 2 +; RV32MV-NEXT: or a0, a0, a1 ; RV32MV-NEXT: or a0, a0, a2 ; RV32MV-NEXT: sw a0, 8(s0) ; RV32MV-NEXT: csrr a0, vlenb @@ -744,9 +746,9 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: add a4, a4, a5 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) +; RV64MV-NEXT: slli a6, a4, 3 ; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: slli a4, a4, 3 -; RV64MV-NEXT: sub a2, a2, a4 +; RV64MV-NEXT: sub a2, a2, a6 ; RV64MV-NEXT: mulh a4, a3, a5 ; RV64MV-NEXT: 
srli a5, a4, 63 ; RV64MV-NEXT: add a4, a4, a5 @@ -776,26 +778,32 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vmv.s.x v10, a2 ; RV64MV-NEXT: vsext.vf8 v12, v10 ; RV64MV-NEXT: vmsne.vv v0, v8, v12 +; RV64MV-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; RV64MV-NEXT: vmv.v.i v8, 0 -; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64MV-NEXT: vslidedown.vi v10, v8, 2 -; RV64MV-NEXT: vmv.x.s a2, v10 -; RV64MV-NEXT: slli a3, a2, 31 -; RV64MV-NEXT: srli a3, a3, 61 -; RV64MV-NEXT: sb a3, 12(a0) -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: and a1, a3, a1 -; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64MV-NEXT: vslidedown.vi v8, v8, 1 -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: slli a4, a3, 33 -; RV64MV-NEXT: or a1, a1, a4 +; RV64MV-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64MV-NEXT: vslidedown.vi v9, v8, 1 +; RV64MV-NEXT: vmv.x.s a2, v9 +; RV64MV-NEXT: andi a2, a2, 1 +; RV64MV-NEXT: neg a3, a2 +; RV64MV-NEXT: vslidedown.vi v8, v8, 2 +; RV64MV-NEXT: vmv.x.s a4, v8 +; RV64MV-NEXT: andi a4, a4, 1 +; RV64MV-NEXT: neg a5, a4 +; RV64MV-NEXT: vfirst.m a6, v0 +; RV64MV-NEXT: snez a6, a6 +; RV64MV-NEXT: addi a6, a6, -1 +; RV64MV-NEXT: and a1, a6, a1 +; RV64MV-NEXT: slli a2, a2, 33 +; RV64MV-NEXT: sub a1, a1, a2 ; RV64MV-NEXT: sd a1, 0(a0) -; RV64MV-NEXT: slli a2, a2, 2 +; RV64MV-NEXT: slli a5, a5, 29 +; RV64MV-NEXT: srli a5, a5, 61 +; RV64MV-NEXT: sb a5, 12(a0) +; RV64MV-NEXT: slli a4, a4, 2 ; RV64MV-NEXT: slli a3, a3, 31 ; RV64MV-NEXT: srli a3, a3, 62 -; RV64MV-NEXT: or a2, a3, a2 -; RV64MV-NEXT: sw a2, 8(a0) +; RV64MV-NEXT: subw a3, a3, a4 +; RV64MV-NEXT: sw a3, 8(a0) ; RV64MV-NEXT: ret %ld = load <3 x i33>, ptr %X %srem = srem <3 x i33> %ld, diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index c016e8f316363..a6fc1835dc284 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -522,16 +522,16 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-LABEL: test_urem_vec: ; RV32MV: # %bb.0: ; RV32MV-NEXT: lw a1, 0(a0) -; RV32MV-NEXT: andi a2, a1, 2047 +; RV32MV-NEXT: slli a2, a1, 10 +; RV32MV-NEXT: srli a2, a2, 21 +; RV32MV-NEXT: lbu a3, 4(a0) +; RV32MV-NEXT: andi a4, a1, 2047 ; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32MV-NEXT: vmv.v.x v8, a2 -; RV32MV-NEXT: lbu a2, 4(a0) -; RV32MV-NEXT: slli a3, a1, 10 -; RV32MV-NEXT: srli a3, a3, 21 -; RV32MV-NEXT: vslide1down.vx v8, v8, a3 -; RV32MV-NEXT: slli a2, a2, 10 +; RV32MV-NEXT: vmv.v.x v8, a4 +; RV32MV-NEXT: vslide1down.vx v8, v8, a2 +; RV32MV-NEXT: slli a3, a3, 10 ; RV32MV-NEXT: srli a1, a1, 22 -; RV32MV-NEXT: or a1, a1, a2 +; RV32MV-NEXT: or a1, a1, a3 ; RV32MV-NEXT: andi a1, a1, 2047 ; RV32MV-NEXT: vslide1down.vx v8, v8, a1 ; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) @@ -562,22 +562,29 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vor.vv v8, v8, v9 ; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: vmsltu.vv v0, v10, v8 +; RV32MV-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; RV32MV-NEXT: vmv.v.i v8, 0 -; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32MV-NEXT: vslidedown.vi v9, v8, 2 +; RV32MV-NEXT: vmerge.vim v8, v8, 1, v0 +; RV32MV-NEXT: vslidedown.vi v9, v8, 1 ; RV32MV-NEXT: vmv.x.s a1, v9 -; RV32MV-NEXT: slli a2, a1, 21 -; RV32MV-NEXT: srli a2, a2, 31 -; RV32MV-NEXT: sb a2, 4(a0) -; RV32MV-NEXT: vmv.x.s a2, v8 -; RV32MV-NEXT: andi a2, a2, 2047 -; RV32MV-NEXT: vslidedown.vi v8, v8, 1 +; RV32MV-NEXT: slli a1, a1, 31 +; RV32MV-NEXT: srai a1, a1, 31 +; RV32MV-NEXT: vfirst.m a2, v0 
+; RV32MV-NEXT: snez a2, a2 +; RV32MV-NEXT: addi a2, a2, -1 +; RV32MV-NEXT: vslidedown.vi v8, v8, 2 ; RV32MV-NEXT: vmv.x.s a3, v8 -; RV32MV-NEXT: andi a3, a3, 2047 -; RV32MV-NEXT: slli a3, a3, 11 -; RV32MV-NEXT: slli a1, a1, 22 +; RV32MV-NEXT: andi a3, a3, 1 +; RV32MV-NEXT: neg a4, a3 +; RV32MV-NEXT: slli a4, a4, 21 +; RV32MV-NEXT: srli a4, a4, 31 +; RV32MV-NEXT: sb a4, 4(a0) +; RV32MV-NEXT: andi a2, a2, 2047 +; RV32MV-NEXT: andi a1, a1, 2047 +; RV32MV-NEXT: slli a1, a1, 11 ; RV32MV-NEXT: or a1, a2, a1 -; RV32MV-NEXT: or a1, a1, a3 +; RV32MV-NEXT: slli a3, a3, 22 +; RV32MV-NEXT: sub a1, a1, a3 ; RV32MV-NEXT: sw a1, 0(a0) ; RV32MV-NEXT: ret ; @@ -623,23 +630,29 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vor.vv v8, v8, v9 ; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: vmsltu.vv v0, v10, v8 +; RV64MV-NEXT: vsetivli zero, 3, e8, mf4, ta, ma ; RV64MV-NEXT: vmv.v.i v8, 0 -; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64MV-NEXT: vmv.x.s a1, v8 -; RV64MV-NEXT: andi a1, a1, 2047 -; RV64MV-NEXT: vslidedown.vi v9, v8, 1 -; RV64MV-NEXT: vmv.x.s a2, v9 +; RV64MV-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64MV-NEXT: vslidedown.vi v9, v8, 2 +; RV64MV-NEXT: vmv.x.s a1, v9 +; RV64MV-NEXT: andi a1, a1, 1 +; RV64MV-NEXT: vslidedown.vi v8, v8, 1 +; RV64MV-NEXT: vmv.x.s a2, v8 +; RV64MV-NEXT: slli a2, a2, 63 +; RV64MV-NEXT: srai a2, a2, 63 +; RV64MV-NEXT: vfirst.m a3, v0 +; RV64MV-NEXT: snez a3, a3 +; RV64MV-NEXT: addi a3, a3, -1 +; RV64MV-NEXT: andi a3, a3, 2047 ; RV64MV-NEXT: andi a2, a2, 2047 ; RV64MV-NEXT: slli a2, a2, 11 -; RV64MV-NEXT: vslidedown.vi v8, v8, 2 -; RV64MV-NEXT: vmv.x.s a3, v8 -; RV64MV-NEXT: slli a3, a3, 22 -; RV64MV-NEXT: or a1, a1, a3 -; RV64MV-NEXT: or a1, a1, a2 -; RV64MV-NEXT: sw a1, 0(a0) -; RV64MV-NEXT: slli a1, a1, 31 -; RV64MV-NEXT: srli a1, a1, 63 -; RV64MV-NEXT: sb a1, 4(a0) +; RV64MV-NEXT: or a2, a3, a2 +; RV64MV-NEXT: slli a1, a1, 22 +; RV64MV-NEXT: sub a2, a2, a1 +; RV64MV-NEXT: sw a2, 0(a0) +; RV64MV-NEXT: slli a2, a2, 31 +; RV64MV-NEXT: srli a2, a2, 63 +; RV64MV-NEXT: sb a2, 4(a0) ; RV64MV-NEXT: ret %ld = load <3 x i11>, ptr %X %urem = urem <3 x i11> %ld,