
Commit e3ffc4b

[RISCV] Unify getDemanded between forward and backwards passes in RISCVInsertVSETVLI (#92860)
We have two rules in needVSETVLI where we can relax the demanded fields for slides and splats when VL=1. However, these aren't present in getDemanded, which prevents us from coalescing some vsetvlis around slides and splats in the backwards pass.

The reasoning as to why they weren't in getDemanded is that they require checking the value of the AVL operand, which may be stale in the backwards pass: the actual VL or VTYPE value may differ from what was precisely requested in the pseudo's operands. Using the original operands should actually be fine though, as we only care about what the instruction originally demanded; the current value of VL or VTYPE shouldn't influence this.

This addresses some of the regressions we are seeing in #70549 from splats and slides getting reordered.
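For example, in extractelt_v8i32 (fixed-vectors-extract.ll below), the vsetivli requested by the VL=1 slide no longer blocks coalescing, since a slide with an undefined merge operand demands only a non-zero VL rather than exactly VL=1, and its e32/m2 VTYPE matches the load's:

    ; Before
    vsetivli zero, 8, e32, m2, ta, ma
    vle32.v v8, (a0)
    vsetivli zero, 1, e32, m2, ta, ma
    vslidedown.vi v8, v8, 6
    vmv.x.s a0, v8

    ; After: VL=8 is non-zero, so the slide runs under the load's configuration
    vsetivli zero, 8, e32, m2, ta, ma
    vle32.v v8, (a0)
    vslidedown.vi v8, v8, 6
    vmv.x.s a0, v8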
1 parent 66b76fa / commit e3ffc4b

16 files changed: +136, -212 lines

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 41 additions & 37 deletions
@@ -378,10 +378,10 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
 
 /// Return the fields and properties demanded by the provided instruction.
 DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
-  // Warning: This function has to work on both the lowered (i.e. post
-  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
-  // that it can't use the value of a SEW, VL, or Policy operand as they might
-  // be stale after lowering.
+  // This function works in RISCVCoalesceVSETVLI too. We can still use the value
+  // of a SEW, VL, or Policy operand even though it might not be the exact value
+  // in the VL or VTYPE, since we only care about what the instruction
+  // originally demanded.
 
   // Most instructions don't use any of these subfeilds.
   DemandedFields Res;
@@ -459,6 +459,43 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
     Res.MaskPolicy = false;
   }
 
+  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
+    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+    // A slidedown/slideup with an *undefined* merge op can freely clobber
+    // elements not copied from the source vector (e.g. masked off, tail, or
+    // slideup's prefix). Notes:
+    // * We can't modify SEW here since the slide amount is in units of SEW.
+    // * VL=1 is special only because we have existing support for zero vs
+    //   non-zero VL. We could generalize this if we had a VL > C predicate.
+    // * The LMUL1 restriction is for machines whose latency may depend on VL.
+    // * As above, this is only legal for tail "undefined" not "agnostic".
+    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
+        hasUndefinedMergeOp(MI)) {
+      Res.VLAny = false;
+      Res.VLZeroness = true;
+      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+      Res.TailPolicy = false;
+    }
+
+    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the
+    // same semantically as vmv.s.x. This is particularly useful since we don't
+    // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in
+    // it's place. Since a splat is non-constant time in LMUL, we do need to be
+    // careful to not increase the number of active vector registers (unlike for
+    // vmv.s.x.)
+    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
+        hasUndefinedMergeOp(MI)) {
+      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
+      Res.SEWLMULRatio = false;
+      Res.VLAny = false;
+      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
+        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
+      else
+        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+      Res.TailPolicy = false;
+    }
+  }
+
   return Res;
 }
 
@@ -1149,39 +1186,6 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
 
   DemandedFields Used = getDemanded(MI, ST);
 
-  // A slidedown/slideup with an *undefined* merge op can freely clobber
-  // elements not copied from the source vector (e.g. masked off, tail, or
-  // slideup's prefix). Notes:
-  // * We can't modify SEW here since the slide amount is in units of SEW.
-  // * VL=1 is special only because we have existing support for zero vs
-  //   non-zero VL. We could generalize this if we had a VL > C predicate.
-  // * The LMUL1 restriction is for machines whose latency may depend on VL.
-  // * As above, this is only legal for tail "undefined" not "agnostic".
-  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
-      hasUndefinedMergeOp(MI)) {
-    Used.VLAny = false;
-    Used.VLZeroness = true;
-    Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
-    Used.TailPolicy = false;
-  }
-
-  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the same
-  // semantically as vmv.s.x. This is particularly useful since we don't have an
-  // immediate form of vmv.s.x, and thus frequently use vmv.v.i in it's place.
-  // Since a splat is non-constant time in LMUL, we do need to be careful to not
-  // increase the number of active vector registers (unlike for vmv.s.x.)
-  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
-      Require.getAVLImm() == 1 && hasUndefinedMergeOp(MI)) {
-    Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
-    Used.SEWLMULRatio = false;
-    Used.VLAny = false;
-    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
-      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
-    else
-      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
-    Used.TailPolicy = false;
-  }
-
   if (CurInfo.isCompatible(Used, Require, LIS))
     return false;
 
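To make the coalescing reasoning concrete, consider the first test below (extractelt_nxv16i1 in extractelt-i1.ll). The annotations here are an informal sketch; the authoritative checks live in areCompatibleVTYPEs and VSETVLIInfo::isCompatible:

    ; Incoming state: VTYPE = e8, m2, ta, ma and a non-zero VL.
    ; The VL=1 vslidedown below has an undefined merge operand, so per
    ; getDemanded it only demands that VL be non-zero (VLZeroness, not VLAny)
    ; and places no demand on the tail policy; its e8/m2 VTYPE matches anyway.
    ; The incoming state satisfies all of that, so the backwards pass can now
    ; delete the "vsetivli zero, 1, e8, m2, ta, ma" that used to precede it.
    vslidedown.vx v8, v8, a1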

llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll

Lines changed: 0 additions & 3 deletions
@@ -78,7 +78,6 @@ define i1 @extractelt_nxv16i1(ptr %x, i64 %idx) nounwind {
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -96,7 +95,6 @@ define i1 @extractelt_nxv32i1(ptr %x, i64 %idx) nounwind {
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 1, e8, m4, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -114,7 +112,6 @@ define i1 @extractelt_nxv64i1(ptr %x, i64 %idx) nounwind {
 ; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll

Lines changed: 5 additions & 12 deletions
@@ -73,7 +73,6 @@ define void @extract_v1i32_v8i32_4(ptr %x, ptr %y) {
 ; VLA:       # %bb.0:
 ; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT:    vle32.v v8, (a0)
-; VLA-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; VLA-NEXT:    vslidedown.vi v8, v8, 4
 ; VLA-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; VLA-NEXT:    vse32.v v8, (a1)
@@ -96,7 +95,6 @@ define void @extract_v1i32_v8i32_5(ptr %x, ptr %y) {
 ; VLA:       # %bb.0:
 ; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT:    vle32.v v8, (a0)
-; VLA-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; VLA-NEXT:    vslidedown.vi v8, v8, 5
 ; VLA-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; VLA-NEXT:    vse32.v v8, (a1)
@@ -391,19 +389,17 @@ define void @extract_v8i1_v64i1_8(ptr %x, ptr %y) {
 ; VLA-NEXT:    li a2, 64
 ; VLA-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; VLA-NEXT:    vlm.v v8, (a0)
-; VLA-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; VLA-NEXT:    vslidedown.vi v8, v8, 1
 ; VLA-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; VLA-NEXT:    vslidedown.vi v8, v8, 1
 ; VLA-NEXT:    vsm.v v8, (a1)
 ; VLA-NEXT:    ret
 ;
 ; VLS-LABEL: extract_v8i1_v64i1_8:
 ; VLS:       # %bb.0:
 ; VLS-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
 ; VLS-NEXT:    vlm.v v8, (a0)
-; VLS-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; VLS-NEXT:    vslidedown.vi v8, v8, 1
 ; VLS-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; VLS-NEXT:    vslidedown.vi v8, v8, 1
 ; VLS-NEXT:    vsm.v v8, (a1)
 ; VLS-NEXT:    ret
   %a = load <64 x i1>, ptr %x
@@ -418,19 +414,17 @@ define void @extract_v8i1_v64i1_48(ptr %x, ptr %y) {
 ; VLA-NEXT:    li a2, 64
 ; VLA-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
 ; VLA-NEXT:    vlm.v v8, (a0)
-; VLA-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; VLA-NEXT:    vslidedown.vi v8, v8, 6
 ; VLA-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; VLA-NEXT:    vslidedown.vi v8, v8, 6
 ; VLA-NEXT:    vsm.v v8, (a1)
 ; VLA-NEXT:    ret
 ;
 ; VLS-LABEL: extract_v8i1_v64i1_48:
 ; VLS:       # %bb.0:
 ; VLS-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
 ; VLS-NEXT:    vlm.v v8, (a0)
-; VLS-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; VLS-NEXT:    vslidedown.vi v8, v8, 6
 ; VLS-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; VLS-NEXT:    vslidedown.vi v8, v8, 6
 ; VLS-NEXT:    vsm.v v8, (a1)
 ; VLS-NEXT:    ret
   %a = load <64 x i1>, ptr %x
@@ -853,9 +847,8 @@ define void @extract_v2i1_nxv32i1_26(<vscale x 32 x i1> %x, ptr %y) {
 define void @extract_v8i1_nxv32i1_16(<vscale x 32 x i1> %x, ptr %y) {
 ; CHECK-LABEL: extract_v8i1_nxv32i1_16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v0, 2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v0, 2
 ; CHECK-NEXT:    vsm.v v8, (a0)
 ; CHECK-NEXT:    ret
   %c = call <8 x i1> @llvm.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %x, i64 16)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll

Lines changed: 2 additions & 13 deletions
@@ -138,7 +138,6 @@ define i32 @extractelt_v8i32(ptr %x) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 6
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -152,9 +151,9 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vx v10, v8, a0
 ; RV32-NEXT:    vmv.x.s a1, v10
 ; RV32-NEXT:    vmv.x.s a0, v8
@@ -164,7 +163,6 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
@@ -233,7 +231,6 @@ define i64 @extractelt_v3i64(ptr %x) nounwind {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
@@ -452,7 +449,6 @@ define i8 @extractelt_v32i8_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -466,7 +462,6 @@ define i16 @extractelt_v16i16_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -481,7 +476,6 @@ define i32 @extractelt_v8i32_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -497,10 +491,10 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vadd.vv v8, v8, v8
-; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV32-NEXT:    vslidedown.vx v8, v8, a1
 ; RV32-NEXT:    vmv.x.s a0, v8
 ; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    vmv.x.s a1, v8
 ; RV32-NEXT:    ret
@@ -510,7 +504,6 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vx v8, v8, a1
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret
@@ -526,7 +519,6 @@ define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -542,7 +534,6 @@ define float @extractelt_v8f32_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -558,7 +549,6 @@ define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8
-; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -594,7 +584,6 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vadd.vv v8, v8, v8
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vx v8, v8, a1
 ; RV64-NEXT:    vmv.x.s a0, v8
 ; RV64-NEXT:    ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 1 addition & 2 deletions
@@ -5,9 +5,8 @@
 define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: shuffle_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 11
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 11
 ; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 3 additions & 6 deletions
@@ -5,9 +5,8 @@
 define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: shuffle_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 11
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 11
 ; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
 ; CHECK-NEXT:    ret
   %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@@ -29,9 +28,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %x, <8 x i32> %y) {
 define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
 ; CHECK-LABEL: shuffle_xv_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 9
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 9
 ; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
 ; CHECK-NEXT:    ret
   %s = shufflevector <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i16> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -41,9 +39,8 @@ define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) {
 define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
 ; CHECK-LABEL: shuffle_vx_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 6
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 6
 ; CHECK-NEXT:    vmerge.vim v8, v8, 5, v0
 ; CHECK-NEXT:    ret
   %s = shufflevector <4 x i16> %x, <4 x i16> <i16 5, i16 5, i16 5, i16 5>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
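The shuffle tests exercise the splat rule: a VL=1 vmv.v.i with an undefined merge operand behaves like vmv.s.x, so it demands only a SEW no smaller than its original one, an LMUL no larger than M1, and a non-zero VL. Annotated informally, the coalesced form above is legal because the vmerge's configuration satisfies all three demands:

    vsetivli zero, 4, e16, mf2, ta, ma  ; e16 >= e8, mf2 <= m1, VL=4 != 0
    vmv.v.i v0, 11                      ; only the first element was demanded; the
                                        ; extra elements written were tail-undefined
    vmerge.vvm v8, v9, v8, v0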
