Skip to content

Commit 111932d

Browse files
authored
[RISCV] Fix same mask vmerge peephole discarding false operand (#107827)
This fixes the issue raised in #106108 (comment) True's passthru needs to be equivalent to vmerge's false, but we also allow true's passthru to be undef. However if it's undef then we need to replace it with false, otherwise we end up discarding the false operand entirely. The changes in fixed-vectors-strided-load-store-asm.ll undo the changes in #106108 where we introduced this miscompile.
1 parent 2d338be commit 111932d

File tree

3 files changed

+47
-18
lines changed

3 files changed

+47
-18
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6666
bool convertToWholeRegister(MachineInstr &MI) const;
6767
bool convertToUnmasked(MachineInstr &MI) const;
6868
bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
69-
bool convertSameMaskVMergeToVMv(MachineInstr &MI) const;
69+
bool convertSameMaskVMergeToVMv(MachineInstr &MI);
7070
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
7171
bool foldVMV_V_V(MachineInstr &MI);
7272

@@ -396,7 +396,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
396396
/// ->
397397
/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
398398
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
399-
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
399+
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
400400
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
401401
if (!NewOpc)
402402
return false;
@@ -405,18 +405,24 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
405405
!hasSameEEW(MI, *True))
406406
return false;
407407

408-
// True's passthru needs to be equivalent to False
409-
Register TruePassthruReg = True->getOperand(1).getReg();
410-
Register FalseReg = MI.getOperand(2).getReg();
411-
if (TruePassthruReg != RISCV::NoRegister && TruePassthruReg != FalseReg)
412-
return false;
413-
414408
const MachineInstr *TrueV0Def = V0Defs.lookup(True);
415409
const MachineInstr *MIV0Def = V0Defs.lookup(&MI);
416410
assert(TrueV0Def && TrueV0Def->isCopy() && MIV0Def && MIV0Def->isCopy());
417411
if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg())
418412
return false;
419413

414+
// True's passthru needs to be equivalent to False
415+
Register TruePassthruReg = True->getOperand(1).getReg();
416+
Register FalseReg = MI.getOperand(2).getReg();
417+
if (TruePassthruReg != FalseReg) {
418+
// If True's passthru is undef see if we can change it to False
419+
if (TruePassthruReg != RISCV::NoRegister ||
420+
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
421+
!ensureDominates(MI.getOperand(2), *True))
422+
return false;
423+
True->getOperand(1).setReg(MI.getOperand(2).getReg());
424+
}
425+
420426
MI.setDesc(TII->get(NewOpc));
421427
MI.removeOperand(2); // False operand
422428
MI.removeOperand(3); // Mask operand

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,12 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
6262
; CHECK-NEXT: li a4, 5
6363
; CHECK-NEXT: .LBB1_1: # %vector.body
6464
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
65-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
66-
; CHECK-NEXT: vlse8.v v8, (a1), a4, v0.t
67-
; CHECK-NEXT: vle8.v v9, (a0)
68-
; CHECK-NEXT: vadd.vv v8, v9, v8
69-
; CHECK-NEXT: vse8.v v8, (a0)
65+
; CHECK-NEXT: vmv1r.v v9, v8
66+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
67+
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
68+
; CHECK-NEXT: vle8.v v10, (a0)
69+
; CHECK-NEXT: vadd.vv v9, v10, v9
70+
; CHECK-NEXT: vse8.v v9, (a0)
7071
; CHECK-NEXT: addi a0, a0, 32
7172
; CHECK-NEXT: addi a1, a1, 160
7273
; CHECK-NEXT: bne a0, a2, .LBB1_1
@@ -343,11 +344,12 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
343344
; CHECK-NEXT: li a4, 5
344345
; CHECK-NEXT: .LBB7_1: # %vector.body
345346
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
346-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
347-
; CHECK-NEXT: vle8.v v8, (a1)
348-
; CHECK-NEXT: vlse8.v v9, (a0), a4, v0.t
349-
; CHECK-NEXT: vadd.vv v8, v9, v8
350-
; CHECK-NEXT: vsse8.v v8, (a0), a4, v0.t
347+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
348+
; CHECK-NEXT: vle8.v v9, (a1)
349+
; CHECK-NEXT: vmv1r.v v10, v8
350+
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
351+
; CHECK-NEXT: vadd.vv v9, v10, v9
352+
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
351353
; CHECK-NEXT: addi a1, a1, 32
352354
; CHECK-NEXT: addi a0, a0, 160
353355
; CHECK-NEXT: bne a1, a2, .LBB7_1

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,24 @@ body: |
138138
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
139139
$v0 = COPY %mask
140140
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
141+
...
142+
---
143+
name: same_mask_undef_truepassthru
144+
body: |
145+
bb.0:
146+
liveins: $v8, $v0
147+
; CHECK-LABEL: name: same_mask_undef_truepassthru
148+
; CHECK: liveins: $v8, $v0
149+
; CHECK-NEXT: {{ $}}
150+
; CHECK-NEXT: %false:vrnov0 = COPY $v8
151+
; CHECK-NEXT: %mask:vr = COPY $v0
152+
; CHECK-NEXT: $v0 = COPY %mask
153+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 1 /* ta, mu */
154+
; CHECK-NEXT: $v0 = COPY %mask
155+
%false:vrnov0 = COPY $v8
156+
%mask:vr = COPY $v0
157+
$v0 = COPY %mask
158+
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
159+
$v0 = COPY %mask
160+
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, 4, 5 /* e32 */
161+
...

0 commit comments

Comments
 (0)