Skip to content

Commit 921e89c

Browse files
[SVE] Only combine (fneg (fma)) => FNMLA with nsz
-(Za + Zm * Zn) != (-Za + Zm * (-Zn)) when the FMA produces a zero output (e.g. all-zero inputs can produce a -0 output). Add a PatFrag to check for the presence of nsz on the fneg, and add tests which ensure the combine does not fire in the absence of nsz. See https://reviews.llvm.org/D90901 for a similar discussion on X86. Differential Revision: https://reviews.llvm.org/D109525
1 parent 41def32 commit 921e89c

File tree

4 files changed

+93
-7
lines changed

4 files changed

+93
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3111,6 +3111,8 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
31113111

31123112
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
31133113
SDNodeFlags Flags;
3114+
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
3115+
Flags.copyFMF(*FPOp);
31143116

31153117
SDValue Op = getValue(I.getOperand(0));
31163118
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19117,7 +19117,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
1911719117
if (isMergePassthruOpcode(NewOp))
1911819118
Operands.push_back(DAG.getUNDEF(VT));
1911919119

19120-
return DAG.getNode(NewOp, DL, VT, Operands);
19120+
return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
1912119121
}
1912219122

1912319123
// If a fixed length vector operation has no side effects when applied to

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
275275
return N->hasOneUse();
276276
}]>;
277277

278+
def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
279+
(AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
280+
return N->getFlags().hasNoSignedZeros();
281+
}]>;
282+
278283
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
279284
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
280285
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -536,7 +541,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
536541
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
537542

538543
// Zd = -(Za + Zn * Zm)
539-
def : Pat<(AArch64fneg_mt PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
544+
// (with nsz neg.)
545+
def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
540546
(!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
541547

542548
// Zda = Zda + Zn * Zm

llvm/test/CodeGen/AArch64/sve-fp-combine.ll

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ define <vscale x 8 x half> @fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x
549549
; CHECK-NEXT: ret
550550
%mul = fmul contract <vscale x 8 x half> %m1, %m2
551551
%add = fadd contract <vscale x 8 x half> %mul, %acc
552-
%res = fneg contract <vscale x 8 x half> %add
552+
%res = fneg contract nsz <vscale x 8 x half> %add
553553
ret <vscale x 8 x half> %res
554554
}
555555

@@ -561,7 +561,7 @@ define <vscale x 4 x half> @fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale
561561
; CHECK-NEXT: ret
562562
%mul = fmul contract <vscale x 4 x half> %m1, %m2
563563
%add = fadd contract <vscale x 4 x half> %mul, %acc
564-
%res = fneg contract <vscale x 4 x half> %add
564+
%res = fneg contract nsz <vscale x 4 x half> %add
565565
ret <vscale x 4 x half> %res
566566
}
567567

@@ -573,7 +573,7 @@ define <vscale x 2 x half> @fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale
573573
; CHECK-NEXT: ret
574574
%mul = fmul contract <vscale x 2 x half> %m1, %m2
575575
%add = fadd contract <vscale x 2 x half> %mul, %acc
576-
%res = fneg contract <vscale x 2 x half> %add
576+
%res = fneg contract nsz <vscale x 2 x half> %add
577577
ret <vscale x 2 x half> %res
578578
}
579579

@@ -585,7 +585,7 @@ define <vscale x 4 x float> @fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale
585585
; CHECK-NEXT: ret
586586
%mul = fmul contract <vscale x 4 x float> %m1, %m2
587587
%add = fadd contract <vscale x 4 x float> %mul, %acc
588-
%res = fneg contract <vscale x 4 x float> %add
588+
%res = fneg contract nsz <vscale x 4 x float> %add
589589
ret <vscale x 4 x float> %res
590590
}
591591

@@ -597,7 +597,7 @@ define <vscale x 2 x float> @fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vsca
597597
; CHECK-NEXT: ret
598598
%mul = fmul contract <vscale x 2 x float> %m1, %m2
599599
%add = fadd contract <vscale x 2 x float> %mul, %acc
600-
%res = fneg contract <vscale x 2 x float> %add
600+
%res = fneg contract nsz <vscale x 2 x float> %add
601601
ret <vscale x 2 x float> %res
602602
}
603603

@@ -606,6 +606,84 @@ define <vscale x 2 x double> @fnmla_d_reversed(<vscale x 2 x double> %acc, <vsca
606606
; CHECK: // %bb.0:
607607
; CHECK-NEXT: ptrue p0.d
608608
; CHECK-NEXT: fnmla z0.d, p0/m, z1.d, z2.d
609+
; CHECK-NEXT: ret
610+
%mul = fmul contract <vscale x 2 x double> %m1, %m2
611+
%add = fadd contract <vscale x 2 x double> %mul, %acc
612+
%res = fneg contract nsz <vscale x 2 x double> %add
613+
ret <vscale x 2 x double> %res
614+
}
615+
616+
define <vscale x 8 x half> @signed_zeros_negtest_fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
617+
; CHECK-LABEL: signed_zeros_negtest_fnmla_h_reversed:
618+
; CHECK: // %bb.0:
619+
; CHECK-NEXT: ptrue p0.h
620+
; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
621+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
622+
; CHECK-NEXT: ret
623+
%mul = fmul contract <vscale x 8 x half> %m1, %m2
624+
%add = fadd contract <vscale x 8 x half> %mul, %acc
625+
%res = fneg contract <vscale x 8 x half> %add
626+
ret <vscale x 8 x half> %res
627+
}
628+
629+
define <vscale x 4 x half> @signed_zeros_negtest_fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
630+
; CHECK-LABEL: signed_zeros_negtest_fnmla_hx4_reversed:
631+
; CHECK: // %bb.0:
632+
; CHECK-NEXT: ptrue p0.s
633+
; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
634+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
635+
; CHECK-NEXT: ret
636+
%mul = fmul contract <vscale x 4 x half> %m1, %m2
637+
%add = fadd contract <vscale x 4 x half> %mul, %acc
638+
%res = fneg contract <vscale x 4 x half> %add
639+
ret <vscale x 4 x half> %res
640+
}
641+
642+
define <vscale x 2 x half> @signed_zeros_negtest_fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
643+
; CHECK-LABEL: signed_zeros_negtest_fnmla_hx2_reversed:
644+
; CHECK: // %bb.0:
645+
; CHECK-NEXT: ptrue p0.d
646+
; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
647+
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
648+
; CHECK-NEXT: ret
649+
%mul = fmul contract <vscale x 2 x half> %m1, %m2
650+
%add = fadd contract <vscale x 2 x half> %mul, %acc
651+
%res = fneg contract <vscale x 2 x half> %add
652+
ret <vscale x 2 x half> %res
653+
}
654+
655+
define <vscale x 4 x float> @signed_zeros_negtest_fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
656+
; CHECK-LABEL: signed_zeros_negtest_fnmla_s_reversed:
657+
; CHECK: // %bb.0:
658+
; CHECK-NEXT: ptrue p0.s
659+
; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
660+
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
661+
; CHECK-NEXT: ret
662+
%mul = fmul contract <vscale x 4 x float> %m1, %m2
663+
%add = fadd contract <vscale x 4 x float> %mul, %acc
664+
%res = fneg contract <vscale x 4 x float> %add
665+
ret <vscale x 4 x float> %res
666+
}
667+
668+
define <vscale x 2 x float> @signed_zeros_negtest_fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
669+
; CHECK-LABEL: signed_zeros_negtest_fnmla_sx2_reversed:
670+
; CHECK: // %bb.0:
671+
; CHECK-NEXT: ptrue p0.d
672+
; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
673+
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
674+
; CHECK-NEXT: ret
675+
%mul = fmul contract <vscale x 2 x float> %m1, %m2
676+
%add = fadd contract <vscale x 2 x float> %mul, %acc
677+
%res = fneg contract <vscale x 2 x float> %add
678+
ret <vscale x 2 x float> %res
679+
}
680+
681+
define <vscale x 2 x double> @signed_zeros_negtest_fnmla_d_reversed(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
682+
; CHECK-LABEL: signed_zeros_negtest_fnmla_d_reversed:
683+
; CHECK: // %bb.0:
684+
; CHECK-NEXT: ptrue p0.d
685+
; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
686+
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
609687
; CHECK-NEXT: ret
610688
%mul = fmul contract <vscale x 2 x double> %m1, %m2
611689
%add = fadd contract <vscale x 2 x double> %mul, %acc

0 commit comments

Comments
 (0)