Skip to content

Commit 78fd3d9

Browse files
SC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:c464fea779c8 into amd-gfx:d5cba0d12a48
Local branch amd-gfx (d5cba0d): Merged main:ed1d29028492 into amd-gfx:ece3beeeb394. Remote branch main (c464fea): [DAG] Constant fold FMAD (llvm#69324).
2 parents d5cba0d + c464fea commit 78fd3d9

File tree

4 files changed

+33
-30
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 477837
19+
#define LLVM_MAIN_REVISION 477838
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ namespace {
495495
SDValue visitFSUB(SDNode *N);
496496
SDValue visitFMUL(SDNode *N);
497497
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
498+
SDValue visitFMAD(SDNode *N);
498499
SDValue visitFDIV(SDNode *N);
499500
SDValue visitFREM(SDNode *N);
500501
SDValue visitFSQRT(SDNode *N);
@@ -2000,6 +2001,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
20002001
case ISD::FSUB: return visitFSUB(N);
20012002
case ISD::FMUL: return visitFMUL(N);
20022003
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
2004+
case ISD::FMAD: return visitFMAD(N);
20032005
case ISD::FDIV: return visitFDIV(N);
20042006
case ISD::FREM: return visitFREM(N);
20052007
case ISD::FSQRT: return visitFSQRT(N);
@@ -16752,6 +16754,21 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
1675216754
return SDValue();
1675316755
}
1675416756

16757+
// Combine hook for ISD::FMAD nodes (operands 0, 1 and 2 of N).
// The only transform performed here is constant folding; an empty SDValue is
// returned when it does not apply, signalling that N was left unchanged.
SDValue DAGCombiner::visitFMAD(SDNode *N) {
16758+
SDValue N0 = N->getOperand(0);
16759+
SDValue N1 = N->getOperand(1);
16760+
SDValue N2 = N->getOperand(2);
16761+
EVT VT = N->getValueType(0);
16762+
SDLoc DL(N);
16763+
16764+
// Constant fold FMAD.
// When all three operands are scalar FP constants, request the same FMAD
// node again through DAG.getNode; the fold itself is expected to happen
// there (NOTE(review): relies on getNode's FMA/FMAD constant-folding case).
16765+
if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
16766+
isa<ConstantFPSDNode>(N2))
16767+
return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
16768+
16769+
// No combine applied.
return SDValue();
16770+
}
16771+
1675516772
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
1675616773
// reciprocal.
1675716774
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7075,7 +7075,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
70757075
"Operand is DELETED_NODE!");
70767076
// Perform various simplifications.
70777077
switch (Opcode) {
7078-
case ISD::FMA: {
7078+
case ISD::FMA:
7079+
case ISD::FMAD: {
70797080
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
70807081
assert(N1.getValueType() == VT && N2.getValueType() == VT &&
70817082
N3.getValueType() == VT && "FMA types must match!");
@@ -7086,7 +7087,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
70867087
APFloat V1 = N1CFP->getValueAPF();
70877088
const APFloat &V2 = N2CFP->getValueAPF();
70887089
const APFloat &V3 = N3CFP->getValueAPF();
7089-
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
7090+
if (Opcode == ISD::FMAD) {
7091+
V1.multiply(V2, APFloat::rmNearestTiesToEven);
7092+
V1.add(V3, APFloat::rmNearestTiesToEven);
7093+
} else
7094+
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
70907095
return getConstantFP(V1, DL, VT);
70917096
}
70927097
break;

llvm/test/CodeGen/AMDGPU/udiv.ll

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2619,39 +2619,20 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
26192619
; VI-LABEL: v_test_udiv64_mulhi_fold:
26202620
; VI: ; %bb.0:
26212621
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2622-
; VI-NEXT: v_mov_b32_e32 v2, 0x4f800000
2623-
; VI-NEXT: v_madak_f32 v2, 0, v2, 0x47c35000
2624-
; VI-NEXT: v_rcp_f32_e32 v2, v2
2622+
; VI-NEXT: v_mov_b32_e32 v4, 0xa7c5
2623+
; VI-NEXT: v_mul_u32_u24_e32 v3, 0x500, v4
2624+
; VI-NEXT: v_mul_hi_u32_u24_e32 v2, 0x500, v4
2625+
; VI-NEXT: v_add_u32_e32 v3, vcc, 0x4237, v3
2626+
; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v2, vcc
2627+
; VI-NEXT: v_add_u32_e32 v6, vcc, 0xa9000000, v3
26252628
; VI-NEXT: s_mov_b32 s6, 0xfffe7960
2626-
; VI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
2627-
; VI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
2628-
; VI-NEXT: v_trunc_f32_e32 v3, v3
2629-
; VI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
2630-
; VI-NEXT: v_cvt_u32_f32_e32 v6, v2
2631-
; VI-NEXT: v_cvt_u32_f32_e32 v7, v3
2632-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
2633-
; VI-NEXT: v_mul_lo_u32 v4, v7, s6
2634-
; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v6
2635-
; VI-NEXT: v_add_u32_e32 v8, vcc, v3, v4
2636-
; VI-NEXT: v_mul_hi_u32 v5, v6, v2
2637-
; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
2638-
; VI-NEXT: v_add_u32_e32 v9, vcc, v5, v3
2639-
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
2640-
; VI-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc
2641-
; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0
2642-
; VI-NEXT: v_add_u32_e32 v2, vcc, v9, v2
2643-
; VI-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc
2644-
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v5, vcc
2645-
; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4
2646-
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2647-
; VI-NEXT: v_add_u32_e32 v6, vcc, v6, v2
2648-
; VI-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
26492629
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
2630+
; VI-NEXT: v_addc_u32_e32 v7, vcc, v5, v4, vcc
26502631
; VI-NEXT: v_mul_lo_u32 v4, v7, s6
26512632
; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v6
2633+
; VI-NEXT: v_mul_hi_u32 v8, v6, v2
26522634
; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3
26532635
; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
2654-
; VI-NEXT: v_mul_hi_u32 v8, v6, v2
26552636
; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3
26562637
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
26572638
; VI-NEXT: v_addc_u32_e32 v9, vcc, 0, v4, vcc

0 commit comments

Comments
 (0)