Skip to content

Commit c799ba0

Browse files
committed
[AMDGPU] Fix canonicalization of truncated values.
We were relying on rounding operations to canonicalize their results implicitly. That is generally safe, except for roundings that may be optimized away. Fixes #82937.
1 parent 113052b commit c799ba0

File tree

6 files changed

+251
-70
lines changed

6 files changed

+251
-70
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2398,6 +2398,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
23982398
case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break;
23992399

24002400
// Unary FP Operations
2401+
case ISD::FREEZE:
24012402
case ISD::FABS:
24022403
case ISD::FCBRT:
24032404
case ISD::FCEIL:

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12416,7 +12416,7 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
1241612416
}
1241712417

1241812418
bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
12419-
unsigned MaxDepth) const {
12419+
bool &Trunc, unsigned MaxDepth) const {
1242012420
unsigned Opcode = Op.getOpcode();
1242112421
if (Opcode == ISD::FCANONICALIZE)
1242212422
return true;
@@ -12450,7 +12450,6 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
1245012450
case ISD::FSQRT:
1245112451
case ISD::FDIV:
1245212452
case ISD::FREM:
12453-
case ISD::FP_ROUND:
1245412453
case ISD::FP_EXTEND:
1245512454
case ISD::FLDEXP:
1245612455
case AMDGPUISD::FMUL_LEGACY:
@@ -12473,12 +12472,17 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
1247312472
case AMDGPUISD::CVT_F32_UBYTE3:
1247412473
return true;
1247512474

12475+
case ISD::FP_ROUND:
12476+
if (Op.getConstantOperandVal(1))
12477+
Trunc = true;
12478+
return true;
12479+
1247612480
// It can/will be lowered or combined as a bit operation.
1247712481
// Their inputs need to be checked recursively to handle them.
1247812482
case ISD::FNEG:
1247912483
case ISD::FABS:
1248012484
case ISD::FCOPYSIGN:
12481-
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
12485+
return isCanonicalized(DAG, Op.getOperand(0), Trunc, MaxDepth - 1);
1248212486

1248312487
case ISD::FSIN:
1248412488
case ISD::FCOS:
@@ -12513,47 +12517,48 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
1251312517

1251412518
// FIXME: Does this apply with clamp? It's implemented with max.
1251512519
for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
12516-
if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
12520+
if (!isCanonicalized(DAG, Op.getOperand(I), Trunc, MaxDepth - 1))
1251712521
return false;
1251812522
}
1251912523

1252012524
return true;
1252112525
}
1252212526
case ISD::SELECT: {
12523-
return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
12524-
isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
12527+
return isCanonicalized(DAG, Op.getOperand(1), Trunc, MaxDepth - 1) &&
12528+
isCanonicalized(DAG, Op.getOperand(2), Trunc, MaxDepth - 1);
1252512529
}
1252612530
case ISD::BUILD_VECTOR: {
1252712531
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
1252812532
SDValue SrcOp = Op.getOperand(i);
12529-
if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
12533+
if (!isCanonicalized(DAG, SrcOp, Trunc, MaxDepth - 1))
1253012534
return false;
1253112535
}
1253212536

1253312537
return true;
1253412538
}
1253512539
case ISD::EXTRACT_VECTOR_ELT:
1253612540
case ISD::EXTRACT_SUBVECTOR: {
12537-
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
12541+
return isCanonicalized(DAG, Op.getOperand(0), Trunc, MaxDepth - 1);
1253812542
}
1253912543
case ISD::INSERT_VECTOR_ELT: {
12540-
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
12541-
isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
12544+
return isCanonicalized(DAG, Op.getOperand(0), Trunc, MaxDepth - 1) &&
12545+
isCanonicalized(DAG, Op.getOperand(1), Trunc, MaxDepth - 1);
1254212546
}
1254312547
case ISD::UNDEF:
1254412548
// Could be anything.
1254512549
return false;
1254612550

1254712551
case ISD::BITCAST:
12548-
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
12552+
return isCanonicalized(DAG, Op.getOperand(0), Trunc, MaxDepth - 1);
1254912553
case ISD::TRUNCATE: {
1255012554
// Hack round the mess we make when legalizing extract_vector_elt
1255112555
if (Op.getValueType() == MVT::i16) {
1255212556
SDValue TruncSrc = Op.getOperand(0);
1255312557
if (TruncSrc.getValueType() == MVT::i32 &&
1255412558
TruncSrc.getOpcode() == ISD::BITCAST &&
1255512559
TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
12556-
return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
12560+
return isCanonicalized(DAG, TruncSrc.getOperand(0), Trunc,
12561+
MaxDepth - 1);
1255712562
}
1255812563
}
1255912564
return false;
@@ -12831,7 +12836,10 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
1283112836
}
1283212837
}
1283312838

12834-
return isCanonicalized(DAG, N0) ? N0 : SDValue();
12839+
bool Trunc = false;
12840+
return isCanonicalized(DAG, N0, Trunc)
12841+
? Trunc ? DAG.getNode(ISD::FREEZE, SDLoc(N), VT, N0) : N0
12842+
: SDValue();
1283512843
}
1283612844

1283712845
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
516516
Register N1) const override;
517517

518518
/// Convenience overload for callers that only need the yes/no answer.
///
/// Forwards to the overload that takes a \p Trunc out-parameter and discards
/// whatever that overload reports through it.
///
/// \param DAG      Selection DAG passed through to the full overload.
/// \param Op       Value to test.
/// \param MaxDepth Depth budget passed through to the full overload.
/// \returns The result of the full overload.
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                     unsigned MaxDepth = 5) const {
  // The callee only assigns the flag on some paths; give it a defined value
  // up front so it never holds an indeterminate value, even though it is not
  // read here.
  bool Trunc = false;
  return isCanonicalized(DAG, Op, Trunc, MaxDepth);
}
523+
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, bool &Trunc,
519524
unsigned MaxDepth = 5) const;
520525
bool isCanonicalized(Register Reg, MachineFunction &MF,
521526
unsigned MaxDepth = 5) const;

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26818,11 +26818,19 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
2681826818
; GCN-LABEL: v_canonicalize_bf16:
2681926819
; GCN: ; %bb.0:
2682026820
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26821+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
26822+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
26823+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
26824+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2682126825
; GCN-NEXT: s_setpc_b64 s[30:31]
2682226826
;
2682326827
; GFX7-LABEL: v_canonicalize_bf16:
2682426828
; GFX7: ; %bb.0:
2682526829
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26830+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
26831+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
26832+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
26833+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2682626834
; GFX7-NEXT: s_setpc_b64 s[30:31]
2682726835
;
2682826836
; GFX8-LABEL: v_canonicalize_bf16:

0 commit comments

Comments
 (0)