Skip to content

DAG: Fix ABI lowering with FP promote in strictfp functions #74405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 37 additions & 22 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CC);

/// getCopyFromParts - Create a value that contains the specified legal parts
Expand All @@ -163,6 +164,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue
getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CC = std::nullopt,
std::optional<ISD::NodeType> AssertOp = std::nullopt) {
// Let the target assemble the parts if it wants to
Expand All @@ -173,7 +175,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,

if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
InChain, CC);

assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
Expand All @@ -194,10 +196,10 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V,
InChain);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2,
PartVT, HalfVT, V, InChain);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
Expand All @@ -213,7 +215,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
OddVT, V, InChain, CC);

// Combine the round and odd parts.
Lo = Val;
Expand Down Expand Up @@ -243,7 +245,8 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V,
InChain, CC);
}
}

Expand Down Expand Up @@ -283,10 +286,20 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,

if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
if (ValueVT.bitsLT(Val.getValueType())) {

SDValue NoChange =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));

if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
llvm::Attribute::StrictFP)) {
return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
NoChange);
}

return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, NoChange);
}

return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
Expand Down Expand Up @@ -324,6 +337,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
SDValue InChain,
std::optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
Expand Down Expand Up @@ -362,17 +376,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT,
V, InChain, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT,
IntermediateVT, V, InChain, CallConv);
}

// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
Expand Down Expand Up @@ -926,7 +940,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}

Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
RegisterVT, ValueVT, V, Chain, CallConv);
Part += NumRegs;
Parts.clear();
}
Expand Down Expand Up @@ -10628,9 +10642,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);

ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
ReturnValues.push_back(getCopyFromParts(
CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr,
CLI.Chain, CLI.CallConv, AssertOp));
CurReg += NumRegs;
}

Expand Down Expand Up @@ -11109,8 +11123,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
std::optional<ISD::NodeType> AssertOp;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
SDValue ArgValue =
getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp);

MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Expand Down Expand Up @@ -11182,7 +11197,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertZext;

ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
PartVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp));
}

Expand Down
51 changes: 5 additions & 46 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1094,52 +1094,11 @@ define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
ret <4 x i1> %1
}

define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
; GFX7CHECK-LABEL: isnan_bf16_strictfp:
; GFX7CHECK: ; %bb.0:
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isnan_bf16_strictfp:
; GFX8CHECK: ; %bb.0:
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: isnan_bf16_strictfp:
; GFX9CHECK: ; %bb.0:
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: isnan_bf16_strictfp:
; GFX10CHECK: ; %bb.0:
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: isnan_bf16_strictfp:
; GFX11CHECK: ; %bb.0:
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
%1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
ret i1 %1
}
; FIXME: Broken for gfx6/7
; define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
; %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
; ret i1 %1
; }

define i1 @isinf_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: isinf_bf16:
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
Expand Down
110 changes: 0 additions & 110 deletions llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll

This file was deleted.

Loading