@@ -21986,15 +21986,25 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
21986
21986
// Extend everything to 80 bits to force it to be done on x87.
21987
21987
// TODO: Are there any fast-math-flags to propagate here?
21988
21988
if (IsStrict) {
21989
- SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
21990
- {Chain, Fild, Fudge});
21989
+ unsigned Opc = ISD::STRICT_FADD;
21990
+ // Windows needs the precision control changed to 80 bits around this add.
21991
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
21992
+ Opc = X86ISD::STRICT_FP80_ADD;
21993
+
21994
+ SDValue Add =
21995
+ DAG.getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
21991
21996
// STRICT_FP_ROUND can't handle equal types.
21992
21997
if (DstVT == MVT::f80)
21993
21998
return Add;
21994
21999
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
21995
22000
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
21996
22001
}
21997
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
22002
+ unsigned Opc = ISD::FADD;
22003
+ // Windows needs the precision control changed to 80 bits around this add.
22004
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
22005
+ Opc = X86ISD::FP80_ADD;
22006
+
22007
+ SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
21998
22008
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
21999
22009
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
22000
22010
}
@@ -34790,6 +34800,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
34790
34800
NODE_NAME_CASE(AESDECWIDE256KL)
34791
34801
NODE_NAME_CASE(CMPCCXADD)
34792
34802
NODE_NAME_CASE(TESTUI)
34803
+ NODE_NAME_CASE(FP80_ADD)
34804
+ NODE_NAME_CASE(STRICT_FP80_ADD)
34793
34805
}
34794
34806
return nullptr;
34795
34807
#undef NODE_NAME_CASE
@@ -37300,6 +37312,69 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
37300
37312
return BB;
37301
37313
}
37302
37314
37315
+ case X86::FP80_ADDr:
37316
+ case X86::FP80_ADDm32: {
37317
+ // Change the floating point control register to use double extended
37318
+ // precision when performing the addition.
37319
+ int OrigCWFrameIdx =
37320
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
37321
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)),
37322
+ OrigCWFrameIdx);
37323
+
37324
+ // Load the old value of the control word...
37325
+ Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
37326
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
37327
+ OrigCWFrameIdx);
37328
+
37329
+ // OR 0b11 into bits 8 and 9. 0b11 is the encoding for double extended
37330
+ // precision.
37331
+ Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
37332
+ BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
37333
+ .addReg(OldCW, RegState::Kill)
37334
+ .addImm(0x300);
37335
+
37336
+ // Extract to 16 bits.
37337
+ Register NewCW16 =
37338
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
37339
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
37340
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
37341
+
37342
+ // Prepare memory for FLDCW.
37343
+ int NewCWFrameIdx =
37344
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
37345
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
37346
+ NewCWFrameIdx)
37347
+ .addReg(NewCW16, RegState::Kill);
37348
+
37349
+ // Reload the modified control word now...
37350
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
37351
+ NewCWFrameIdx);
37352
+
37353
+ // Do the addition.
37354
+ if (MI.getOpcode() == X86::FP80_ADDr) {
37355
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80))
37356
+ .add(MI.getOperand(0))
37357
+ .add(MI.getOperand(1))
37358
+ .add(MI.getOperand(2));
37359
+ } else {
37360
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80m32))
37361
+ .add(MI.getOperand(0))
37362
+ .add(MI.getOperand(1))
37363
+ .add(MI.getOperand(2))
37364
+ .add(MI.getOperand(3))
37365
+ .add(MI.getOperand(4))
37366
+ .add(MI.getOperand(5))
37367
+ .add(MI.getOperand(6));
37368
+ }
37369
+
37370
+ // Reload the original control word now.
37371
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
37372
+ OrigCWFrameIdx);
37373
+
37374
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
37375
+ return BB;
37376
+ }
37377
+
37303
37378
case X86::FP32_TO_INT16_IN_MEM:
37304
37379
case X86::FP32_TO_INT32_IN_MEM:
37305
37380
case X86::FP32_TO_INT64_IN_MEM:
0 commit comments