Skip to content

Commit 16fc3dc

Browse files
committed
[DAG] fdiv X, c2 -> fmul X, 1/c2
This moves the combine of fdiv by a constant into an fmul out of the 'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal())' block, so that it still triggers without those flags when the reciprocal of the constant is exact (i.e. no precision is lost). An extra check for Recip.isDenormal() is added to the exact case, as multiple places make reference to denormal reciprocals being unsafe or slow on certain platforms.
1 parent 264b1b2 commit 16fc3dc

File tree

8 files changed

+186
-164
lines changed

8 files changed

+186
-164
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+22-19
Original file line numberDiff line numberDiff line change
@@ -17271,26 +17271,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1727117271
if (SDValue V = combineRepeatedFPDivisors(N))
1727217272
return V;
1727317273

17274-
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17275-
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
17276-
if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17277-
// Compute the reciprocal 1.0 / c2.
17278-
const APFloat &N1APF = N1CFP->getValueAPF();
17279-
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17280-
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17281-
// Only do the transform if the reciprocal is a legal fp immediate that
17282-
// isn't too nasty (eg NaN, denormal, ...).
17283-
if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17284-
(!LegalOperations ||
17285-
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17286-
// backend)... we should handle this gracefully after Legalize.
17287-
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17288-
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17289-
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17290-
return DAG.getNode(ISD::FMUL, DL, VT, N0,
17291-
DAG.getConstantFP(Recip, DL, VT));
17292-
}
17274+
// fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17275+
// the loss is acceptable with AllowReciprocal.
17276+
if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17277+
// Compute the reciprocal 1.0 / c2.
17278+
const APFloat &N1APF = N1CFP->getValueAPF();
17279+
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17280+
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17281+
// Only do the transform if the reciprocal is a legal fp immediate that
17282+
// isn't too nasty (eg NaN, denormal, ...).
17283+
if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17284+
(st == APFloat::opInexact &&
17285+
(Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17286+
(!LegalOperations ||
17287+
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17288+
// backend)... we should handle this gracefully after Legalize.
17289+
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17290+
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17291+
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17292+
return DAG.getNode(ISD::FMUL, DL, VT, N0,
17293+
DAG.getConstantFP(Recip, DL, VT));
17294+
}
1729317295

17296+
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
1729417297
// If this FDIV is part of a reciprocal square root, it may be folded
1729517298
// into a target-specific square root estimate instruction.
1729617299
if (N1.getOpcode() == ISD::FSQRT) {

llvm/test/CodeGen/AArch64/fcvt-fixed.ll

+16-16
Original file line numberDiff line numberDiff line change
@@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
412412
; CHECK-NO16-LABEL: scvtf_f16_i32_7:
413413
; CHECK-NO16: // %bb.0:
414414
; CHECK-NO16-NEXT: scvtf s1, w0
415-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
415+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
416416
; CHECK-NO16-NEXT: fcvt h1, s1
417417
; CHECK-NO16-NEXT: fcvt s1, h1
418-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
418+
; CHECK-NO16-NEXT: fmul s0, s1, s0
419419
; CHECK-NO16-NEXT: fcvt h0, s0
420420
; CHECK-NO16-NEXT: ret
421421
;
@@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
432432
; CHECK-NO16-LABEL: scvtf_f16_i32_15:
433433
; CHECK-NO16: // %bb.0:
434434
; CHECK-NO16-NEXT: scvtf s1, w0
435-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
435+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
436436
; CHECK-NO16-NEXT: fcvt h1, s1
437437
; CHECK-NO16-NEXT: fcvt s1, h1
438-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
438+
; CHECK-NO16-NEXT: fmul s0, s1, s0
439439
; CHECK-NO16-NEXT: fcvt h0, s0
440440
; CHECK-NO16-NEXT: ret
441441
;
@@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
452452
; CHECK-NO16-LABEL: scvtf_f16_i64_7:
453453
; CHECK-NO16: // %bb.0:
454454
; CHECK-NO16-NEXT: scvtf s1, x0
455-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
455+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
456456
; CHECK-NO16-NEXT: fcvt h1, s1
457457
; CHECK-NO16-NEXT: fcvt s1, h1
458-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
458+
; CHECK-NO16-NEXT: fmul s0, s1, s0
459459
; CHECK-NO16-NEXT: fcvt h0, s0
460460
; CHECK-NO16-NEXT: ret
461461
;
@@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
472472
; CHECK-NO16-LABEL: scvtf_f16_i64_15:
473473
; CHECK-NO16: // %bb.0:
474474
; CHECK-NO16-NEXT: scvtf s1, x0
475-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
475+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
476476
; CHECK-NO16-NEXT: fcvt h1, s1
477477
; CHECK-NO16-NEXT: fcvt s1, h1
478-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
478+
; CHECK-NO16-NEXT: fmul s0, s1, s0
479479
; CHECK-NO16-NEXT: fcvt h0, s0
480480
; CHECK-NO16-NEXT: ret
481481
;
@@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
574574
; CHECK-NO16-LABEL: ucvtf_f16_i32_7:
575575
; CHECK-NO16: // %bb.0:
576576
; CHECK-NO16-NEXT: ucvtf s1, w0
577-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
577+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
578578
; CHECK-NO16-NEXT: fcvt h1, s1
579579
; CHECK-NO16-NEXT: fcvt s1, h1
580-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
580+
; CHECK-NO16-NEXT: fmul s0, s1, s0
581581
; CHECK-NO16-NEXT: fcvt h0, s0
582582
; CHECK-NO16-NEXT: ret
583583
;
@@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
594594
; CHECK-NO16-LABEL: ucvtf_f16_i32_15:
595595
; CHECK-NO16: // %bb.0:
596596
; CHECK-NO16-NEXT: ucvtf s1, w0
597-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
597+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
598598
; CHECK-NO16-NEXT: fcvt h1, s1
599599
; CHECK-NO16-NEXT: fcvt s1, h1
600-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
600+
; CHECK-NO16-NEXT: fmul s0, s1, s0
601601
; CHECK-NO16-NEXT: fcvt h0, s0
602602
; CHECK-NO16-NEXT: ret
603603
;
@@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
614614
; CHECK-NO16-LABEL: ucvtf_f16_i64_7:
615615
; CHECK-NO16: // %bb.0:
616616
; CHECK-NO16-NEXT: ucvtf s1, x0
617-
; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
617+
; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
618618
; CHECK-NO16-NEXT: fcvt h1, s1
619619
; CHECK-NO16-NEXT: fcvt s1, h1
620-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
620+
; CHECK-NO16-NEXT: fmul s0, s1, s0
621621
; CHECK-NO16-NEXT: fcvt h0, s0
622622
; CHECK-NO16-NEXT: ret
623623
;
@@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
634634
; CHECK-NO16-LABEL: ucvtf_f16_i64_15:
635635
; CHECK-NO16: // %bb.0:
636636
; CHECK-NO16-NEXT: ucvtf s1, x0
637-
; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
637+
; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
638638
; CHECK-NO16-NEXT: fcvt h1, s1
639639
; CHECK-NO16-NEXT: fcvt s1, h1
640-
; CHECK-NO16-NEXT: fdiv s0, s1, s0
640+
; CHECK-NO16-NEXT: fmul s0, s1, s0
641641
; CHECK-NO16-NEXT: fcvt h0, s0
642642
; CHECK-NO16-NEXT: ret
643643
;

llvm/test/CodeGen/AArch64/fdiv-const.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
define float @divf32_2(float %a) nounwind {
55
; CHECK-LABEL: divf32_2:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: fmov s1, #2.00000000
8-
; CHECK-NEXT: fdiv s0, s0, s1
7+
; CHECK-NEXT: fmov s1, #0.50000000
8+
; CHECK-NEXT: fmul s0, s0, s1
99
; CHECK-NEXT: ret
1010
%r = fdiv float %a, 2.0
1111
ret float %r
@@ -46,8 +46,8 @@ define float @divf32_p75_arcp(float %a) nounwind {
4646
define half @divf16_2(half %a) nounwind {
4747
; CHECK-LABEL: divf16_2:
4848
; CHECK: // %bb.0:
49-
; CHECK-NEXT: fmov h1, #2.00000000
50-
; CHECK-NEXT: fdiv h0, h0, h1
49+
; CHECK-NEXT: fmov h1, #0.50000000
50+
; CHECK-NEXT: fmul h0, h0, h1
5151
; CHECK-NEXT: ret
5252
%r = fdiv half %a, 2.0
5353
ret half %r
@@ -67,9 +67,9 @@ define half @divf16_32768(half %a) nounwind {
6767
define half @divf16_32768_arcp(half %a) nounwind {
6868
; CHECK-LABEL: divf16_32768_arcp:
6969
; CHECK: // %bb.0:
70-
; CHECK-NEXT: mov w8, #512 // =0x200
70+
; CHECK-NEXT: mov w8, #30720 // =0x7800
7171
; CHECK-NEXT: fmov h1, w8
72-
; CHECK-NEXT: fmul h0, h0, h1
72+
; CHECK-NEXT: fdiv h0, h0, h1
7373
; CHECK-NEXT: ret
7474
%r = fdiv arcp half %a, 32768.0
7575
ret half %r
@@ -78,8 +78,8 @@ define half @divf16_32768_arcp(half %a) nounwind {
7878
define double @divf64_2(double %a) nounwind {
7979
; CHECK-LABEL: divf64_2:
8080
; CHECK: // %bb.0:
81-
; CHECK-NEXT: fmov d1, #2.00000000
82-
; CHECK-NEXT: fdiv d0, d0, d1
81+
; CHECK-NEXT: fmov d1, #0.50000000
82+
; CHECK-NEXT: fmul d0, d0, d1
8383
; CHECK-NEXT: ret
8484
%r = fdiv double %a, 2.0
8585
ret double %r

llvm/test/CodeGen/AArch64/frem-power2.ll

+61-46
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
define float @frem2(float %x) {
66
; CHECK-SD-LABEL: frem2:
77
; CHECK-SD: // %bb.0: // %entry
8-
; CHECK-SD-NEXT: fmov s1, #2.00000000
8+
; CHECK-SD-NEXT: fmov s1, #0.50000000
99
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
10-
; CHECK-SD-NEXT: fdiv s2, s0, s1
11-
; CHECK-SD-NEXT: frintz s2, s2
12-
; CHECK-SD-NEXT: fmsub s1, s2, s1, s0
10+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
11+
; CHECK-SD-NEXT: fmul s1, s0, s1
12+
; CHECK-SD-NEXT: frintz s1, s1
13+
; CHECK-SD-NEXT: fmadd s1, s1, s2, s0
1314
; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24
1415
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
1516
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
@@ -27,10 +28,11 @@ entry:
2728
define float @frem2_nsz(float %x) {
2829
; CHECK-SD-LABEL: frem2_nsz:
2930
; CHECK-SD: // %bb.0: // %entry
30-
; CHECK-SD-NEXT: fmov s1, #2.00000000
31-
; CHECK-SD-NEXT: fdiv s2, s0, s1
32-
; CHECK-SD-NEXT: frintz s2, s2
33-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
31+
; CHECK-SD-NEXT: fmov s1, #0.50000000
32+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
33+
; CHECK-SD-NEXT: fmul s1, s0, s1
34+
; CHECK-SD-NEXT: frintz s1, s1
35+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
3436
; CHECK-SD-NEXT: ret
3537
;
3638
; CHECK-GI-LABEL: frem2_nsz:
@@ -65,10 +67,11 @@ define float @frem2_abs(float %x) {
6567
; CHECK-SD-LABEL: frem2_abs:
6668
; CHECK-SD: // %bb.0: // %entry
6769
; CHECK-SD-NEXT: fabs s0, s0
68-
; CHECK-SD-NEXT: fmov s1, #2.00000000
69-
; CHECK-SD-NEXT: fdiv s2, s0, s1
70-
; CHECK-SD-NEXT: frintz s2, s2
71-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
70+
; CHECK-SD-NEXT: fmov s1, #0.50000000
71+
; CHECK-SD-NEXT: fmov s2, #-2.00000000
72+
; CHECK-SD-NEXT: fmul s1, s0, s1
73+
; CHECK-SD-NEXT: frintz s1, s1
74+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
7275
; CHECK-SD-NEXT: ret
7376
;
7477
; CHECK-GI-LABEL: frem2_abs:
@@ -85,9 +88,9 @@ entry:
8588
define half @hrem2_nsz(half %x) {
8689
; CHECK-SD-LABEL: hrem2_nsz:
8790
; CHECK-SD: // %bb.0: // %entry
88-
; CHECK-SD-NEXT: fmov h1, #2.00000000
91+
; CHECK-SD-NEXT: fmov h1, #0.50000000
8992
; CHECK-SD-NEXT: fmov h2, #-2.00000000
90-
; CHECK-SD-NEXT: fdiv h1, h0, h1
93+
; CHECK-SD-NEXT: fmul h1, h0, h1
9194
; CHECK-SD-NEXT: frintz h1, h1
9295
; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
9396
; CHECK-SD-NEXT: ret
@@ -112,10 +115,11 @@ entry:
112115
define double @drem2_nsz(double %x) {
113116
; CHECK-SD-LABEL: drem2_nsz:
114117
; CHECK-SD: // %bb.0: // %entry
115-
; CHECK-SD-NEXT: fmov d1, #2.00000000
116-
; CHECK-SD-NEXT: fdiv d2, d0, d1
117-
; CHECK-SD-NEXT: frintz d2, d2
118-
; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
118+
; CHECK-SD-NEXT: fmov d1, #0.50000000
119+
; CHECK-SD-NEXT: fmov d2, #-2.00000000
120+
; CHECK-SD-NEXT: fmul d1, d0, d1
121+
; CHECK-SD-NEXT: frintz d1, d1
122+
; CHECK-SD-NEXT: fmadd d0, d1, d2, d0
119123
; CHECK-SD-NEXT: ret
120124
;
121125
; CHECK-GI-LABEL: drem2_nsz:
@@ -176,10 +180,11 @@ entry:
176180
define float @fremm2_nsz(float %x) {
177181
; CHECK-SD-LABEL: fremm2_nsz:
178182
; CHECK-SD: // %bb.0: // %entry
179-
; CHECK-SD-NEXT: fmov s1, #-2.00000000
180-
; CHECK-SD-NEXT: fdiv s2, s0, s1
181-
; CHECK-SD-NEXT: frintz s2, s2
182-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
183+
; CHECK-SD-NEXT: fmov s1, #-0.50000000
184+
; CHECK-SD-NEXT: fmov s2, #2.00000000
185+
; CHECK-SD-NEXT: fmul s1, s0, s1
186+
; CHECK-SD-NEXT: frintz s1, s1
187+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
183188
; CHECK-SD-NEXT: ret
184189
;
185190
; CHECK-GI-LABEL: fremm2_nsz:
@@ -195,10 +200,11 @@ define float @frem4_abs(float %x) {
195200
; CHECK-SD-LABEL: frem4_abs:
196201
; CHECK-SD: // %bb.0: // %entry
197202
; CHECK-SD-NEXT: fabs s0, s0
198-
; CHECK-SD-NEXT: fmov s1, #4.00000000
199-
; CHECK-SD-NEXT: fdiv s2, s0, s1
200-
; CHECK-SD-NEXT: frintz s2, s2
201-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
203+
; CHECK-SD-NEXT: fmov s1, #0.25000000
204+
; CHECK-SD-NEXT: fmov s2, #-4.00000000
205+
; CHECK-SD-NEXT: fmul s1, s0, s1
206+
; CHECK-SD-NEXT: frintz s1, s1
207+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
202208
; CHECK-SD-NEXT: ret
203209
;
204210
; CHECK-GI-LABEL: frem4_abs:
@@ -216,10 +222,12 @@ define float @frem16_abs(float %x) {
216222
; CHECK-SD-LABEL: frem16_abs:
217223
; CHECK-SD: // %bb.0: // %entry
218224
; CHECK-SD-NEXT: fabs s0, s0
219-
; CHECK-SD-NEXT: fmov s1, #16.00000000
220-
; CHECK-SD-NEXT: fdiv s2, s0, s1
221-
; CHECK-SD-NEXT: frintz s2, s2
222-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
225+
; CHECK-SD-NEXT: mov w8, #1031798784 // =0x3d800000
226+
; CHECK-SD-NEXT: fmov s2, #-16.00000000
227+
; CHECK-SD-NEXT: fmov s1, w8
228+
; CHECK-SD-NEXT: fmul s1, s0, s1
229+
; CHECK-SD-NEXT: frintz s1, s1
230+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
223231
; CHECK-SD-NEXT: ret
224232
;
225233
; CHECK-GI-LABEL: frem16_abs:
@@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) {
237245
; CHECK-SD-LABEL: frem4294967296_abs:
238246
; CHECK-SD: // %bb.0: // %entry
239247
; CHECK-SD-NEXT: fabs s0, s0
240-
; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
248+
; CHECK-SD-NEXT: mov w8, #796917760 // =0x2f800000
241249
; CHECK-SD-NEXT: fmov s1, w8
242-
; CHECK-SD-NEXT: fdiv s2, s0, s1
243-
; CHECK-SD-NEXT: frintz s2, s2
244-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
250+
; CHECK-SD-NEXT: mov w8, #-813694976 // =0xcf800000
251+
; CHECK-SD-NEXT: fmov s2, w8
252+
; CHECK-SD-NEXT: fmul s1, s0, s1
253+
; CHECK-SD-NEXT: frintz s1, s1
254+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
245255
; CHECK-SD-NEXT: ret
246256
;
247257
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) {
260270
; CHECK-SD-LABEL: frem1152921504606846976_abs:
261271
; CHECK-SD: // %bb.0: // %entry
262272
; CHECK-SD-NEXT: fabs s0, s0
263-
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
273+
; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000
264274
; CHECK-SD-NEXT: fmov s1, w8
265-
; CHECK-SD-NEXT: fdiv s2, s0, s1
266-
; CHECK-SD-NEXT: frintz s2, s2
267-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
275+
; CHECK-SD-NEXT: mov w8, #-578813952 // =0xdd800000
276+
; CHECK-SD-NEXT: fmov s2, w8
277+
; CHECK-SD-NEXT: fmul s1, s0, s1
278+
; CHECK-SD-NEXT: frintz s1, s1
279+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
268280
; CHECK-SD-NEXT: ret
269281
;
270282
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) {
283295
; CHECK-SD-LABEL: frem4611686018427387904_abs:
284296
; CHECK-SD: // %bb.0: // %entry
285297
; CHECK-SD-NEXT: fabs s0, s0
286-
; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
298+
; CHECK-SD-NEXT: mov w8, #545259520 // =0x20800000
287299
; CHECK-SD-NEXT: fmov s1, w8
288-
; CHECK-SD-NEXT: fdiv s2, s0, s1
289-
; CHECK-SD-NEXT: frintz s2, s2
290-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
300+
; CHECK-SD-NEXT: mov w8, #-562036736 // =0xde800000
301+
; CHECK-SD-NEXT: fmov s2, w8
302+
; CHECK-SD-NEXT: fmul s1, s0, s1
303+
; CHECK-SD-NEXT: frintz s1, s1
304+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
291305
; CHECK-SD-NEXT: ret
292306
;
293307
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -305,11 +319,12 @@ entry:
305319
define float @frem9223372036854775808_abs(float %x) {
306320
; CHECK-SD-LABEL: frem9223372036854775808_abs:
307321
; CHECK-SD: // %bb.0: // %entry
308-
; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
322+
; CHECK-SD-NEXT: movi v1.2s, #32, lsl #24
309323
; CHECK-SD-NEXT: fabs s0, s0
310-
; CHECK-SD-NEXT: fdiv s2, s0, s1
311-
; CHECK-SD-NEXT: frintz s2, s2
312-
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
324+
; CHECK-SD-NEXT: movi v2.2s, #223, lsl #24
325+
; CHECK-SD-NEXT: fmul s1, s0, s1
326+
; CHECK-SD-NEXT: frintz s1, s1
327+
; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
313328
; CHECK-SD-NEXT: ret
314329
;
315330
; CHECK-GI-LABEL: frem9223372036854775808_abs:

0 commit comments

Comments
 (0)