Skip to content

Commit cf57613

Browse files
committed
Updates
1 parent 16fc3dc commit cf57613

File tree

6 files changed

+58
-124
lines changed

6 files changed

+58
-124
lines changed

llvm/include/llvm/ADT/APFloat.h

+8
Original file line number | Diff line number | Diff line change
@@ -964,6 +964,14 @@ class APFloat : public APFloatBase {
964964
return Val;
965965
}
966966

967+
/// Factory for Positive and Negative One.
968+
///
969+
/// \param Negative True iff the number should be negative.
970+
static APFloat getOne(const fltSemantics &Sem, bool Negative = false) {
971+
APFloat Val(Sem, Negative ? -1 : 1);
972+
return Val;
973+
}
974+
967975
/// Factory for Positive and Negative Infinity.
968976
///
969977
/// \param Negative True iff the number should be negative.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -17273,10 +17273,10 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1727317273

1727417274
// fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
1727517275
// the loss is acceptable with AllowReciprocal.
17276-
if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17276+
if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
1727717277
// Compute the reciprocal 1.0 / c2.
1727817278
const APFloat &N1APF = N1CFP->getValueAPF();
17279-
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17279+
APFloat Recip = APFloat::getOne(N1APF.getSemantics());
1728017280
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
1728117281
// Only do the transform if the reciprocal is a legal fp immediate that
1728217282
// isn't too nasty (eg NaN, denormal, ...).

llvm/test/CodeGen/AArch64/fdiv-const.ll

+3-4
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,8 @@ define double @divf64_2(double %a) nounwind {
8888
define <4 x float> @divv4f32_2(<4 x float> %a) nounwind {
8989
; CHECK-LABEL: divv4f32_2:
9090
; CHECK: // %bb.0:
91-
; CHECK-NEXT: movi v1.4s, #64, lsl #24
92-
; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s
91+
; CHECK-NEXT: movi v1.4s, #63, lsl #24
92+
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
9393
; CHECK-NEXT: ret
9494
%r = fdiv <4 x float> %a, <float 2.0, float 2.0, float 2.0, float 2.0>
9595
ret <4 x float> %r
@@ -141,9 +141,8 @@ define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind {
141141
define <vscale x 4 x float> @divnxv4f32_2(<vscale x 4 x float> %a) nounwind {
142142
; CHECK-LABEL: divnxv4f32_2:
143143
; CHECK: // %bb.0:
144-
; CHECK-NEXT: fmov z1.s, #2.00000000
145144
; CHECK-NEXT: ptrue p0.s
146-
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
145+
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5
147146
; CHECK-NEXT: ret
148147
%r = fdiv <vscale x 4 x float> %a, splat (float 2.0)
149148
ret <vscale x 4 x float> %r

llvm/test/CodeGen/AArch64/frem-power2.ll

+16-12
Original file line number | Diff line number | Diff line change
@@ -341,11 +341,12 @@ entry:
341341
define <4 x float> @frem2_vec(<4 x float> %x) {
342342
; CHECK-SD-LABEL: frem2_vec:
343343
; CHECK-SD: // %bb.0: // %entry
344-
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
344+
; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24
345+
; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24
345346
; CHECK-SD-NEXT: mov v3.16b, v0.16b
346-
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
347-
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
348-
; CHECK-SD-NEXT: fmls v3.4s, v1.4s, v2.4s
347+
; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
348+
; CHECK-SD-NEXT: frintz v1.4s, v1.4s
349+
; CHECK-SD-NEXT: fmls v3.4s, v2.4s, v1.4s
349350
; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24
350351
; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b
351352
; CHECK-SD-NEXT: ret
@@ -402,10 +403,11 @@ entry:
402403
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
403404
; CHECK-SD-LABEL: frem2_nsz_vec:
404405
; CHECK-SD: // %bb.0: // %entry
405-
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
406-
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
407-
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
408-
; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
406+
; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24
407+
; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24
408+
; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
409+
; CHECK-SD-NEXT: frintz v1.4s, v1.4s
410+
; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s
409411
; CHECK-SD-NEXT: ret
410412
;
411413
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -460,12 +462,14 @@ entry:
460462
define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
461463
; CHECK-SD-LABEL: frem1152921504606846976_absv:
462464
; CHECK-SD: // %bb.0: // %entry
463-
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
465+
; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000
464466
; CHECK-SD-NEXT: fabs v0.4s, v0.4s
465467
; CHECK-SD-NEXT: dup v1.4s, w8
466-
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
467-
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
468-
; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
468+
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
469+
; CHECK-SD-NEXT: dup v2.4s, w8
470+
; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
471+
; CHECK-SD-NEXT: frintz v1.4s, v1.4s
472+
; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s
469473
; CHECK-SD-NEXT: ret
470474
;
471475
; CHECK-GI-LABEL: frem1152921504606846976_absv:

llvm/test/CodeGen/ARM/vdiv_combine.ll

+17-88
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,7 @@
55
define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind {
66
; CHECK-LABEL: t1:
77
; CHECK: @ %bb.0: @ %entry
8-
; CHECK-NEXT: vmov.f32 s2, #1.250000e-01
9-
; CHECK-NEXT: vcvt.f32.s32 d2, d0
10-
; CHECK-NEXT: vmul.f32 s1, s5, s2
11-
; CHECK-NEXT: vmul.f32 s0, s4, s2
8+
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3
129
; CHECK-NEXT: bx lr
1310
entry:
1411
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -20,10 +17,7 @@ entry:
2017
define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind {
2118
; CHECK-LABEL: t2:
2219
; CHECK: @ %bb.0: @ %entry
23-
; CHECK-NEXT: vmov.f32 s2, #1.250000e-01
24-
; CHECK-NEXT: vcvt.f32.u32 d2, d0
25-
; CHECK-NEXT: vmul.f32 s1, s5, s2
26-
; CHECK-NEXT: vmul.f32 s0, s4, s2
20+
; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3
2721
; CHECK-NEXT: bx lr
2822
entry:
2923
%vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -56,17 +50,10 @@ entry:
5650
define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind {
5751
; CHECK-LABEL: t4:
5852
; CHECK: @ %bb.0: @ %entry
59-
; CHECK-NEXT: vcvt.f32.s32 d2, d0
60-
; CHECK-NEXT: vldr s2, LCPI3_0
61-
; CHECK-NEXT: vdiv.f32 s1, s5, s2
62-
; CHECK-NEXT: vdiv.f32 s0, s4, s2
53+
; CHECK-NEXT: vcvt.f32.s32 d16, d0
54+
; CHECK-NEXT: vmov.i32 d17, #0x2f000000
55+
; CHECK-NEXT: vmul.f32 d0, d16, d17
6356
; CHECK-NEXT: bx lr
64-
; CHECK-NEXT: .p2align 2
65-
; CHECK-NEXT: @ %bb.1:
66-
; CHECK-NEXT: .data_region
67-
; CHECK-NEXT: LCPI3_0:
68-
; CHECK-NEXT: .long 0x50000000 @ float 8.58993459E+9
69-
; CHECK-NEXT: .end_data_region
7057
entry:
7158
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
7259
%div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -77,17 +64,8 @@ entry:
7764
define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind {
7865
; CHECK-LABEL: t5:
7966
; CHECK: @ %bb.0: @ %entry
80-
; CHECK-NEXT: vcvt.f32.s32 d2, d0
81-
; CHECK-NEXT: vldr s2, LCPI4_0
82-
; CHECK-NEXT: vdiv.f32 s1, s5, s2
83-
; CHECK-NEXT: vdiv.f32 s0, s4, s2
67+
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32
8468
; CHECK-NEXT: bx lr
85-
; CHECK-NEXT: .p2align 2
86-
; CHECK-NEXT: @ %bb.1:
87-
; CHECK-NEXT: .data_region
88-
; CHECK-NEXT: LCPI4_0:
89-
; CHECK-NEXT: .long 0x4f800000 @ float 4.2949673E+9
90-
; CHECK-NEXT: .end_data_region
9169
entry:
9270
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
9371
%div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -98,12 +76,7 @@ entry:
9876
define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind {
9977
; CHECK-LABEL: t6:
10078
; CHECK: @ %bb.0: @ %entry
101-
; CHECK-NEXT: vmov.f32 s4, #1.250000e-01
102-
; CHECK-NEXT: vcvt.f32.s32 q2, q0
103-
; CHECK-NEXT: vmul.f32 s3, s11, s4
104-
; CHECK-NEXT: vmul.f32 s2, s10, s4
105-
; CHECK-NEXT: vmul.f32 s1, s9, s4
106-
; CHECK-NEXT: vmul.f32 s0, s8, s4
79+
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
10780
; CHECK-NEXT: bx lr
10881
entry:
10982
%vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
@@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
11588
; CHECK-LABEL: fix_unsigned_i16_to_float:
11689
; CHECK: @ %bb.0:
11790
; CHECK-NEXT: vmovl.u16 q8, d0
118-
; CHECK-NEXT: vmov.f32 s4, #5.000000e-01
119-
; CHECK-NEXT: vcvt.f32.u32 q2, q8
120-
; CHECK-NEXT: vmul.f32 s3, s11, s4
121-
; CHECK-NEXT: vmul.f32 s2, s10, s4
122-
; CHECK-NEXT: vmul.f32 s1, s9, s4
123-
; CHECK-NEXT: vmul.f32 s0, s8, s4
91+
; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1
12492
; CHECK-NEXT: bx lr
12593
%conv = uitofp <4 x i16> %in to <4 x float>
12694
%shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
13199
; CHECK-LABEL: fix_signed_i16_to_float:
132100
; CHECK: @ %bb.0:
133101
; CHECK-NEXT: vmovl.s16 q8, d0
134-
; CHECK-NEXT: vmov.f32 s4, #5.000000e-01
135-
; CHECK-NEXT: vcvt.f32.s32 q2, q8
136-
; CHECK-NEXT: vmul.f32 s3, s11, s4
137-
; CHECK-NEXT: vmul.f32 s2, s10, s4
138-
; CHECK-NEXT: vmul.f32 s1, s9, s4
139-
; CHECK-NEXT: vmul.f32 s0, s8, s4
102+
; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1
140103
; CHECK-NEXT: bx lr
141104
%conv = sitofp <4 x i16> %in to <4 x float>
142105
%shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) {
152115
; CHECK-NEXT: vmov r0, r1, d9
153116
; CHECK-NEXT: bl ___floatundisf
154117
; CHECK-NEXT: vmov r2, r1, d8
155-
; CHECK-NEXT: vmov s18, r0
156-
; CHECK-NEXT: vmov.f32 s16, #5.000000e-01
118+
; CHECK-NEXT: vmov s19, r0
119+
; CHECK-NEXT: vmov.i32 d8, #0x3f000000
157120
; CHECK-NEXT: mov r0, r2
158121
; CHECK-NEXT: bl ___floatundisf
159-
; CHECK-NEXT: vmov s2, r0
160-
; CHECK-NEXT: vmul.f32 s1, s18, s16
161-
; CHECK-NEXT: vmul.f32 s0, s2, s16
122+
; CHECK-NEXT: vmov s18, r0
123+
; CHECK-NEXT: vmul.f32 d0, d9, d8
162124
; CHECK-NEXT: vpop {d8, d9}
163125
; CHECK-NEXT: pop {lr}
164126
; CHECK-NEXT: bx lr
@@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
196158
define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind {
197159
; CHECK-LABEL: test7:
198160
; CHECK: @ %bb.0: @ %entry
199-
; CHECK-NEXT: vpush {d8, d9}
200-
; CHECK-NEXT: vmov.f32 s12, #1.250000e-01
201-
; CHECK-NEXT: vcvt.f32.s32 q4, q0
202-
; CHECK-NEXT: vcvt.f32.s32 q2, q1
203-
; CHECK-NEXT: vmul.f32 s3, s19, s12
204-
; CHECK-NEXT: vmul.f32 s2, s18, s12
205-
; CHECK-NEXT: vmul.f32 s7, s11, s12
206-
; CHECK-NEXT: vmul.f32 s6, s10, s12
207-
; CHECK-NEXT: vmul.f32 s1, s17, s12
208-
; CHECK-NEXT: vmul.f32 s5, s9, s12
209-
; CHECK-NEXT: vmul.f32 s0, s16, s12
210-
; CHECK-NEXT: vmul.f32 s4, s8, s12
211-
; CHECK-NEXT: vpop {d8, d9}
161+
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
162+
; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3
212163
; CHECK-NEXT: bx lr
213164
entry:
214165
%vcvt.i = sitofp <8 x i32> %in to <8 x float>
@@ -220,19 +171,8 @@ entry:
220171
define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
221172
; CHECK-LABEL: test8:
222173
; CHECK: @ %bb.0:
223-
; CHECK-NEXT: vmov.f32 s4, #5.000000e-01
224-
; CHECK-NEXT: vcvt.f32.s32 q2, q0
225-
; CHECK-NEXT: vmul.f32 s2, s10, s4
226-
; CHECK-NEXT: vmul.f32 s1, s9, s4
227-
; CHECK-NEXT: vmul.f32 s0, s8, s4
228-
; CHECK-NEXT: vldr s3, LCPI11_0
174+
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1
229175
; CHECK-NEXT: bx lr
230-
; CHECK-NEXT: .p2align 2
231-
; CHECK-NEXT: @ %bb.1:
232-
; CHECK-NEXT: .data_region
233-
; CHECK-NEXT: LCPI11_0:
234-
; CHECK-NEXT: .long 0x7fc00000 @ float NaN
235-
; CHECK-NEXT: .end_data_region
236176
%vcvt.i = sitofp <4 x i32> %in to <4 x float>
237177
%div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
238178
ret <4 x float> %div.i
@@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
241181
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) {
242182
; CHECK-LABEL: test_illegal_int_to_fp:
243183
; CHECK: @ %bb.0:
244-
; CHECK-NEXT: vmov.f32 s4, #2.500000e-01
245-
; CHECK-NEXT: vcvt.f32.s32 q2, q0
246-
; CHECK-NEXT: vmul.f32 s2, s10, s4
247-
; CHECK-NEXT: vmul.f32 s1, s9, s4
248-
; CHECK-NEXT: vmul.f32 s0, s8, s4
249-
; CHECK-NEXT: vldr s3, LCPI12_0
184+
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2
250185
; CHECK-NEXT: bx lr
251-
; CHECK-NEXT: .p2align 2
252-
; CHECK-NEXT: @ %bb.1:
253-
; CHECK-NEXT: .data_region
254-
; CHECK-NEXT: LCPI12_0:
255-
; CHECK-NEXT: .long 0x7fc00000 @ float NaN
256-
; CHECK-NEXT: .end_data_region
257186
%conv = sitofp <3 x i32> %in to <3 x float>
258187
%res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0>
259188
ret <3 x float> %res

llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll

+12-18
Original file line number | Diff line number | Diff line change
@@ -1367,12 +1367,11 @@ define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) {
13671367
; CHECK-LABEL: bcast_unfold_fdiv_v16f32:
13681368
; CHECK: # %bb.0: # %bb
13691369
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
1370-
; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1370+
; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1]
13711371
; CHECK-NEXT: .p2align 4, 0x90
13721372
; CHECK-NEXT: .LBB42_1: # %bb1
13731373
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1374-
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %zmm1
1375-
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm1
1374+
; CHECK-NEXT: vmulps 4096(%rdi,%rax), %zmm0, %zmm1
13761375
; CHECK-NEXT: vmovups %zmm1, 4096(%rdi,%rax)
13771376
; CHECK-NEXT: addq $64, %rax
13781377
; CHECK-NEXT: jne .LBB42_1
@@ -1400,12 +1399,11 @@ define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) {
14001399
; CHECK-LABEL: bcast_unfold_fdiv_v8f32:
14011400
; CHECK: # %bb.0: # %bb
14021401
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
1403-
; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1402+
; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1]
14041403
; CHECK-NEXT: .p2align 4, 0x90
14051404
; CHECK-NEXT: .LBB43_1: # %bb1
14061405
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1407-
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %ymm1
1408-
; CHECK-NEXT: vdivps %ymm0, %ymm1, %ymm1
1406+
; CHECK-NEXT: vmulps 4096(%rdi,%rax), %ymm0, %ymm1
14091407
; CHECK-NEXT: vmovups %ymm1, 4096(%rdi,%rax)
14101408
; CHECK-NEXT: addq $32, %rax
14111409
; CHECK-NEXT: jne .LBB43_1
@@ -1433,12 +1431,11 @@ define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) {
14331431
; CHECK-LABEL: bcast_unfold_fdiv_v4f32:
14341432
; CHECK: # %bb.0: # %bb
14351433
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
1436-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1434+
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
14371435
; CHECK-NEXT: .p2align 4, 0x90
14381436
; CHECK-NEXT: .LBB44_1: # %bb1
14391437
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1440-
; CHECK-NEXT: vmovups 4096(%rdi,%rax), %xmm1
1441-
; CHECK-NEXT: vdivps %xmm0, %xmm1, %xmm1
1438+
; CHECK-NEXT: vmulps 4096(%rdi,%rax), %xmm0, %xmm1
14421439
; CHECK-NEXT: vmovups %xmm1, 4096(%rdi,%rax)
14431440
; CHECK-NEXT: addq $16, %rax
14441441
; CHECK-NEXT: jne .LBB44_1
@@ -1465,12 +1462,11 @@ define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) {
14651462
; CHECK-LABEL: bcast_unfold_fdiv_v8f64:
14661463
; CHECK: # %bb.0: # %bb
14671464
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
1468-
; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1465+
; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1,5.0E-1]
14691466
; CHECK-NEXT: .p2align 4, 0x90
14701467
; CHECK-NEXT: .LBB45_1: # %bb1
14711468
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1472-
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %zmm1
1473-
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm1
1469+
; CHECK-NEXT: vmulpd 8192(%rdi,%rax), %zmm0, %zmm1
14741470
; CHECK-NEXT: vmovupd %zmm1, 8192(%rdi,%rax)
14751471
; CHECK-NEXT: addq $64, %rax
14761472
; CHECK-NEXT: jne .LBB45_1
@@ -1498,12 +1494,11 @@ define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) {
14981494
; CHECK-LABEL: bcast_unfold_fdiv_v4f64:
14991495
; CHECK: # %bb.0: # %bb
15001496
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
1501-
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
1497+
; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [5.0E-1,5.0E-1,5.0E-1,5.0E-1]
15021498
; CHECK-NEXT: .p2align 4, 0x90
15031499
; CHECK-NEXT: .LBB46_1: # %bb1
15041500
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1505-
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %ymm1
1506-
; CHECK-NEXT: vdivpd %ymm0, %ymm1, %ymm1
1501+
; CHECK-NEXT: vmulpd 8192(%rdi,%rax), %ymm0, %ymm1
15071502
; CHECK-NEXT: vmovupd %ymm1, 8192(%rdi,%rax)
15081503
; CHECK-NEXT: addq $32, %rax
15091504
; CHECK-NEXT: jne .LBB46_1
@@ -1531,13 +1526,12 @@ define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) {
15311526
; CHECK-LABEL: bcast_unfold_fdiv_v2f64:
15321527
; CHECK: # %bb.0: # %bb
15331528
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
1534-
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0]
1529+
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [5.0E-1,5.0E-1]
15351530
; CHECK-NEXT: # xmm0 = mem[0,0]
15361531
; CHECK-NEXT: .p2align 4, 0x90
15371532
; CHECK-NEXT: .LBB47_1: # %bb1
15381533
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1539-
; CHECK-NEXT: vmovupd 8192(%rdi,%rax), %xmm1
1540-
; CHECK-NEXT: vdivpd %xmm0, %xmm1, %xmm1
1534+
; CHECK-NEXT: vmulpd 8192(%rdi,%rax), %xmm0, %xmm1
15411535
; CHECK-NEXT: vmovupd %xmm1, 8192(%rdi,%rax)
15421536
; CHECK-NEXT: addq $16, %rax
15431537
; CHECK-NEXT: jne .LBB47_1

0 commit comments

Comments
 (0)