Skip to content

Commit af43094

Browse files
committed
[PowerPC][AIX] Allow VSX patterns to be 32-bit and 64-bit safe on P8+.
This patch updates two patterns involving `scalar_to_vector` and `SCALAR_TO_VECTOR_PERMUTED` nodes so that they are safe for both 64-bit and 32-bit targets, by pulling the patterns out of the 64-bit-specific guard. These patterns are matched on POWER8 and above. Differential Revision: https://reviews.llvm.org/D125389
1 parent b479ea4 commit af43094

10 files changed

+93
-180
lines changed

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3367,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
33673367

33683368
def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
33693369
(f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
3370+
3371+
defm : ScalToVecWPermute<
3372+
v4i32, (i32 (load ForceXForm:$src)),
3373+
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
3374+
(SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3375+
defm : ScalToVecWPermute<
3376+
v4f32, (f32 (load ForceXForm:$src)),
3377+
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
3378+
(SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
33703379
} // HasVSX, HasP8Vector, IsBigEndian
33713380

33723381
// Big endian Power8 64Bit VSX subtarget.
@@ -3381,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))),
33813390
(v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>;
33823391
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))),
33833392
(v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>;
3384-
defm : ScalToVecWPermute<
3385-
v4i32, (i32 (load ForceXForm:$src)),
3386-
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
3387-
(SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
3388-
defm : ScalToVecWPermute<
3389-
v4f32, (f32 (load ForceXForm:$src)),
3390-
(XXSLDWIs (LIWZX ForceXForm:$src), 1),
3391-
(SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
33923393

33933394
def : Pat<DWToSPExtractConv.BVU,
33943395
(v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),

llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
3030
;
3131
; AIX-P8-32-LABEL: test_f2:
3232
; AIX-P8-32: # %bb.0:
33-
; AIX-P8-32-NEXT: lfs f0, 4(r3)
34-
; AIX-P8-32-NEXT: lfs f1, 0(r3)
3533
; AIX-P8-32-NEXT: lwz r6, L..C0(r2) # %const.0
36-
; AIX-P8-32-NEXT: lfs f2, 4(r4)
37-
; AIX-P8-32-NEXT: xscvdpspn v2, f0
38-
; AIX-P8-32-NEXT: lfs f0, 0(r4)
39-
; AIX-P8-32-NEXT: lxvw4x v0, 0, r6
40-
; AIX-P8-32-NEXT: xscvdpspn v3, f1
41-
; AIX-P8-32-NEXT: xscvdpspn v4, f2
42-
; AIX-P8-32-NEXT: xscvdpspn v5, f0
43-
; AIX-P8-32-NEXT: vperm v2, v3, v2, v0
44-
; AIX-P8-32-NEXT: vperm v3, v5, v4, v0
34+
; AIX-P8-32-NEXT: li r7, 4
35+
; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3
36+
; AIX-P8-32-NEXT: lxsiwzx v0, 0, r4
37+
; AIX-P8-32-NEXT: lxsiwzx v2, r3, r7
38+
; AIX-P8-32-NEXT: lxsiwzx v5, r4, r7
39+
; AIX-P8-32-NEXT: lxvw4x v4, 0, r6
40+
; AIX-P8-32-NEXT: vperm v2, v3, v2, v4
41+
; AIX-P8-32-NEXT: vperm v3, v0, v5, v4
4542
; AIX-P8-32-NEXT: xvaddsp vs0, v2, v3
4643
; AIX-P8-32-NEXT: xxsldwi vs1, vs0, vs0, 1
4744
; AIX-P8-32-NEXT: xscvspdpn f0, vs0
@@ -60,15 +57,15 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
6057
;
6158
; AIX-P9-32-LABEL: test_f2:
6259
; AIX-P9-32: # %bb.0:
63-
; AIX-P9-32-NEXT: lfs f0, 0(r3)
60+
; AIX-P9-32-NEXT: lfiwzx f0, 0, r3
6461
; AIX-P9-32-NEXT: lwz r3, 4(r3)
62+
; AIX-P9-32-NEXT: xxsldwi vs0, f0, f0, 1
6563
; AIX-P9-32-NEXT: mtfprwz f1, r3
6664
; AIX-P9-32-NEXT: lwz r3, 4(r4)
67-
; AIX-P9-32-NEXT: xscvdpspn vs0, f0
68-
; AIX-P9-32-NEXT: mtfprwz f2, r3
6965
; AIX-P9-32-NEXT: xxinsertw vs0, vs1, 4
70-
; AIX-P9-32-NEXT: lfs f1, 0(r4)
71-
; AIX-P9-32-NEXT: xscvdpspn vs1, f1
66+
; AIX-P9-32-NEXT: lfiwzx f1, 0, r4
67+
; AIX-P9-32-NEXT: mtfprwz f2, r3
68+
; AIX-P9-32-NEXT: xxsldwi vs1, f1, f1, 1
7269
; AIX-P9-32-NEXT: xxinsertw vs1, vs2, 4
7370
; AIX-P9-32-NEXT: xvaddsp vs0, vs0, vs1
7471
; AIX-P9-32-NEXT: xscvspdpn f1, vs0

llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -974,8 +974,7 @@ define dso_local <4 x i32> @testSplat4hi(<8 x i8>* nocapture readonly %ptr) loca
974974
;
975975
; P8-AIX-32-LABEL: testSplat4hi:
976976
; P8-AIX-32: # %bb.0: # %entry
977-
; P8-AIX-32-NEXT: lwz r3, 0(r3)
978-
; P8-AIX-32-NEXT: mtfprwz f0, r3
977+
; P8-AIX-32-NEXT: lfiwzx f0, 0, r3
979978
; P8-AIX-32-NEXT: xxspltw v2, vs0, 1
980979
; P8-AIX-32-NEXT: blr
981980
entry:

llvm/test/CodeGen/PowerPC/float-vector-gather.ll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,13 @@ float* nocapture readonly %d) {
4343

4444
; CHECK-BE-AIX-32-LABEL: vector_gatherf:
4545
; CHECK-BE-AIX-32-LABEL: # %bb.0: # %entry
46-
; CHECK-BE-AIX-32-DAG: lfs f[[REG0:[0-9]+]]
47-
; CHECK-BE-AIX-32-DAG: lfs f[[REG1:[0-9]+]]
48-
; CHECK-BE-AIX-32-DAG: lfs f[[REG2:[0-9]+]]
49-
; CHECK-BE-AIX-32-DAG: lfs f[[REG3:[0-9]+]]
50-
; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG0]]
51-
; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG1:[0-9]+]], f[[REG1]]
52-
; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG2:[0-9]+]], f[[REG2]]
53-
; CHECK-BE-AIX-32-DAG: xscvdpspn v[[VREG0:[0-9]+]], f[[REG3]]
54-
; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG1]], v[[VREG0]], v[[VREG1]]
55-
; CHECK-BE-AIX-32-DAG: vmrgow v[[VREG0]], v[[VREG2]], v[[VREG0]]
56-
; CHECK-BE-AIX-32-NEXT: xxmrghd v[[VREG1]], v[[VREG0]], v[[VREG1]]
46+
; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG0:[0-9]+]]
47+
; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG1:[0-9]+]]
48+
; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG2:[0-9]+]]
49+
; CHECK-BE-AIX-32-DAG: lxsiwzx v[[REG3:[0-9]+]]
50+
; CHECK-BE-AIX-32-DAG: vmrgow v[[REG0]], v[[REG1]], v[[REG0]]
51+
; CHECK-BE-AIX-32-DAG: vmrgow v[[REG3]], v[[REG2]], v[[REG3]]
52+
; CHECK-BE-AIX-32-NEXT: xxmrghd v[[REG0]], v[[REG3]], v[[REG0]]
5753
; CHECK-BE-AIX-32-NEXT: blr
5854
entry:
5955
%0 = load float, float* %a, align 4

llvm/test/CodeGen/PowerPC/load-and-splat.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,8 +560,7 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
560560
;
561561
; P8-AIX32-LABEL: unadjusted_lxvwsx:
562562
; P8-AIX32: # %bb.0: # %entry
563-
; P8-AIX32-NEXT: lwz r3, 0(r3)
564-
; P8-AIX32-NEXT: mtfprwz f0, r3
563+
; P8-AIX32-NEXT: lfiwzx f0, 0, r3
565564
; P8-AIX32-NEXT: xxspltw v2, vs0, 1
566565
; P8-AIX32-NEXT: blr
567566
;

llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ define <16 x i8> @test(i32* %s, i32* %t) {
2828
;
2929
; CHECK-AIX-32-LABEL: test:
3030
; CHECK-AIX-32: # %bb.0: # %entry
31-
; CHECK-AIX-32-NEXT: lwz r3, 0(r3)
32-
; CHECK-AIX-32-NEXT: mtfprwz f0, r3
31+
; CHECK-AIX-32-NEXT: lfiwzx f0, 0, r3
3332
; CHECK-AIX-32-NEXT: xxspltw v2, vs0, 1
3433
; CHECK-AIX-32-NEXT: blr
3534

llvm/test/CodeGen/PowerPC/pre-inc-disable.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
6666
; P9BE: lxsiwzx [[REG:[0-9]+]]
6767
; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
6868
; P9BE-32-LABEL: test32:
69-
; P9BE-32: lwzx [[REG1:[0-9]+]]
70-
; P9BE-32: mtvsrwz [[REG2:[0-9]+]], [[REG1]]
71-
; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
69+
; P9BE-32: lxsiwzx [[REG:[0-9]+]]
70+
; P9BE-32: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]]
7271
entry:
7372
%idx.ext63 = sext i32 %i_pix2 to i64
7473
%add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63

llvm/test/CodeGen/PowerPC/reduce_scalarization.ll

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,15 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl
6868
;
6969
; AIX-32-LABEL: test2:
7070
; AIX-32: # %bb.0: # %entry
71-
; AIX-32-NEXT: lfs f0, 4(r3)
72-
; AIX-32-NEXT: lfs f1, 0(r3)
7371
; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0
74-
; AIX-32-NEXT: lfs f2, 4(r4)
75-
; AIX-32-NEXT: xscvdpspn v2, f0
76-
; AIX-32-NEXT: lfs f0, 0(r4)
77-
; AIX-32-NEXT: lxvw4x v0, 0, r5
78-
; AIX-32-NEXT: xscvdpspn v3, f1
79-
; AIX-32-NEXT: xscvdpspn v4, f2
80-
; AIX-32-NEXT: xscvdpspn v5, f0
81-
; AIX-32-NEXT: vperm v2, v3, v2, v0
82-
; AIX-32-NEXT: vperm v3, v5, v4, v0
72+
; AIX-32-NEXT: li r6, 4
73+
; AIX-32-NEXT: lxsiwzx v3, 0, r3
74+
; AIX-32-NEXT: lxsiwzx v0, 0, r4
75+
; AIX-32-NEXT: lxsiwzx v2, r3, r6
76+
; AIX-32-NEXT: lxsiwzx v5, r4, r6
77+
; AIX-32-NEXT: lxvw4x v4, 0, r5
78+
; AIX-32-NEXT: vperm v2, v3, v2, v4
79+
; AIX-32-NEXT: vperm v3, v0, v5, v4
8380
; AIX-32-NEXT: xvsubsp vs0, v2, v3
8481
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
8582
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -117,18 +114,15 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl
117114
;
118115
; AIX-32-LABEL: test3:
119116
; AIX-32: # %bb.0: # %entry
120-
; AIX-32-NEXT: lfs f0, 4(r3)
121-
; AIX-32-NEXT: lfs f1, 0(r3)
122117
; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0
123-
; AIX-32-NEXT: lfs f2, 4(r4)
124-
; AIX-32-NEXT: xscvdpspn v2, f0
125-
; AIX-32-NEXT: lfs f0, 0(r4)
126-
; AIX-32-NEXT: lxvw4x v0, 0, r5
127-
; AIX-32-NEXT: xscvdpspn v3, f1
128-
; AIX-32-NEXT: xscvdpspn v4, f2
129-
; AIX-32-NEXT: xscvdpspn v5, f0
130-
; AIX-32-NEXT: vperm v2, v3, v2, v0
131-
; AIX-32-NEXT: vperm v3, v5, v4, v0
118+
; AIX-32-NEXT: li r6, 4
119+
; AIX-32-NEXT: lxsiwzx v3, 0, r3
120+
; AIX-32-NEXT: lxsiwzx v0, 0, r4
121+
; AIX-32-NEXT: lxsiwzx v2, r3, r6
122+
; AIX-32-NEXT: lxsiwzx v5, r4, r6
123+
; AIX-32-NEXT: lxvw4x v4, 0, r5
124+
; AIX-32-NEXT: vperm v2, v3, v2, v4
125+
; AIX-32-NEXT: vperm v3, v0, v5, v4
132126
; AIX-32-NEXT: xvaddsp vs0, v2, v3
133127
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
134128
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -166,18 +160,15 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl
166160
;
167161
; AIX-32-LABEL: test4:
168162
; AIX-32: # %bb.0: # %entry
169-
; AIX-32-NEXT: lfs f0, 4(r3)
170-
; AIX-32-NEXT: lfs f1, 0(r3)
171163
; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0
172-
; AIX-32-NEXT: lfs f2, 4(r4)
173-
; AIX-32-NEXT: xscvdpspn v2, f0
174-
; AIX-32-NEXT: lfs f0, 0(r4)
175-
; AIX-32-NEXT: lxvw4x v0, 0, r5
176-
; AIX-32-NEXT: xscvdpspn v3, f1
177-
; AIX-32-NEXT: xscvdpspn v4, f2
178-
; AIX-32-NEXT: xscvdpspn v5, f0
179-
; AIX-32-NEXT: vperm v2, v3, v2, v0
180-
; AIX-32-NEXT: vperm v3, v5, v4, v0
164+
; AIX-32-NEXT: li r6, 4
165+
; AIX-32-NEXT: lxsiwzx v3, 0, r3
166+
; AIX-32-NEXT: lxsiwzx v0, 0, r4
167+
; AIX-32-NEXT: lxsiwzx v2, r3, r6
168+
; AIX-32-NEXT: lxsiwzx v5, r4, r6
169+
; AIX-32-NEXT: lxvw4x v4, 0, r5
170+
; AIX-32-NEXT: vperm v2, v3, v2, v4
171+
; AIX-32-NEXT: vperm v3, v0, v5, v4
181172
; AIX-32-NEXT: xvmulsp vs0, v2, v3
182173
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
183174
; AIX-32-NEXT: xscvspdpn f0, vs0

llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll

Lines changed: 24 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010

1111
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1212
; RUN: -mtriple=powerpc64-ibm-aix-xcoff< %s | FileCheck %s \
13-
; RUN: --check-prefixes=P9-AIX,P9-AIX-64
13+
; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-64
1414
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1515
; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
16-
; RUN: --check-prefixes=P9-AIX,P9-AIX-32
16+
; RUN: --check-prefixes=AIX,P9-AIX,P9-AIX-32
1717
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1818
; RUN: -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
19-
; RUN: --check-prefixes=P8-AIX-64
19+
; RUN: --check-prefixes=AIX,P8-AIX-64
2020
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
2121
; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
22-
; RUN: --check-prefixes=P8-AIX-32
22+
; RUN: --check-prefixes=AIX,P8-AIX-32
2323

2424
; Function Attrs: norecurse nounwind readonly
2525
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
@@ -422,9 +422,8 @@ define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec
422422
;
423423
; P8-AIX-32-LABEL: s2v_test_f1:
424424
; P8-AIX-32: # %bb.0: # %entry
425-
; P8-AIX-32-NEXT: lfs f0, 0(r3)
426425
; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0
427-
; P8-AIX-32-NEXT: xscvdpspn v3, f0
426+
; P8-AIX-32-NEXT: lxsiwzx v3, 0, r3
428427
; P8-AIX-32-NEXT: lxvw4x v4, 0, r4
429428
; P8-AIX-32-NEXT: vperm v2, v3, v2, v4
430429
; P8-AIX-32-NEXT: blr
@@ -466,33 +465,12 @@ define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec
466465
; P8BE-NEXT: vmrgow v2, v3, v2
467466
; P8BE-NEXT: blr
468467
;
469-
; P9-AIX-64-LABEL: s2v_test_f2:
470-
; P9-AIX-64: # %bb.0: # %entry
471-
; P9-AIX-64-NEXT: addi r3, r3, 4
472-
; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
473-
; P9-AIX-64-NEXT: vmrgow v2, v3, v2
474-
; P9-AIX-64-NEXT: blr
475-
;
476-
; P9-AIX-32-LABEL: s2v_test_f2:
477-
; P9-AIX-32: # %bb.0: # %entry
478-
; P9-AIX-32-NEXT: lfs f0, 4(r3)
479-
; P9-AIX-32-NEXT: xscvdpspn v3, f0
480-
; P9-AIX-32-NEXT: vmrgow v2, v3, v2
481-
; P9-AIX-32-NEXT: blr
482-
;
483-
; P8-AIX-64-LABEL: s2v_test_f2:
484-
; P8-AIX-64: # %bb.0: # %entry
485-
; P8-AIX-64-NEXT: addi r3, r3, 4
486-
; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
487-
; P8-AIX-64-NEXT: vmrgow v2, v3, v2
488-
; P8-AIX-64-NEXT: blr
489-
;
490-
; P8-AIX-32-LABEL: s2v_test_f2:
491-
; P8-AIX-32: # %bb.0: # %entry
492-
; P8-AIX-32-NEXT: lfs f0, 4(r3)
493-
; P8-AIX-32-NEXT: xscvdpspn v3, f0
494-
; P8-AIX-32-NEXT: vmrgow v2, v3, v2
495-
; P8-AIX-32-NEXT: blr
468+
; AIX-LABEL: s2v_test_f2:
469+
; AIX: # %bb.0: # %entry
470+
; AIX-NEXT: addi r3, r3, 4
471+
; AIX-NEXT: lxsiwzx v3, 0, r3
472+
; AIX-NEXT: vmrgow v2, v3, v2
473+
; AIX-NEXT: blr
496474
entry:
497475
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
498476
%0 = load float, float* %arrayidx, align 8
@@ -542,8 +520,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
542520
; P9-AIX-32-LABEL: s2v_test_f3:
543521
; P9-AIX-32: # %bb.0: # %entry
544522
; P9-AIX-32-NEXT: slwi r4, r4, 2
545-
; P9-AIX-32-NEXT: lfsx f0, r3, r4
546-
; P9-AIX-32-NEXT: xscvdpspn v3, f0
523+
; P9-AIX-32-NEXT: lxsiwzx v3, r3, r4
547524
; P9-AIX-32-NEXT: vmrgow v2, v3, v2
548525
; P9-AIX-32-NEXT: blr
549526
;
@@ -557,8 +534,7 @@ define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec
557534
; P8-AIX-32-LABEL: s2v_test_f3:
558535
; P8-AIX-32: # %bb.0: # %entry
559536
; P8-AIX-32-NEXT: slwi r4, r4, 2
560-
; P8-AIX-32-NEXT: lfsx f0, r3, r4
561-
; P8-AIX-32-NEXT: xscvdpspn v3, f0
537+
; P8-AIX-32-NEXT: lxsiwzx v3, r3, r4
562538
; P8-AIX-32-NEXT: vmrgow v2, v3, v2
563539
; P8-AIX-32-NEXT: blr
564540
entry:
@@ -601,33 +577,12 @@ define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec
601577
; P8BE-NEXT: vmrgow v2, v3, v2
602578
; P8BE-NEXT: blr
603579
;
604-
; P9-AIX-64-LABEL: s2v_test_f4:
605-
; P9-AIX-64: # %bb.0: # %entry
606-
; P9-AIX-64-NEXT: addi r3, r3, 4
607-
; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
608-
; P9-AIX-64-NEXT: vmrgow v2, v3, v2
609-
; P9-AIX-64-NEXT: blr
610-
;
611-
; P9-AIX-32-LABEL: s2v_test_f4:
612-
; P9-AIX-32: # %bb.0: # %entry
613-
; P9-AIX-32-NEXT: lfs f0, 4(r3)
614-
; P9-AIX-32-NEXT: xscvdpspn v3, f0
615-
; P9-AIX-32-NEXT: vmrgow v2, v3, v2
616-
; P9-AIX-32-NEXT: blr
617-
;
618-
; P8-AIX-64-LABEL: s2v_test_f4:
619-
; P8-AIX-64: # %bb.0: # %entry
620-
; P8-AIX-64-NEXT: addi r3, r3, 4
621-
; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
622-
; P8-AIX-64-NEXT: vmrgow v2, v3, v2
623-
; P8-AIX-64-NEXT: blr
624-
;
625-
; P8-AIX-32-LABEL: s2v_test_f4:
626-
; P8-AIX-32: # %bb.0: # %entry
627-
; P8-AIX-32-NEXT: lfs f0, 4(r3)
628-
; P8-AIX-32-NEXT: xscvdpspn v3, f0
629-
; P8-AIX-32-NEXT: vmrgow v2, v3, v2
630-
; P8-AIX-32-NEXT: blr
580+
; AIX-LABEL: s2v_test_f4:
581+
; AIX: # %bb.0: # %entry
582+
; AIX-NEXT: addi r3, r3, 4
583+
; AIX-NEXT: lxsiwzx v3, 0, r3
584+
; AIX-NEXT: vmrgow v2, v3, v2
585+
; AIX-NEXT: blr
631586
entry:
632587
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
633588
%0 = load float, float* %arrayidx, align 8
@@ -663,31 +618,11 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr
663618
; P8BE-NEXT: vmrgow v2, v3, v2
664619
; P8BE-NEXT: blr
665620
;
666-
; P9-AIX-64-LABEL: s2v_test_f5:
667-
; P9-AIX-64: # %bb.0: # %entry
668-
; P9-AIX-64-NEXT: lxsiwzx v3, 0, r3
669-
; P9-AIX-64-NEXT: vmrgow v2, v3, v2
670-
; P9-AIX-64-NEXT: blr
671-
;
672-
; P9-AIX-32-LABEL: s2v_test_f5:
673-
; P9-AIX-32: # %bb.0: # %entry
674-
; P9-AIX-32-NEXT: lfs f0, 0(r3)
675-
; P9-AIX-32-NEXT: xscvdpspn v3, f0
676-
; P9-AIX-32-NEXT: vmrgow v2, v3, v2
677-
; P9-AIX-32-NEXT: blr
678-
;
679-
; P8-AIX-64-LABEL: s2v_test_f5:
680-
; P8-AIX-64: # %bb.0: # %entry
681-
; P8-AIX-64-NEXT: lxsiwzx v3, 0, r3
682-
; P8-AIX-64-NEXT: vmrgow v2, v3, v2
683-
; P8-AIX-64-NEXT: blr
684-
;
685-
; P8-AIX-32-LABEL: s2v_test_f5:
686-
; P8-AIX-32: # %bb.0: # %entry
687-
; P8-AIX-32-NEXT: lfs f0, 0(r3)
688-
; P8-AIX-32-NEXT: xscvdpspn v3, f0
689-
; P8-AIX-32-NEXT: vmrgow v2, v3, v2
690-
; P8-AIX-32-NEXT: blr
621+
; AIX-LABEL: s2v_test_f5:
622+
; AIX: # %bb.0: # %entry
623+
; AIX-NEXT: lxsiwzx v3, 0, r3
624+
; AIX-NEXT: vmrgow v2, v3, v2
625+
; AIX-NEXT: blr
691626
entry:
692627
%0 = load float, float* %ptr1, align 8
693628
%vecins = insertelement <2 x float> %vec, float %0, i32 0

0 commit comments

Comments
 (0)