Skip to content

Commit fef8113

Browse files
alban-bridonneau authored and
peterwaller-arm committed
[SVE] Optimize new cases for lowerConvertToSVBool
Conversions to SVBool are already treated as a nop when their operand comes from a ptrue or a cmp, because those operations zero the extra predicate lanes by construction. This patch adds two similar cases:
- Wide cmps, which were not recognized by the existing check for other forms of cmp.
- Splats of 1, which are generated as ptrue and therefore also zero the extra predicate lanes.
Reviewed By: paulwalker-arm, peterwaller-arm
Differential Revision: https://reviews.llvm.org/D124908
1 parent a48adc5 commit fef8113

14 files changed

+404
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+17-1
Original file line numberDiff line numberDiff line change
@@ -4181,10 +4181,26 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
41814181
case AArch64ISD::SETCC_MERGE_ZERO:
41824182
return Reinterpret;
41834183
case ISD::INTRINSIC_WO_CHAIN:
4184-
if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
4184+
switch (InOp.getConstantOperandVal(0)) {
4185+
case Intrinsic::aarch64_sve_ptrue:
4186+
case Intrinsic::aarch64_sve_cmpeq_wide:
4187+
case Intrinsic::aarch64_sve_cmpne_wide:
4188+
case Intrinsic::aarch64_sve_cmpge_wide:
4189+
case Intrinsic::aarch64_sve_cmpgt_wide:
4190+
case Intrinsic::aarch64_sve_cmplt_wide:
4191+
case Intrinsic::aarch64_sve_cmple_wide:
4192+
case Intrinsic::aarch64_sve_cmphs_wide:
4193+
case Intrinsic::aarch64_sve_cmphi_wide:
4194+
case Intrinsic::aarch64_sve_cmplo_wide:
4195+
case Intrinsic::aarch64_sve_cmpls_wide:
41854196
return Reinterpret;
4197+
}
41864198
}
41874199

4200+
// Splat vectors of 1 will generate ptrue instructions, which already zero
// the extra predicate lanes, so no additional masking is required.
4201+
if (ISD::isConstantSplatVectorAllOnes(InOp.getNode()))
4202+
return Reinterpret;
4203+
41884204
// Otherwise, zero the newly introduced lanes.
41894205
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
41904206
SDValue MaskReinterpret =
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3+
4+
; This test would belong in sve-intrinsics-reinterpret.ll, but it uses types
5+
; that are invalid with sve-streaming
6+
7+
define <vscale x 16 x i1> @reinterpret_bool_from_splat() {
8+
; CHECK-LABEL: reinterpret_bool_from_splat:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: ptrue p0.d
11+
; CHECK-NEXT: ret
12+
%ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
13+
%splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
14+
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %splat)
15+
ret <vscale x 16 x i1> %out
16+
}
17+
18+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
19+

llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll

+15
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,22 @@ define <vscale x 16 x i1> @reinterpret_cmpgt(<vscale x 8 x i1> %p, <vscale x 8 x
102102
ret <vscale x 16 x i1> %2
103103
}
104104

105+
; The first reinterpret should prevent the second one from being simplified as a nop
106+
define <vscale x 16 x i1> @chained_reinterpret() {
107+
; CHECK-LABEL: chained_reinterpret:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: ptrue p0.b
110+
; CHECK-NEXT: ptrue p1.d
111+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
112+
; CHECK-NEXT: ret
113+
%in = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
114+
%cast2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %in)
115+
%out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %cast2)
116+
ret <vscale x 16 x i1> %out
117+
}
118+
105119
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
120+
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
106121
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
107122

108123
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll

+34
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,43 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
4646
ret i32 %conv
4747
}
4848

49+
define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
50+
; CHECK-LABEL: cmpeq_wide_nxv8i16:
51+
; CHECK: cmpeq p0.h, p0/z, z0.h, z1.d
52+
; CHECK-NEXT: cset w0, ne
53+
; CHECK-NEXT: ret
54+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
55+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
56+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
57+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
58+
%conv = zext i1 %4 to i32
59+
ret i32 %conv
60+
}
61+
62+
define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
63+
; CHECK-LABEL: cmpeq_wide_nxv4i32:
64+
; CHECK: cmpeq p0.s, p0/z, z0.s, z1.d
65+
; CHECK-NEXT: cset w0, ne
66+
; CHECK-NEXT: ret
67+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
68+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
69+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
70+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
71+
%conv = zext i1 %4 to i32
72+
ret i32 %conv
73+
}
74+
4975
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
5076
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
77+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
78+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
5179

5280
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
5381

5482
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
83+
84+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
85+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
86+
87+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
88+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll

+34
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,43 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
4646
ret i32 %conv
4747
}
4848

49+
define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
50+
; CHECK-LABEL: cmpge_wide_nxv8i16:
51+
; CHECK: cmpge p0.h, p0/z, z0.h, z1.d
52+
; CHECK-NEXT: cset w0, ne
53+
; CHECK-NEXT: ret
54+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
55+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
56+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
57+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
58+
%conv = zext i1 %4 to i32
59+
ret i32 %conv
60+
}
61+
62+
define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
63+
; CHECK-LABEL: cmpge_wide_nxv4i32:
64+
; CHECK: cmpge p0.s, p0/z, z0.s, z1.d
65+
; CHECK-NEXT: cset w0, ne
66+
; CHECK-NEXT: ret
67+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
68+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
69+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
70+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
71+
%conv = zext i1 %4 to i32
72+
ret i32 %conv
73+
}
74+
4975
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
5076
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
77+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
78+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
5179

5280
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
5381

5482
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
83+
84+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
85+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
86+
87+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
88+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll

+34
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,43 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
4646
ret i32 %conv
4747
}
4848

49+
define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
50+
; CHECK-LABEL: cmpgt_wide_nxv8i16:
51+
; CHECK: cmpgt p0.h, p0/z, z0.h, z1.d
52+
; CHECK-NEXT: cset w0, ne
53+
; CHECK-NEXT: ret
54+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
55+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
56+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
57+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
58+
%conv = zext i1 %4 to i32
59+
ret i32 %conv
60+
}
61+
62+
define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
63+
; CHECK-LABEL: cmpgt_wide_nxv4i32:
64+
; CHECK: cmpgt p0.s, p0/z, z0.s, z1.d
65+
; CHECK-NEXT: cset w0, ne
66+
; CHECK-NEXT: ret
67+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
68+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
69+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
70+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
71+
%conv = zext i1 %4 to i32
72+
ret i32 %conv
73+
}
74+
4975
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
5076
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
77+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
78+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
5179

5280
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
5381

5482
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
83+
84+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
85+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
86+
87+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
88+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll

+34
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,43 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
4646
ret i32 %conv
4747
}
4848

49+
define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
50+
; CHECK-LABEL: cmphi_wide_nxv8i16:
51+
; CHECK: cmphi p0.h, p0/z, z0.h, z1.d
52+
; CHECK-NEXT: cset w0, ne
53+
; CHECK-NEXT: ret
54+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
55+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
56+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
57+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
58+
%conv = zext i1 %4 to i32
59+
ret i32 %conv
60+
}
61+
62+
define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
63+
; CHECK-LABEL: cmphi_wide_nxv4i32:
64+
; CHECK: cmphi p0.s, p0/z, z0.s, z1.d
65+
; CHECK-NEXT: cset w0, ne
66+
; CHECK-NEXT: ret
67+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
68+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
69+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
70+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
71+
%conv = zext i1 %4 to i32
72+
ret i32 %conv
73+
}
74+
4975
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
5076
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
77+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
78+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
5179

5280
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
5381

5482
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
83+
84+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
85+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
86+
87+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
88+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll

+34
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,43 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
4646
ret i32 %conv
4747
}
4848

49+
define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
50+
; CHECK-LABEL: cmphs_wide_nxv8i16:
51+
; CHECK: cmphs p0.h, p0/z, z0.h, z1.d
52+
; CHECK-NEXT: cset w0, ne
53+
; CHECK-NEXT: ret
54+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
55+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
56+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
57+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
58+
%conv = zext i1 %4 to i32
59+
ret i32 %conv
60+
}
61+
62+
define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
63+
; CHECK-LABEL: cmphs_wide_nxv4i32:
64+
; CHECK: cmphs p0.s, p0/z, z0.s, z1.d
65+
; CHECK-NEXT: cset w0, ne
66+
; CHECK-NEXT: ret
67+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
68+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
69+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
70+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
71+
%conv = zext i1 %4 to i32
72+
ret i32 %conv
73+
}
74+
4975
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
5076
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
77+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
78+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
5179

5280
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
5381

5482
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
83+
84+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
85+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
86+
87+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
88+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll

+34
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,43 @@ define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
3131
ret i32 %conv
3232
}
3333

34+
define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
35+
; CHECK-LABEL: cmple_wide_nxv8i16:
36+
; CHECK: cmple p0.h, p0/z, z0.h, z1.d
37+
; CHECK-NEXT: cset w0, ne
38+
; CHECK-NEXT: ret
39+
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
40+
%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
41+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
42+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
43+
%conv = zext i1 %4 to i32
44+
ret i32 %conv
45+
}
46+
47+
define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
48+
; CHECK-LABEL: cmple_wide_nxv4i32:
49+
; CHECK: cmple p0.s, p0/z, z0.s, z1.d
50+
; CHECK-NEXT: cset w0, ne
51+
; CHECK-NEXT: ret
52+
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
53+
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
54+
%3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
55+
%4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
56+
%conv = zext i1 %4 to i32
57+
ret i32 %conv
58+
}
59+
3460
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
3561
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
62+
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
63+
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
3664

3765
declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
3866

3967
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
68+
69+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
70+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
71+
72+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
73+
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)

0 commit comments

Comments
 (0)