Skip to content

Commit d8ad1ee

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (7/11) (#116833)
SVE2.2 introduces predicated forms of instructions that zero the inactive lanes. In some cases this makes it possible to save a `movprfx` or a `mov` instruction when emitting code for the `_x` or `_z` variants of intrinsics. This patch adds support for emitting the zeroing forms of certain `FLOGB` instructions.
1 parent e21b804 commit d8ad1ee

File tree

3 files changed

+264
-2
lines changed

3 files changed

+264
-2
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4280,7 +4280,7 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
42804280
defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>;
42814281
defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>;
42824282
// Signed integer base 2 logarithm of fp value, zeroing predicate
4283-
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
4283+
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb", int_aarch64_sve_flogb>;
42844284

42854285
// SVE2 integer unary operations, zeroing predicate
42864286
def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3332,10 +3332,14 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperato
33323332
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
33333333
}
33343334

3335-
multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
3335+
multiclass sve_fp_z2op_p_zd_d_flogb<string asm, SDPatternOperator op> {
33363336
def _H : sve_fp_z2op_p_zd<0b0011001, asm, ZPR16, ZPR16>;
33373337
def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>;
33383338
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
3339+
3340+
defm : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
3341+
defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
3342+
defm : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
33393343
}
33403344

33413345
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve2 < %s | FileCheck %s
3+
; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
4+
5+
; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
6+
; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
7+
8+
target triple = "aarch64-linux"
9+
10+
define <vscale x 8 x i16> @test_svlogb_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
11+
; CHECK-LABEL: test_svlogb_f16_x_1:
12+
; CHECK: // %bb.0: // %entry
13+
; CHECK-NEXT: flogb z0.h, p0/m, z0.h
14+
; CHECK-NEXT: ret
15+
;
16+
; CHECK-2p2-LABEL: test_svlogb_f16_x_1:
17+
; CHECK-2p2: // %bb.0: // %entry
18+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z0.h
19+
; CHECK-2p2-NEXT: ret
20+
entry:
21+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
22+
ret <vscale x 8 x i16> %0
23+
}
24+
25+
define <vscale x 8 x i16> @test_svlogb_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
26+
; CHECK-LABEL: test_svlogb_f16_x_2:
27+
; CHECK: // %bb.0: // %entry
28+
; CHECK-NEXT: flogb z0.h, p0/m, z1.h
29+
; CHECK-NEXT: ret
30+
;
31+
; CHECK-2p2-LABEL: test_svlogb_f16_x_2:
32+
; CHECK-2p2: // %bb.0: // %entry
33+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
34+
; CHECK-2p2-NEXT: ret
35+
entry:
36+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
37+
ret <vscale x 8 x i16> %0
38+
}
39+
40+
define <vscale x 8 x i16> @test_svlogb_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
41+
; CHECK-LABEL: test_svlogb_f16_z:
42+
; CHECK: // %bb.0: // %entry
43+
; CHECK-NEXT: mov z0.h, #0 // =0x0
44+
; CHECK-NEXT: flogb z0.h, p0/m, z1.h
45+
; CHECK-NEXT: ret
46+
;
47+
; CHECK-2p2-LABEL: test_svlogb_f16_z:
48+
; CHECK-2p2: // %bb.0: // %entry
49+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
50+
; CHECK-2p2-NEXT: ret
51+
entry:
52+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
53+
ret <vscale x 8 x i16> %0
54+
}
55+
56+
define <vscale x 4 x i32> @test_svlogb_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
57+
; CHECK-LABEL: test_svlogb_f32_x_1:
58+
; CHECK: // %bb.0: // %entry
59+
; CHECK-NEXT: flogb z0.s, p0/m, z0.s
60+
; CHECK-NEXT: ret
61+
;
62+
; CHECK-2p2-LABEL: test_svlogb_f32_x_1:
63+
; CHECK-2p2: // %bb.0: // %entry
64+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z0.s
65+
; CHECK-2p2-NEXT: ret
66+
entry:
67+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
68+
ret <vscale x 4 x i32> %0
69+
}
70+
71+
define <vscale x 4 x i32> @test_svlogb_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
72+
; CHECK-LABEL: test_svlogb_f32_x_2:
73+
; CHECK: // %bb.0: // %entry
74+
; CHECK-NEXT: flogb z0.s, p0/m, z1.s
75+
; CHECK-NEXT: ret
76+
;
77+
; CHECK-2p2-LABEL: test_svlogb_f32_x_2:
78+
; CHECK-2p2: // %bb.0: // %entry
79+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
80+
; CHECK-2p2-NEXT: ret
81+
entry:
82+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
83+
ret <vscale x 4 x i32> %0
84+
}
85+
86+
define <vscale x 4 x i32> @test_svlogb_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
87+
; CHECK-LABEL: test_svlogb_f32_z:
88+
; CHECK: // %bb.0: // %entry
89+
; CHECK-NEXT: mov z0.s, #0 // =0x0
90+
; CHECK-NEXT: flogb z0.s, p0/m, z1.s
91+
; CHECK-NEXT: ret
92+
;
93+
; CHECK-2p2-LABEL: test_svlogb_f32_z:
94+
; CHECK-2p2: // %bb.0: // %entry
95+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
96+
; CHECK-2p2-NEXT: ret
97+
entry:
98+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
99+
ret <vscale x 4 x i32> %0
100+
}
101+
102+
define <vscale x 2 x i64> @test_svlogb_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
103+
; CHECK-LABEL: test_svlogb_f64_x_1:
104+
; CHECK: // %bb.0: // %entry
105+
; CHECK-NEXT: flogb z0.d, p0/m, z0.d
106+
; CHECK-NEXT: ret
107+
;
108+
; CHECK-2p2-LABEL: test_svlogb_f64_x_1:
109+
; CHECK-2p2: // %bb.0: // %entry
110+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z0.d
111+
; CHECK-2p2-NEXT: ret
112+
entry:
113+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
114+
ret <vscale x 2 x i64> %0
115+
}
116+
117+
define <vscale x 2 x i64> @test_svlogb_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
118+
; CHECK-LABEL: test_svlogb_f64_x_2:
119+
; CHECK: // %bb.0: // %entry
120+
; CHECK-NEXT: flogb z0.d, p0/m, z1.d
121+
; CHECK-NEXT: ret
122+
;
123+
; CHECK-2p2-LABEL: test_svlogb_f64_x_2:
124+
; CHECK-2p2: // %bb.0: // %entry
125+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
126+
; CHECK-2p2-NEXT: ret
127+
entry:
128+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
129+
ret <vscale x 2 x i64> %0
130+
}
131+
132+
define <vscale x 2 x i64> @test_svlogb_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
133+
; CHECK-LABEL: test_svlogb_f64_z:
134+
; CHECK: // %bb.0: // %entry
135+
; CHECK-NEXT: mov z0.d, #0 // =0x0
136+
; CHECK-NEXT: flogb z0.d, p0/m, z1.d
137+
; CHECK-NEXT: ret
138+
;
139+
; CHECK-2p2-LABEL: test_svlogb_f64_z:
140+
; CHECK-2p2: // %bb.0: // %entry
141+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
142+
; CHECK-2p2-NEXT: ret
143+
entry:
144+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
145+
ret <vscale x 2 x i64> %0
146+
}
147+
148+
define <vscale x 8 x i16> @test_svlogb_nxv8f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
149+
; CHECK-LABEL: test_svlogb_nxv8f16_ptrue_u:
150+
; CHECK: // %bb.0: // %entry
151+
; CHECK-NEXT: ptrue p0.h
152+
; CHECK-NEXT: flogb z0.h, p0/m, z1.h
153+
; CHECK-NEXT: ret
154+
;
155+
; CHECK-2p2-LABEL: test_svlogb_nxv8f16_ptrue_u:
156+
; CHECK-2p2: // %bb.0: // %entry
157+
; CHECK-2p2-NEXT: ptrue p0.h
158+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
159+
; CHECK-2p2-NEXT: ret
160+
entry:
161+
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
162+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
163+
ret <vscale x 8 x i16> %0
164+
}
165+
166+
define <vscale x 8 x i16> @test_svlogb_nxv8f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
167+
; CHECK-LABEL: test_svlogb_nxv8f16_ptrue:
168+
; CHECK: // %bb.0: // %entry
169+
; CHECK-NEXT: mov z0.d, z1.d
170+
; CHECK-NEXT: ptrue p0.h
171+
; CHECK-NEXT: flogb z0.h, p0/m, z2.h
172+
; CHECK-NEXT: ret
173+
;
174+
; CHECK-2p2-LABEL: test_svlogb_nxv8f16_ptrue:
175+
; CHECK-2p2: // %bb.0: // %entry
176+
; CHECK-2p2-NEXT: ptrue p0.h
177+
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z2.h
178+
; CHECK-2p2-NEXT: ret
179+
entry:
180+
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
181+
%0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
182+
ret <vscale x 8 x i16> %0
183+
}
184+
185+
define <vscale x 4 x i32> @test_svlogb_nxv4f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
186+
; CHECK-LABEL: test_svlogb_nxv4f32_ptrue_u:
187+
; CHECK: // %bb.0: // %entry
188+
; CHECK-NEXT: ptrue p0.s
189+
; CHECK-NEXT: flogb z0.s, p0/m, z1.s
190+
; CHECK-NEXT: ret
191+
;
192+
; CHECK-2p2-LABEL: test_svlogb_nxv4f32_ptrue_u:
193+
; CHECK-2p2: // %bb.0: // %entry
194+
; CHECK-2p2-NEXT: ptrue p0.s
195+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
196+
; CHECK-2p2-NEXT: ret
197+
entry:
198+
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
199+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
200+
ret <vscale x 4 x i32> %0
201+
}
202+
203+
define <vscale x 4 x i32> @test_svlogb_nxv4f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
204+
; CHECK-LABEL: test_svlogb_nxv4f32_ptrue:
205+
; CHECK: // %bb.0: // %entry
206+
; CHECK-NEXT: mov z0.d, z1.d
207+
; CHECK-NEXT: ptrue p0.s
208+
; CHECK-NEXT: flogb z0.s, p0/m, z2.s
209+
; CHECK-NEXT: ret
210+
;
211+
; CHECK-2p2-LABEL: test_svlogb_nxv4f32_ptrue:
212+
; CHECK-2p2: // %bb.0: // %entry
213+
; CHECK-2p2-NEXT: ptrue p0.s
214+
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z2.s
215+
; CHECK-2p2-NEXT: ret
216+
entry:
217+
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
218+
%0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
219+
ret <vscale x 4 x i32> %0
220+
}
221+
222+
define <vscale x 2 x i64> @test_svlogb_nxv2f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
223+
; CHECK-LABEL: test_svlogb_nxv2f64_ptrue_u:
224+
; CHECK: // %bb.0: // %entry
225+
; CHECK-NEXT: ptrue p0.d
226+
; CHECK-NEXT: flogb z0.d, p0/m, z1.d
227+
; CHECK-NEXT: ret
228+
;
229+
; CHECK-2p2-LABEL: test_svlogb_nxv2f64_ptrue_u:
230+
; CHECK-2p2: // %bb.0: // %entry
231+
; CHECK-2p2-NEXT: ptrue p0.d
232+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
233+
; CHECK-2p2-NEXT: ret
234+
entry:
235+
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
236+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
237+
ret <vscale x 2 x i64> %0
238+
}
239+
240+
define <vscale x 2 x i64> @test_svlogb_nxv2f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
241+
; CHECK-LABEL: test_svlogb_nxv2f64_ptrue:
242+
; CHECK: // %bb.0: // %entry
243+
; CHECK-NEXT: mov z0.d, z1.d
244+
; CHECK-NEXT: ptrue p0.d
245+
; CHECK-NEXT: flogb z0.d, p0/m, z2.d
246+
; CHECK-NEXT: ret
247+
;
248+
; CHECK-2p2-LABEL: test_svlogb_nxv2f64_ptrue:
249+
; CHECK-2p2: // %bb.0: // %entry
250+
; CHECK-2p2-NEXT: ptrue p0.d
251+
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z2.d
252+
; CHECK-2p2-NEXT: ret
253+
entry:
254+
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
255+
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
256+
ret <vscale x 2 x i64> %0
257+
}
258+

0 commit comments

Comments
 (0)