@@ -146,54 +146,47 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
146
146
define <8 x i16 > @v8i16 (i32 %index , i32 %TC , <8 x i16 > %V1 , <8 x i16 > %V2 ) {
147
147
; CHECK-LABEL: v8i16:
148
148
; CHECK: @ %bb.0:
149
- ; CHECK-NEXT: vpush {d8, d9}
149
+ ; CHECK-NEXT: push {r4, lr}
150
+ ; CHECK-NEXT: sub sp, #16
150
151
; CHECK-NEXT: adr.w r12, .LCPI3_0
151
152
; CHECK-NEXT: vdup.32 q1, r1
152
153
; CHECK-NEXT: vldrw.u32 q0, [r12]
153
- ; CHECK-NEXT: vmov.i8 q2, #0x0
154
- ; CHECK-NEXT: vmov.i8 q3, #0xff
154
+ ; CHECK-NEXT: vmov.i8 q2, #0xff
155
+ ; CHECK-NEXT: mov r4, sp
156
+ ; CHECK-NEXT: adr r1, .LCPI3_1
155
157
; CHECK-NEXT: vqadd.u32 q0, q0, r0
156
158
; CHECK-NEXT: vcmp.u32 hi, q1, q0
157
- ; CHECK-NEXT: vpsel q4, q3, q2
158
- ; CHECK-NEXT: vmov r1, r12, d8
159
- ; CHECK-NEXT: vmov.16 q0[0], r1
160
- ; CHECK-NEXT: vmov.16 q0[1], r12
161
- ; CHECK-NEXT: vmov r1, r12, d9
162
- ; CHECK-NEXT: vmov.16 q0[2], r1
163
- ; CHECK-NEXT: adr r1, .LCPI3_1
164
- ; CHECK-NEXT: vldrw.u32 q4, [r1]
165
- ; CHECK-NEXT: vmov.16 q0[3], r12
166
- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
167
- ; CHECK-NEXT: vcmp.u32 hi, q1, q4
168
- ; CHECK-NEXT: vpsel q1, q3, q2
169
- ; CHECK-NEXT: vmov r0, r1, d2
170
- ; CHECK-NEXT: vmov.16 q0[4], r0
171
- ; CHECK-NEXT: vmov.16 q0[5], r1
172
- ; CHECK-NEXT: vmov r0, r1, d3
173
- ; CHECK-NEXT: vmov.16 q0[6], r0
174
- ; CHECK-NEXT: add r0, sp, #24
175
- ; CHECK-NEXT: vmov.16 q0[7], r1
159
+ ; CHECK-NEXT: vmov.i8 q0, #0x0
160
+ ; CHECK-NEXT: vpsel q3, q2, q0
161
+ ; CHECK-NEXT: vstrh.32 q3, [r4, #8]
162
+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
163
+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
164
+ ; CHECK-NEXT: add r0, sp, #32
165
+ ; CHECK-NEXT: vcmp.u32 hi, q1, q3
176
166
; CHECK-NEXT: vldrw.u32 q1, [r0]
177
- ; CHECK-NEXT: vcmp.i16 ne, q0, zr
178
- ; CHECK-NEXT: vldr d1, [sp, #16]
167
+ ; CHECK-NEXT: vpsel q0, q2, q0
168
+ ; CHECK-NEXT: vstrh.32 q0, [r4]
169
+ ; CHECK-NEXT: vldr d1, [sp, #24]
170
+ ; CHECK-NEXT: vldrw.u32 q2, [r4]
179
171
; CHECK-NEXT: vmov d0, r2, r3
172
+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
180
173
; CHECK-NEXT: vpsel q0, q0, q1
181
174
; CHECK-NEXT: vmov r0, r1, d0
182
175
; CHECK-NEXT: vmov r2, r3, d1
183
- ; CHECK-NEXT: vpop {d8, d9}
184
- ; CHECK-NEXT: bx lr
176
+ ; CHECK-NEXT: add sp, #16
177
+ ; CHECK-NEXT: pop {r4, pc}
185
178
; CHECK-NEXT: .p2align 4
186
179
; CHECK-NEXT: @ %bb.1:
187
180
; CHECK-NEXT: .LCPI3_0:
188
- ; CHECK-NEXT: .long 0 @ 0x0
189
- ; CHECK-NEXT: .long 1 @ 0x1
190
- ; CHECK-NEXT: .long 2 @ 0x2
191
- ; CHECK-NEXT: .long 3 @ 0x3
192
- ; CHECK-NEXT: .LCPI3_1:
193
181
; CHECK-NEXT: .long 4 @ 0x4
194
182
; CHECK-NEXT: .long 5 @ 0x5
195
183
; CHECK-NEXT: .long 6 @ 0x6
196
184
; CHECK-NEXT: .long 7 @ 0x7
185
+ ; CHECK-NEXT: .LCPI3_1:
186
+ ; CHECK-NEXT: .long 0 @ 0x0
187
+ ; CHECK-NEXT: .long 1 @ 0x1
188
+ ; CHECK-NEXT: .long 2 @ 0x2
189
+ ; CHECK-NEXT: .long 3 @ 0x3
197
190
%active.lane.mask = call <8 x i1 > @llvm.get.active.lane.mask.v8i1.i32 (i32 %index , i32 %TC )
198
191
%select = select <8 x i1 > %active.lane.mask , <8 x i16 > %V1 , <8 x i16 > %V2
199
192
ret <8 x i16 > %select
@@ -202,122 +195,79 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
202
195
define <16 x i8 > @v16i8 (i32 %index , i32 %TC , <16 x i8 > %V1 , <16 x i8 > %V2 ) {
203
196
; CHECK-LABEL: v16i8:
204
197
; CHECK: @ %bb.0:
205
- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
198
+ ; CHECK-NEXT: push {r4, r5, r7, lr}
199
+ ; CHECK-NEXT: sub sp, #48
206
200
; CHECK-NEXT: adr.w r12, .LCPI4_0
207
- ; CHECK-NEXT: vdup.32 q3 , r1
201
+ ; CHECK-NEXT: vdup.32 q2 , r1
208
202
; CHECK-NEXT: vldrw.u32 q0, [r12]
209
203
; CHECK-NEXT: vmov.i8 q1, #0xff
204
+ ; CHECK-NEXT: add r5, sp, #16
205
+ ; CHECK-NEXT: adr r1, .LCPI4_1
210
206
; CHECK-NEXT: vqadd.u32 q0, q0, r0
211
- ; CHECK-NEXT: vcmp.u32 hi, q3, q0
207
+ ; CHECK-NEXT: adr r4, .LCPI4_3
208
+ ; CHECK-NEXT: vcmp.u32 hi, q2, q0
212
209
; CHECK-NEXT: vmov.i8 q0, #0x0
213
- ; CHECK-NEXT: vpsel q4, q1, q0
214
- ; CHECK-NEXT: vmov r1, r12, d8
215
- ; CHECK-NEXT: vmov.16 q2[0], r1
216
- ; CHECK-NEXT: vmov.16 q2[1], r12
217
- ; CHECK-NEXT: vmov r1, r12, d9
218
- ; CHECK-NEXT: vmov.16 q2[2], r1
219
- ; CHECK-NEXT: adr r1, .LCPI4_1
220
- ; CHECK-NEXT: vldrw.u32 q4, [r1]
221
- ; CHECK-NEXT: vmov.16 q2[3], r12
222
- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
223
- ; CHECK-NEXT: vcmp.u32 hi, q3, q4
224
- ; CHECK-NEXT: vpsel q4, q1, q0
225
- ; CHECK-NEXT: vmov r1, r12, d8
226
- ; CHECK-NEXT: vmov.16 q2[4], r1
227
- ; CHECK-NEXT: vmov.16 q2[5], r12
228
- ; CHECK-NEXT: vmov r1, r12, d9
229
- ; CHECK-NEXT: vmov.16 q2[6], r1
230
- ; CHECK-NEXT: vmov.16 q2[7], r12
231
- ; CHECK-NEXT: vcmp.i16 ne, q2, zr
232
- ; CHECK-NEXT: vpsel q4, q1, q0
233
- ; CHECK-NEXT: vmov.u16 r1, q4[0]
234
- ; CHECK-NEXT: vmov.8 q2[0], r1
235
- ; CHECK-NEXT: vmov.u16 r1, q4[1]
236
- ; CHECK-NEXT: vmov.8 q2[1], r1
237
- ; CHECK-NEXT: vmov.u16 r1, q4[2]
238
- ; CHECK-NEXT: vmov.8 q2[2], r1
239
- ; CHECK-NEXT: vmov.u16 r1, q4[3]
240
- ; CHECK-NEXT: vmov.8 q2[3], r1
241
- ; CHECK-NEXT: vmov.u16 r1, q4[4]
242
- ; CHECK-NEXT: vmov.8 q2[4], r1
243
- ; CHECK-NEXT: vmov.u16 r1, q4[5]
244
- ; CHECK-NEXT: vmov.8 q2[5], r1
245
- ; CHECK-NEXT: vmov.u16 r1, q4[6]
246
- ; CHECK-NEXT: vmov.8 q2[6], r1
247
- ; CHECK-NEXT: vmov.u16 r1, q4[7]
248
- ; CHECK-NEXT: vmov.8 q2[7], r1
210
+ ; CHECK-NEXT: vpsel q3, q1, q0
211
+ ; CHECK-NEXT: vstrh.32 q3, [r5, #8]
212
+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
249
213
; CHECK-NEXT: adr r1, .LCPI4_2
250
- ; CHECK-NEXT: vldrw.u32 q4, [r1]
251
- ; CHECK-NEXT: vqadd.u32 q4, q4, r0
252
- ; CHECK-NEXT: vcmp.u32 hi, q3, q4
253
- ; CHECK-NEXT: vpsel q5, q1, q0
254
- ; CHECK-NEXT: vmov r1, r12, d10
255
- ; CHECK-NEXT: vmov.16 q4[0], r1
256
- ; CHECK-NEXT: vmov.16 q4[1], r12
257
- ; CHECK-NEXT: vmov r1, r12, d11
258
- ; CHECK-NEXT: vmov.16 q4[2], r1
259
- ; CHECK-NEXT: adr r1, .LCPI4_3
260
- ; CHECK-NEXT: vldrw.u32 q5, [r1]
261
- ; CHECK-NEXT: vmov.16 q4[3], r12
262
- ; CHECK-NEXT: vqadd.u32 q5, q5, r0
263
- ; CHECK-NEXT: vcmp.u32 hi, q3, q5
214
+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
215
+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
216
+ ; CHECK-NEXT: vpsel q3, q1, q0
217
+ ; CHECK-NEXT: vstrh.32 q3, [r5]
218
+ ; CHECK-NEXT: vldrw.u32 q3, [r1]
219
+ ; CHECK-NEXT: mov r1, sp
220
+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
221
+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
264
222
; CHECK-NEXT: vpsel q3, q1, q0
265
- ; CHECK-NEXT: vmov r0, r1, d6
266
- ; CHECK-NEXT: vmov.16 q4[4], r0
267
- ; CHECK-NEXT: vmov.16 q4[5], r1
268
- ; CHECK-NEXT: vmov r0, r1, d7
269
- ; CHECK-NEXT: vmov.16 q4[6], r0
270
- ; CHECK-NEXT: vmov.16 q4[7], r1
271
- ; CHECK-NEXT: vcmp.i16 ne, q4, zr
223
+ ; CHECK-NEXT: vstrh.32 q3, [r1, #8]
224
+ ; CHECK-NEXT: vldrw.u32 q3, [r4]
225
+ ; CHECK-NEXT: vqadd.u32 q3, q3, r0
226
+ ; CHECK-NEXT: add r0, sp, #32
227
+ ; CHECK-NEXT: vcmp.u32 hi, q2, q3
228
+ ; CHECK-NEXT: vpsel q2, q1, q0
229
+ ; CHECK-NEXT: vstrh.32 q2, [r1]
230
+ ; CHECK-NEXT: vldrw.u32 q2, [r5]
231
+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
232
+ ; CHECK-NEXT: vpsel q2, q1, q0
233
+ ; CHECK-NEXT: vstrb.16 q2, [r0, #8]
234
+ ; CHECK-NEXT: vldrw.u32 q2, [r1]
235
+ ; CHECK-NEXT: add r1, sp, #72
236
+ ; CHECK-NEXT: vcmp.i16 ne, q2, zr
272
237
; CHECK-NEXT: vpsel q0, q1, q0
273
- ; CHECK-NEXT: vmov.u16 r0, q0[0]
274
- ; CHECK-NEXT: vmov.8 q2[8], r0
275
- ; CHECK-NEXT: vmov.u16 r0, q0[1]
276
- ; CHECK-NEXT: vmov.8 q2[9], r0
277
- ; CHECK-NEXT: vmov.u16 r0, q0[2]
278
- ; CHECK-NEXT: vmov.8 q2[10], r0
279
- ; CHECK-NEXT: vmov.u16 r0, q0[3]
280
- ; CHECK-NEXT: vmov.8 q2[11], r0
281
- ; CHECK-NEXT: vmov.u16 r0, q0[4]
282
- ; CHECK-NEXT: vmov.8 q2[12], r0
283
- ; CHECK-NEXT: vmov.u16 r0, q0[5]
284
- ; CHECK-NEXT: vmov.8 q2[13], r0
285
- ; CHECK-NEXT: vmov.u16 r0, q0[6]
286
- ; CHECK-NEXT: vmov.8 q2[14], r0
287
- ; CHECK-NEXT: vmov.u16 r0, q0[7]
288
- ; CHECK-NEXT: vmov.8 q2[15], r0
289
- ; CHECK-NEXT: add r0, sp, #40
290
- ; CHECK-NEXT: vldr d1, [sp, #32]
291
- ; CHECK-NEXT: vldrw.u32 q1, [r0]
292
- ; CHECK-NEXT: vcmp.i8 ne, q2, zr
238
+ ; CHECK-NEXT: vldrw.u32 q1, [r1]
239
+ ; CHECK-NEXT: vstrb.16 q0, [r0]
240
+ ; CHECK-NEXT: vldr d1, [sp, #64]
241
+ ; CHECK-NEXT: vldrw.u32 q2, [r0]
293
242
; CHECK-NEXT: vmov d0, r2, r3
243
+ ; CHECK-NEXT: vcmp.i8 ne, q2, zr
294
244
; CHECK-NEXT: vpsel q0, q0, q1
295
245
; CHECK-NEXT: vmov r0, r1, d0
296
246
; CHECK-NEXT: vmov r2, r3, d1
297
- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
298
- ; CHECK-NEXT: bx lr
247
+ ; CHECK-NEXT: add sp, #48
248
+ ; CHECK-NEXT: pop {r4, r5, r7, pc}
299
249
; CHECK-NEXT: .p2align 4
300
250
; CHECK-NEXT: @ %bb.1:
301
251
; CHECK-NEXT: .LCPI4_0:
302
- ; CHECK-NEXT: .long 0 @ 0x0
303
- ; CHECK-NEXT: .long 1 @ 0x1
304
- ; CHECK-NEXT: .long 2 @ 0x2
305
- ; CHECK-NEXT: .long 3 @ 0x3
252
+ ; CHECK-NEXT: .long 12 @ 0xc
253
+ ; CHECK-NEXT: .long 13 @ 0xd
254
+ ; CHECK-NEXT: .long 14 @ 0xe
255
+ ; CHECK-NEXT: .long 15 @ 0xf
306
256
; CHECK-NEXT: .LCPI4_1:
307
- ; CHECK-NEXT: .long 4 @ 0x4
308
- ; CHECK-NEXT: .long 5 @ 0x5
309
- ; CHECK-NEXT: .long 6 @ 0x6
310
- ; CHECK-NEXT: .long 7 @ 0x7
311
- ; CHECK-NEXT: .LCPI4_2:
312
257
; CHECK-NEXT: .long 8 @ 0x8
313
258
; CHECK-NEXT: .long 9 @ 0x9
314
259
; CHECK-NEXT: .long 10 @ 0xa
315
260
; CHECK-NEXT: .long 11 @ 0xb
261
+ ; CHECK-NEXT: .LCPI4_2:
262
+ ; CHECK-NEXT: .long 4 @ 0x4
263
+ ; CHECK-NEXT: .long 5 @ 0x5
264
+ ; CHECK-NEXT: .long 6 @ 0x6
265
+ ; CHECK-NEXT: .long 7 @ 0x7
316
266
; CHECK-NEXT: .LCPI4_3:
317
- ; CHECK-NEXT: .long 12 @ 0xc
318
- ; CHECK-NEXT: .long 13 @ 0xd
319
- ; CHECK-NEXT: .long 14 @ 0xe
320
- ; CHECK-NEXT: .long 15 @ 0xf
267
+ ; CHECK-NEXT: .long 0 @ 0x0
268
+ ; CHECK-NEXT: .long 1 @ 0x1
269
+ ; CHECK-NEXT: .long 2 @ 0x2
270
+ ; CHECK-NEXT: .long 3 @ 0x3
321
271
%active.lane.mask = call <16 x i1 > @llvm.get.active.lane.mask.v16i1.i32 (i32 %index , i32 %TC )
322
272
%select = select <16 x i1 > %active.lane.mask , <16 x i8 > %V1 , <16 x i8 > %V2
323
273
ret <16 x i8 > %select
0 commit comments