
Commit 171a3a6

[VP][NFC] Add 32-bit test for VP (#68582)
1 parent 85feb93 commit 171a3a6

File tree

2 files changed: 445 additions & 0 deletions


llvm/test/CodeGen/X86/expand-vp-fp-intrinsics.ll

Lines changed: 177 additions & 0 deletions
@@ -1,10 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fadd_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fadd_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
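The function bodies themselves are outside the hunks of this diff. As a rough sketch of the shape of such a test (illustrative only; the value name %res and the all-true mask constant are assumptions, not copied from the file), @vp_fadd_v4f32 calls the intrinsic declared further down and stores the result through %out:

; Sketch only -- the actual body of @vp_fadd_v4f32 is elided from this diff.
define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
  %res = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %vp)
  store <4 x float> %res, ptr %out
  ret void
}

Plain X86 has no predicated vector instructions, so the expansion presumably folds the mask and EVL away for these non-trapping FP operations, which is why the new X86 checks reduce to a pointer load, a vaddps, and a store. The CHECK lines can be regenerated with llvm/utils/update_llc_test_checks.py, as the NOTE at the top of the file says.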
@@ -23,6 +31,13 @@ define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_fsub_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fsub_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vsubps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fsub_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: subps %xmm1, %xmm0
@@ -41,6 +56,13 @@ define void @vp_fsub_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_fmul_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fmul_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fmul_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
@@ -59,6 +81,13 @@ define void @vp_fmul_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fmul.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_fdiv_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fdiv_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vdivps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fdiv_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: divps %xmm1, %xmm0
@@ -77,6 +106,48 @@ define void @vp_fdiv_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_frem_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp)
+; X86-NEXT: vextractps $2, %xmm0, (%esp)
+; X86-NEXT: calll fmodf
+; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $1, %xmm0, (%esp)
+; X86-NEXT: calll fmodf
+; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: calll fmodf
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $3, %xmm0, (%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: calll fmodf
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X86-NEXT: vmovaps %xmm0, (%esi)
+; X86-NEXT: addl $80, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_frem_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbx
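x86 has no packed frem instruction, so the 32-bit expansion above scalarizes the operation into four fmodf libcalls, one per element, and rebuilds the vector with vinsertps. A hedged sketch of the call that drives this, reusing the @llvm.vp.frem.v4f32 declaration in the next hunk (the operand names and the all-true mask are illustrative, not taken from the file):

; Sketch only -- not the literal test body.
%rem = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %vp)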
@@ -157,6 +228,13 @@ define void @vp_frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.frem.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_fabs_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fabs_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fabs_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -188,6 +266,13 @@ define void @vp_fabs_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fabs.v4f32(<4 x float>, <4 x i1>, i32)

define void @vp_sqrt_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_sqrt_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vsqrtps %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_sqrt_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0
@@ -206,6 +291,13 @@ define void @vp_sqrt_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)

define void @vp_fneg_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
+; X86-LABEL: vp_fneg_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fneg_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -237,6 +329,55 @@ define void @vp_fneg_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp)
declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32)

define void @vp_fma_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
+; X86-LABEL: vp_fma_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $84, %esp
+; X86-NEXT: vmovupd %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: vextractps $2, %xmm0, (%esp)
+; X86-NEXT: vshufpd {{.*#+}} xmm0 = xmm1[1,0]
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: calll fmaf
+; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $1, %xmm0, (%esp)
+; X86-NEXT: vmovshdup {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X86-NEXT: # xmm0 = mem[1,1,3,3]
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: calll fmaf
+; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: calll fmaf
+; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: vextractps $3, %xmm0, (%esp)
+; X86-NEXT: vpermilps $255, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X86-NEXT: # xmm0 = mem[3,3,3,3]
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: calll fmaf
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X86-NEXT: vmovaps %xmm0, (%esi)
+; X86-NEXT: addl $84, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fma_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbx
@@ -372,6 +513,14 @@ define void @vp_fma_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) no
declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define void @vp_fmuladd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
+; X86-LABEL: vp_fmuladd_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovaps %xmm0, (%eax)
+; X86-NEXT: retl
+;
; SSE-LABEL: vp_fmuladd_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
@@ -406,6 +555,13 @@ declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>

declare <4 x float> @llvm.vp.maxnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: vfmax_vv_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vmaxps %xmm0, %xmm1, %xmm2
+; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; X86-NEXT: retl
+;
; SSE-LABEL: vfmax_vv_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm2
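Here, too, the mask %m and the EVL are not reflected in the generated code; the vmaxps/vcmpunordps/vblendvps sequence implements maxnum semantics, blending in %vb for lanes where %va is NaN so the non-NaN operand wins. A sketch of the call being compiled, based on the @llvm.vp.maxnum.v4f32 declaration and the @vfmax_vv_v4f32 signature shown above (illustrative only; the body is elided from the diff):

; Sketch only -- not the literal test body.
%r = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
ret <4 x float> %r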
@@ -443,6 +599,13 @@ define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m

declare <8 x float> @llvm.vp.maxnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: vfmax_vv_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vmaxps %ymm0, %ymm1, %ymm2
+; X86-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0
+; X86-NEXT: vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; X86-NEXT: retl
+;
; SSE-LABEL: vfmax_vv_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm2, %xmm4
@@ -486,6 +649,13 @@ define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m

declare <4 x float> @llvm.vp.minnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: vfmin_vv_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vminps %xmm0, %xmm1, %xmm2
+; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; X86-NEXT: retl
+;
; SSE-LABEL: vfmin_vv_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm2
@@ -523,6 +693,13 @@ define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m

declare <8 x float> @llvm.vp.minnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: vfmin_vv_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vminps %ymm0, %ymm1, %ymm2
+; X86-NEXT: vcmpunordps %ymm0, %ymm0, %ymm0
+; X86-NEXT: vblendvps %ymm0, %ymm1, %ymm2, %ymm0
+; X86-NEXT: retl
+;
; SSE-LABEL: vfmin_vv_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm2, %xmm4
