|
5 | 5 | ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
|
6 | 6 | ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
|
7 | 7 | ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
|
8 |
| -; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512 |
9 |
| -; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512 |
10 |
| -; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512 |
| 8 | +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512F |
| 9 | +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE |
| 10 | +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE |
11 | 11 |
|
12 | 12 | ;
|
13 | 13 | ; Half to Float
|
@@ -3156,6 +3156,49 @@ define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
|
3156 | 3156 | ; F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
|
3157 | 3157 | ; F16C-NEXT: addq $40, %rsp
|
3158 | 3158 | ; F16C-NEXT: retq
|
| 3159 | +; |
| 3160 | +; AVX512F-LABEL: cvt_2f64_to_2i16: |
| 3161 | +; AVX512F: # %bb.0: |
| 3162 | +; AVX512F-NEXT: subq $104, %rsp |
| 3163 | +; AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill |
| 3164 | +; AVX512F-NEXT: callq __truncdfhf2@PLT |
| 3165 | +; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0 |
| 3166 | +; AVX512F-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill |
| 3167 | +; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload |
| 3168 | +; AVX512F-NEXT: vzeroupper |
| 3169 | +; AVX512F-NEXT: callq __truncdfhf2@PLT |
| 3170 | +; AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| 3171 | +; AVX512F-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload |
| 3172 | +; AVX512F-NEXT: # xmm0 = mem[1,0] |
| 3173 | +; AVX512F-NEXT: callq __truncdfhf2@PLT |
| 3174 | +; AVX512F-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload |
| 3175 | +; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| 3176 | +; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [16,0,0,0] |
| 3177 | +; AVX512F-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload |
| 3178 | +; AVX512F-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 |
| 3179 | +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 |
| 3180 | +; AVX512F-NEXT: addq $104, %rsp |
| 3181 | +; AVX512F-NEXT: vzeroupper |
| 3182 | +; AVX512F-NEXT: retq |
| 3183 | +; |
| 3184 | +; AVX512-FASTLANE-LABEL: cvt_2f64_to_2i16: |
| 3185 | +; AVX512-FASTLANE: # %bb.0: |
| 3186 | +; AVX512-FASTLANE-NEXT: subq $40, %rsp |
| 3187 | +; AVX512-FASTLANE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| 3188 | +; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT |
| 3189 | +; AVX512-FASTLANE-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill |
| 3190 | +; AVX512-FASTLANE-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload |
| 3191 | +; AVX512-FASTLANE-NEXT: # xmm0 = mem[1,0] |
| 3192 | +; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT |
| 3193 | +; AVX512-FASTLANE-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload |
| 3194 | +; AVX512-FASTLANE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] |
| 3195 | +; AVX512-FASTLANE-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill |
| 3196 | +; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT |
| 3197 | +; AVX512-FASTLANE-NEXT: vpbroadcastw %xmm0, %xmm1 |
| 3198 | +; AVX512-FASTLANE-NEXT: vmovaps {{.*#+}} xmm0 = [4,0,0,0] |
| 3199 | +; AVX512-FASTLANE-NEXT: vpermi2ps (%rsp), %xmm1, %xmm0 # 16-byte Folded Reload |
| 3200 | +; AVX512-FASTLANE-NEXT: addq $40, %rsp |
| 3201 | +; AVX512-FASTLANE-NEXT: retq |
3159 | 3202 | %1 = fptrunc <2 x double> %a0 to <2 x half>
|
3160 | 3203 | %2 = bitcast <2 x half> %1 to <2 x i16>
|
3161 | 3204 | ret <2 x i16> %2
|
|
0 commit comments