Skip to content

Commit dfc03c4

Browse files
committed
[X86] vector-half-conversions.ll - regenerate with AVX512 slow/fast lane shuffles
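Adds the missing AVX512F and AVX512-FASTLANE check prefixes, so the plain AVX512F RUN line and the fast-variable-shuffle RUN lines can carry separate expectations where their codegen differs.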
1 parent 4a8b43b commit dfc03c4

File tree

1 file changed, with 46 additions and 3 deletions.

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,9 @@
55
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
66
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
77
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+f16c,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=F16C
8-
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
9-
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
10-
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512
8+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512F
9+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE
10+
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512,AVX512-FASTLANE
1111

1212
;
1313
; Half to Float
@@ -3156,6 +3156,49 @@ define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
31563156
; F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
31573157
; F16C-NEXT: addq $40, %rsp
31583158
; F16C-NEXT: retq
3159+
;
3160+
; AVX512F-LABEL: cvt_2f64_to_2i16:
3161+
; AVX512F: # %bb.0:
3162+
; AVX512F-NEXT: subq $104, %rsp
3163+
; AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
3164+
; AVX512F-NEXT: callq __truncdfhf2@PLT
3165+
; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
3166+
; AVX512F-NEXT: vmovdqu64 %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
3167+
; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
3168+
; AVX512F-NEXT: vzeroupper
3169+
; AVX512F-NEXT: callq __truncdfhf2@PLT
3170+
; AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3171+
; AVX512F-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
3172+
; AVX512F-NEXT: # xmm0 = mem[1,0]
3173+
; AVX512F-NEXT: callq __truncdfhf2@PLT
3174+
; AVX512F-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
3175+
; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3176+
; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [16,0,0,0]
3177+
; AVX512F-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
3178+
; AVX512F-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0
3179+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
3180+
; AVX512F-NEXT: addq $104, %rsp
3181+
; AVX512F-NEXT: vzeroupper
3182+
; AVX512F-NEXT: retq
3183+
;
3184+
; AVX512-FASTLANE-LABEL: cvt_2f64_to_2i16:
3185+
; AVX512-FASTLANE: # %bb.0:
3186+
; AVX512-FASTLANE-NEXT: subq $40, %rsp
3187+
; AVX512-FASTLANE-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
3188+
; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
3189+
; AVX512-FASTLANE-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
3190+
; AVX512-FASTLANE-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
3191+
; AVX512-FASTLANE-NEXT: # xmm0 = mem[1,0]
3192+
; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
3193+
; AVX512-FASTLANE-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload
3194+
; AVX512-FASTLANE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3195+
; AVX512-FASTLANE-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
3196+
; AVX512-FASTLANE-NEXT: callq __truncdfhf2@PLT
3197+
; AVX512-FASTLANE-NEXT: vpbroadcastw %xmm0, %xmm1
3198+
; AVX512-FASTLANE-NEXT: vmovaps {{.*#+}} xmm0 = [4,0,0,0]
3199+
; AVX512-FASTLANE-NEXT: vpermi2ps (%rsp), %xmm1, %xmm0 # 16-byte Folded Reload
3200+
; AVX512-FASTLANE-NEXT: addq $40, %rsp
3201+
; AVX512-FASTLANE-NEXT: retq
31593202
%1 = fptrunc <2 x double> %a0 to <2 x half>
31603203
%2 = bitcast <2 x half> %1 to <2 x i16>
31613204
ret <2 x i16> %2

0 commit comments

Comments
 (0)