Skip to content

Commit b712590

Browse files
committed
[X86] Add test coverage for #140234
1 parent 9f77c26 commit b712590

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -857,6 +857,67 @@ define <4 x double> @shuffle_v4f64_2345_0567_select(<4 x double> %vec1, <4 x dou
857857
ret <4 x double> %res
858858
}
859859

860+
; PR140234
; Test coverage for issue #140234. The function builds a <4 x double> result
; from four separate 16-byte-aligned <2 x double> loads (the low and high
; halves behind %px and %py) using a chain of widening shufflevectors.
; With X = concat(x0, x1) and Y = concat(y0, y1), the returned value is
; <X[1], Y[0], X[3], Y[2]>, i.e. shuffle mask <1,4,3,6> as in the function name.
; NOTE(review): the per-configuration assertion comments inside the function
; look like update_llc_test_checks.py output - confirm, and regenerate them
; with the script rather than editing them by hand.
861+
define <4 x double> @shuffle_v4f64_1436_split_load(ptr %px, ptr %py) {
862+
; AVX1-LABEL: shuffle_v4f64_1436_split_load:
863+
; AVX1: # %bb.0:
864+
; AVX1-NEXT: vmovapd (%rsi), %xmm0
865+
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
866+
; AVX1-NEXT: vmovupd (%rdi), %ymm1
867+
; AVX1-NEXT: vinsertf128 $1, 16(%rsi), %ymm0, %ymm0
868+
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[2]
869+
; AVX1-NEXT: retq
870+
;
871+
; AVX2-LABEL: shuffle_v4f64_1436_split_load:
872+
; AVX2: # %bb.0:
873+
; AVX2-NEXT: vmovapd (%rsi), %xmm0
874+
; AVX2-NEXT: vmovupd (%rdi), %ymm1
875+
; AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[0],ymm1[3],ymm0[3]
876+
; AVX2-NEXT: vbroadcastsd 16(%rsi), %ymm1
877+
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
878+
; AVX2-NEXT: retq
879+
;
880+
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1436_split_load:
881+
; AVX512VL-SLOW: # %bb.0:
882+
; AVX512VL-SLOW-NEXT: vmovapd (%rsi), %xmm0
883+
; AVX512VL-SLOW-NEXT: vmovupd (%rdi), %ymm1
884+
; AVX512VL-SLOW-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[0],ymm1[3],ymm0[3]
885+
; AVX512VL-SLOW-NEXT: vbroadcastsd 16(%rsi), %ymm1
886+
; AVX512VL-SLOW-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
887+
; AVX512VL-SLOW-NEXT: retq
888+
;
889+
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_1436_split_load:
890+
; AVX512VL-FAST-ALL: # %bb.0:
891+
; AVX512VL-FAST-ALL-NEXT: vmovapd (%rsi), %xmm0
892+
; AVX512VL-FAST-ALL-NEXT: vmovapd 16(%rsi), %xmm1
893+
; AVX512VL-FAST-ALL-NEXT: vmovupd (%rdi), %ymm2
894+
; AVX512VL-FAST-ALL-NEXT: vshufpd {{.*#+}} ymm2 = ymm2[1],ymm0[0],ymm2[3],ymm0[3]
895+
; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [0,1,2,4]
896+
; AVX512VL-FAST-ALL-NEXT: vpermi2pd %ymm1, %ymm2, %ymm0
897+
; AVX512VL-FAST-ALL-NEXT: retq
898+
;
899+
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_1436_split_load:
900+
; AVX512VL-FAST-PERLANE: # %bb.0:
901+
; AVX512VL-FAST-PERLANE-NEXT: vmovapd (%rsi), %xmm0
902+
; AVX512VL-FAST-PERLANE-NEXT: vmovupd (%rdi), %ymm1
903+
; AVX512VL-FAST-PERLANE-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[0],ymm1[3],ymm0[3]
904+
; AVX512VL-FAST-PERLANE-NEXT: vbroadcastsd 16(%rsi), %ymm1
905+
; AVX512VL-FAST-PERLANE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
906+
; AVX512VL-FAST-PERLANE-NEXT: retq
907+
; Byte offset 16 addresses the second <2 x double> half behind each pointer.
%pxhi = getelementptr inbounds nuw i8, ptr %px, i64 16
908+
%pyhi = getelementptr inbounds nuw i8, ptr %py, i64 16
909+
; Four independent 128-bit loads, each with 16-byte alignment.
%x0 = load <2 x double>, ptr %px, align 16
910+
%y0 = load <2 x double>, ptr %py, align 16
911+
%x1 = load <2 x double>, ptr %pxhi, align 16
912+
%y1 = load <2 x double>, ptr %pyhi, align 16
913+
; shuf0 = <x0[1], y0[0], poison, poison> (index 2 selects element 0 of %y0)
%shuf0 = shufflevector <2 x double> %x0, <2 x double> %y0, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
914+
; shuf1 = <poison, x1[1], poison, poison> (widens %x1 to four elements)
%shuf1 = shufflevector <2 x double> %x1, <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
915+
; shuf2 = <x0[1], y0[0], x1[1], poison> (index 5 selects element 1 of %shuf1)
%shuf2 = shufflevector <4 x double> %shuf0, <4 x double> %shuf1, <4 x i32> <i32 0, i32 1, i32 5, i32 poison>
916+
; shuf3 = <y1[0], poison, poison, poison> (widens %y1 to four elements)
%shuf3 = shufflevector <2 x double> %y1, <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
917+
; shuf4 = <x0[1], y0[0], x1[1], y1[0]> (index 4 selects element 0 of %shuf3)
%shuf4 = shufflevector <4 x double> %shuf2, <4 x double> %shuf3, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
918+
ret <4 x double> %shuf4
919+
}
920+
860921
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
861922
; AVX1-LABEL: shuffle_v4i64_0000:
862923
; AVX1: # %bb.0:

0 commit comments

Comments
 (0)