@@ -4749,6 +4749,127 @@ define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_
4749
4749
ret <32 x i8 > %shuffle
4750
4750
}
4751
4751
4752
+ ; In-lane even-byte interleave test: each 128-bit result lane takes the even
+ ; bytes of the matching lane of %a0 (indices 0..14 / 16..30) followed by the
+ ; even bytes of the matching lane of %a1 (indices 32..46 / 48..62). Because no
+ ; byte crosses a 128-bit lane, the AVX2/AVX512VLBW/XOPAVX2 expectations are two
+ ; vpshufb plus a vpblendd with no lane permute, and the AVX512VLVBMI
+ ; expectation is a single vpermt2b. (Checks look auto-generated — NOTE(review):
+ ; presumably by update_llc_test_checks.py; regenerate rather than hand-edit.)
+ define <32 x i8 > @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62 (<32 x i8 > %a0 , <32 x i8 > %a1 ) {
4753
+ ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4754
+ ; AVX1: # %bb.0:
4755
+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4756
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4757
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
4758
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
4759
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
4760
+ ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
4761
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
4762
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
4763
+ ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4764
+ ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4765
+ ; AVX1-NEXT: retq
4766
+ ;
4767
+ ; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4768
+ ; AVX2: # %bb.0:
4769
+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4770
+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4771
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4772
+ ; AVX2-NEXT: retq
4773
+ ;
4774
+ ; AVX512VLBW-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4775
+ ; AVX512VLBW: # %bb.0:
4776
+ ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4777
+ ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4778
+ ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4779
+ ; AVX512VLBW-NEXT: retq
4780
+ ;
4781
+ ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4782
+ ; AVX512VLVBMI: # %bb.0:
4783
+ ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
4784
+ ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
4785
+ ; AVX512VLVBMI-NEXT: retq
4786
+ ;
4787
+ ; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4788
+ ; XOPAVX1: # %bb.0:
4789
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4790
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4791
+ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4792
+ ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
4793
+ ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
4794
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4795
+ ; XOPAVX1-NEXT: retq
4796
+ ;
4797
+ ; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4798
+ ; XOPAVX2: # %bb.0:
4799
+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4800
+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4801
+ ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4802
+ ; XOPAVX2-NEXT: retq
4803
+ %1 = shufflevector <32 x i8 > %a0 , <32 x i8 > %a1 , <32 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 , i32 8 , i32 10 , i32 12 , i32 14 , i32 32 , i32 34 , i32 36 , i32 38 , i32 40 , i32 42 , i32 44 , i32 46 , i32 16 , i32 18 , i32 20 , i32 22 , i32 24 , i32 26 , i32 28 , i32 30 , i32 48 , i32 50 , i32 52 , i32 54 , i32 56 , i32 58 , i32 60 , i32 62 >
4804
+ ret <32 x i8 > %1
4805
+ }
4806
+
4807
+ ; Full even-byte extraction test: the low half of the result is every even
+ ; byte of %a0 (indices 0..30) and the high half is every even byte of %a1
+ ; (indices 32..62). The low result lane needs bytes from both 128-bit lanes of
+ ; %a0, so unlike the preceding in-lane case the AVX2-style expectations end
+ ; with a cross-lane fixup (vpermq [0,2,1,3], or vpermi2q in the
+ ; AVX512VLBW-FAST variant), while AVX512VLVBMI still expects one vpermt2b.
+ ; (Checks look auto-generated — NOTE(review): presumably by
+ ; update_llc_test_checks.py; regenerate rather than hand-edit.)
+ define <32 x i8 > @shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62 (<32 x i8 > %a0 , <32 x i8 > %a1 ) {
4808
+ ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4809
+ ; AVX1: # %bb.0:
4810
+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4811
+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
4812
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
4813
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
4814
+ ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
4815
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4816
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
4817
+ ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
4818
+ ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
4819
+ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4820
+ ; AVX1-NEXT: retq
4821
+ ;
4822
+ ; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4823
+ ; AVX2: # %bb.0:
4824
+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4825
+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4826
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4827
+ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4828
+ ; AVX2-NEXT: retq
4829
+ ;
4830
+ ; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4831
+ ; AVX512VLBW-SLOW: # %bb.0:
4832
+ ; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4833
+ ; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4834
+ ; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4835
+ ; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4836
+ ; AVX512VLBW-SLOW-NEXT: retq
4837
+ ;
4838
+ ; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4839
+ ; AVX512VLBW-FAST: # %bb.0:
4840
+ ; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4841
+ ; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4842
+ ; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,5,7]
4843
+ ; AVX512VLBW-FAST-NEXT: vpermi2q %ymm1, %ymm2, %ymm0
4844
+ ; AVX512VLBW-FAST-NEXT: retq
4845
+ ;
4846
+ ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4847
+ ; AVX512VLVBMI: # %bb.0:
4848
+ ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62]
4849
+ ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
4850
+ ; AVX512VLVBMI-NEXT: retq
4851
+ ;
4852
+ ; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4853
+ ; XOPAVX1: # %bb.0:
4854
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4855
+ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4856
+ ; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm1, %xmm1
4857
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4858
+ ; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm0, %xmm0
4859
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4860
+ ; XOPAVX1-NEXT: retq
4861
+ ;
4862
+ ; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4863
+ ; XOPAVX2: # %bb.0:
4864
+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4865
+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4866
+ ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4867
+ ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4868
+ ; XOPAVX2-NEXT: retq
4869
+ %1 = shufflevector <32 x i8 > %a0 , <32 x i8 > %a1 , <32 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 , i32 8 , i32 10 , i32 12 , i32 14 , i32 16 , i32 18 , i32 20 , i32 22 , i32 24 , i32 26 , i32 28 , i32 30 , i32 32 , i32 34 , i32 36 , i32 38 , i32 40 , i32 42 , i32 44 , i32 46 , i32 48 , i32 50 , i32 52 , i32 54 , i32 56 , i32 58 , i32 60 , i32 62 >
4870
+ ret <32 x i8 > %1
4871
+ }
4872
+
4752
4873
define <32 x i8 > @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62 (<16 x i16 > %a0 , <16 x i16 > %a1 ) {
4753
4874
; AVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4754
4875
; AVX1: # %bb.0:
0 commit comments