@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX-FAST-ALL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST-ALL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX2-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 ; fold (shl 0, x) -> 0
 define <4 x i32> @combine_vec_shl_zero(<4 x i32> %x) {
@@ -137,32 +138,40 @@ define <4 x i32> @combine_vec_shl_trunc_and(<4 x i32> %x, <4 x i64> %y) {
 ; SSE41-NEXT: pmulld %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
-; AVX-SLOW-LABEL: combine_vec_shl_trunc_and:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
-; AVX-SLOW-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-SLOW-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX-SLOW-NEXT: vzeroupper
-; AVX-SLOW-NEXT: retq
-;
-; AVX-FAST-ALL-LABEL: combine_vec_shl_trunc_and:
-; AVX-FAST-ALL: # %bb.0:
-; AVX-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm2 = [0,2,4,6,0,0,0,0]
-; AVX-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX-FAST-ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-FAST-ALL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX-FAST-ALL-NEXT: vzeroupper
-; AVX-FAST-ALL-NEXT: retq
-;
-; AVX-FAST-PERLANE-LABEL: combine_vec_shl_trunc_and:
-; AVX-FAST-PERLANE: # %bb.0:
-; AVX-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
-; AVX-FAST-PERLANE-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX-FAST-PERLANE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
-; AVX-FAST-PERLANE-NEXT: vzeroupper
-; AVX-FAST-PERLANE-NEXT: retq
+; AVX2-SLOW-LABEL: combine_vec_shl_trunc_and:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+; AVX2-SLOW-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vzeroupper
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: combine_vec_shl_trunc_and:
+; AVX2-FAST-ALL: # %bb.0:
+; AVX2-FAST-ALL-NEXT: vpmovsxbd {{.*#+}} ymm2 = [0,2,4,6,0,0,0,0]
+; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-FAST-ALL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-ALL-NEXT: vzeroupper
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: combine_vec_shl_trunc_and:
+; AVX2-FAST-PERLANE: # %bb.0:
+; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+; AVX2-FAST-PERLANE-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-FAST-PERLANE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-PERLANE-NEXT: vzeroupper
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_trunc_and:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovqd %ymm1, %xmm1
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %1 = and <4 x i64> %y, <i64 15, i64 255, i64 4095, i64 65535>
 %2 = trunc <4 x i64> %1 to <4 x i32>
 %3 = shl <4 x i32> %x, %2
@@ -353,11 +362,17 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
 ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_zext_lshr0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_zext_lshr0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_zext_lshr0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT: retq
 %1 = lshr <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
 %2 = zext <8 x i16> %1 to <8 x i32>
 %3 = shl <8 x i32> %2, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -504,12 +519,18 @@ define <4 x i32> @combine_vec_shl_gt_lshr0(<4 x i32> %x) {
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_gt_lshr0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpslld $2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_gt_lshr0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_gt_lshr0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
 %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
 ret <4 x i32> %2
@@ -540,12 +561,18 @@ define <4 x i32> @combine_vec_shl_le_lshr0(<4 x i32> %x) {
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_le_lshr0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_le_lshr0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1073741816,1073741816,1073741816,1073741816]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_le_lshr0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrld $2, %xmm0, %xmm0
+; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
 %2 = shl <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
 ret <4 x i32> %2
@@ -587,11 +614,16 @@ define <4 x i32> @combine_vec_shl_ashr0(<4 x i32> %x) {
 ; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_ashr0:
-; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_ashr0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [4294967264,4294967264,4294967264,4294967264]
+; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_ashr0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
 %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
 ret <4 x i32> %2
@@ -620,12 +652,18 @@ define <4 x i32> @combine_vec_shl_add0(<4 x i32> %x) {
 ; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_add0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpslld $2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_add0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_add0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = add <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
 %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
 ret <4 x i32> %2
@@ -667,12 +705,18 @@ define <4 x i32> @combine_vec_shl_or0(<4 x i32> %x) {
 ; SSE-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_or0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpslld $2, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_or0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_or0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpslld $2, %xmm0, %xmm0
+; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = or <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
 %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
 ret <4 x i32> %2
@@ -724,11 +768,16 @@ define <4 x i32> @combine_vec_shl_mul0(<4 x i32> %x) {
 ; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_shl_mul0:
-; AVX: # %bb.0:
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
-; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_shl_mul0:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_shl_mul0:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
 %2 = shl <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
 ret <4 x i32> %2
@@ -778,12 +827,18 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_add_shl_nonsplat:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_add_shl_nonsplat:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_add_shl_nonsplat:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 4, i32 5>
 %2 = add <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
 ret <4 x i32> %2
@@ -812,14 +867,22 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_add_shl_and_nonsplat:
-; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
-; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_add_shl_and_nonsplat:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_add_shl_and_nonsplat:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
 %2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
 %3 = add <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
@@ -847,13 +910,20 @@ define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
 ; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_add_shuffle_shl:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
-; AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_add_shuffle_shl:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: combine_vec_add_shuffle_shl:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
 %1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 0, i32 1>
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
 %3 = add <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>