Skip to content

Commit 34845ac

Browse files
authored
[LoongArch] Try to widen shuffle mask (#136081)
1 parent ab680c5 commit 34845ac

File tree

3 files changed

+66
-48
lines changed

3 files changed

+66
-48
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "llvm/Support/ErrorHandling.h"
3232
#include "llvm/Support/KnownBits.h"
3333
#include "llvm/Support/MathExtras.h"
34+
#include <llvm/Analysis/VectorUtils.h>
3435

3536
using namespace llvm;
3637

@@ -543,6 +544,37 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
543544
}
544545
}
545546

547+
// Widen element type to get a new mask value (if possible).
548+
// For example:
549+
// shufflevector <4 x i32> %a, <4 x i32> %b,
550+
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
551+
// is equivalent to:
552+
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
553+
// can be lowered to:
554+
// VPACKOD_D vr0, vr0, vr1
555+
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
556+
SDValue V1, SDValue V2, SelectionDAG &DAG) {
557+
unsigned EltBits = VT.getScalarSizeInBits();
558+
559+
if (EltBits > 32 || EltBits == 1)
560+
return SDValue();
561+
562+
SmallVector<int, 8> NewMask;
563+
if (widenShuffleMaskElts(Mask, NewMask)) {
564+
MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
565+
: MVT::getIntegerVT(EltBits * 2);
566+
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
567+
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
568+
SDValue NewV1 = DAG.getBitcast(NewVT, V1);
569+
SDValue NewV2 = DAG.getBitcast(NewVT, V2);
570+
return DAG.getBitcast(
571+
VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
572+
}
573+
}
574+
575+
return SDValue();
576+
}
577+
546578
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
547579
/// instruction.
548580
// The function matches elements from one of the input vector shuffled to the
@@ -1365,6 +1397,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
13651397
return Result;
13661398
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
13671399
return Result;
1400+
if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1401+
return NewShuffle;
13681402
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
13691403
return Result;
13701404
return SDValue();
@@ -1803,6 +1837,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
18031837
return Result;
18041838
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
18051839
return Result;
1840+
if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
1841+
return NewShuffle;
18061842
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
18071843
return Result;
18081844

llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v16i16(<32 x i8> %a, <32 x i8> %b)
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
88
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
9-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
9+
; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
10+
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
1011
; CHECK-NEXT: ret
1112
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 32, i32 33, i32 34, i32 35, i32 24, i32 25, i32 26, i32 27, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 30, i32 31>
1213
ret <32 x i8> %r
@@ -17,7 +18,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v8i32(<32 x i8> %a, <32 x i8> %b)
1718
; CHECK: # %bb.0:
1819
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
1920
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
20-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
21+
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
22+
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
2123
; CHECK-NEXT: ret
2224
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 32, i32 33, i32 34, i32 35, i32 24, i32 25, i32 26, i32 27, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63>
2325
ret <32 x i8> %r
@@ -28,7 +30,8 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_v4i64(<32 x i8> %a, <32 x i8> %b)
2830
; CHECK: # %bb.0:
2931
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
3032
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
31-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
33+
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
34+
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
3235
; CHECK-NEXT: ret
3336
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
3437
ret <32 x i8> %r
@@ -39,7 +42,7 @@ define <16 x i16> @widen_shuffle_mask_v16i16_to_v8i32(<16 x i16> %a, <16 x i16>
3942
; CHECK: # %bb.0:
4043
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
4144
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
42-
; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
45+
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
4346
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
4447
; CHECK-NEXT: ret
4548
%r = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 16, i32 17, i32 2, i32 3, i32 10, i32 11, i32 12, i32 13, i32 24, i32 25, i32 26, i32 27>
@@ -51,7 +54,7 @@ define <16 x i16> @widen_shuffle_mask_v16i16_to_v4i64(<16 x i16> %a, <16 x i16>
5154
; CHECK: # %bb.0:
5255
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
5356
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
54-
; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
57+
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
5558
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
5659
; CHECK-NEXT: ret
5760
%r = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
@@ -63,7 +66,7 @@ define <8 x i32> @widen_shuffle_mask_v8i32_to_v4i64(<8 x i32> %a, <8 x i32> %b)
6366
; CHECK: # %bb.0:
6467
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
6568
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
66-
; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
69+
; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
6770
; CHECK-NEXT: xvori.b $xr0, $xr2, 0
6871
; CHECK-NEXT: ret
6972
%r = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 14, i32 15, i32 6, i32 7>
@@ -73,9 +76,7 @@ define <8 x i32> @widen_shuffle_mask_v8i32_to_v4i64(<8 x i32> %a, <8 x i32> %b)
7376
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackev_h(<32 x i8> %a, <32 x i8> %b) {
7477
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpackev_h:
7578
; CHECK: # %bb.0:
76-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
77-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
78-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
79+
; CHECK-NEXT: xvpackev.h $xr0, $xr1, $xr0
7980
; CHECK-NEXT: ret
8081
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 32, i32 33, i32 4, i32 5, i32 36, i32 37, i32 8, i32 9, i32 40, i32 41, i32 12, i32 13, i32 44, i32 45, i32 16, i32 17, i32 48, i32 49, i32 20, i32 21, i32 52, i32 53, i32 24, i32 25, i32 56, i32 57, i32 28, i32 29, i32 60, i32 61>
8182
ret <32 x i8> %r
@@ -84,9 +85,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackev_h(<32 x i8> %a, <32 x i8>
8485
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackod_h(<32 x i8> %a, <32 x i8> %b) {
8586
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpackod_h:
8687
; CHECK: # %bb.0:
87-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
88-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI7_0)
89-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
88+
; CHECK-NEXT: xvpackod.h $xr0, $xr1, $xr0
9089
; CHECK-NEXT: ret
9190
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 2, i32 3, i32 34, i32 35, i32 6, i32 7, i32 38, i32 39, i32 10, i32 11, i32 42, i32 43, i32 14, i32 15, i32 46, i32 47, i32 18, i32 19, i32 50, i32 51, i32 22, i32 23, i32 54, i32 55, i32 26, i32 27, i32 58, i32 59, i32 30, i32 31, i32 62, i32 63>
9291
ret <32 x i8> %r
@@ -95,9 +94,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpackod_h(<32 x i8> %a, <32 x i8>
9594
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickev_h(<32 x i8> %a, <32 x i8> %b) {
9695
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpickev_h:
9796
; CHECK: # %bb.0:
98-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
99-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI8_0)
100-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
97+
; CHECK-NEXT: xvpickev.h $xr0, $xr1, $xr0
10198
; CHECK-NEXT: ret
10299
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61>
103100
ret <32 x i8> %r
@@ -106,9 +103,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickev_h(<32 x i8> %a, <32 x i8>
106103
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickod_h(<32 x i8> %a, <32 x i8> %b) {
107104
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvpickod_h:
108105
; CHECK: # %bb.0:
109-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
110-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI9_0)
111-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
106+
; CHECK-NEXT: xvpickod.h $xr0, $xr1, $xr0
112107
; CHECK-NEXT: ret
113108
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 2, i32 3, i32 6, i32 7, i32 10, i32 11, i32 14, i32 15, i32 34, i32 35, i32 38, i32 39, i32 42, i32 43, i32 46, i32 47, i32 18, i32 19, i32 22, i32 23, i32 26, i32 27, i32 30, i32 31, i32 50, i32 51, i32 54, i32 55, i32 58, i32 59, i32 62, i32 63>
114109
ret <32 x i8> %r
@@ -117,9 +112,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvpickod_h(<32 x i8> %a, <32 x i8>
117112
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvl_h(<32 x i8> %a, <32 x i8> %b) {
118113
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvilvl_h:
119114
; CHECK: # %bb.0:
120-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
121-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
122-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
115+
; CHECK-NEXT: xvilvl.h $xr0, $xr1, $xr0
123116
; CHECK-NEXT: ret
124117
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 32, i32 33, i32 2, i32 3, i32 34, i32 35, i32 4, i32 5, i32 36, i32 37, i32 6, i32 7, i32 38, i32 39, i32 16, i32 17, i32 48, i32 49, i32 18, i32 19, i32 50, i32 51, i32 20, i32 21, i32 52, i32 53, i32 22, i32 23, i32 54, i32 55>
125118
ret <32 x i8> %r
@@ -128,9 +121,7 @@ define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvl_h(<32 x i8> %a, <32 x i8> %
128121
define <32 x i8> @widen_shuffle_mask_v32i8_to_xvilvh_h(<32 x i8> %a, <32 x i8> %b) {
129122
; CHECK-LABEL: widen_shuffle_mask_v32i8_to_xvilvh_h:
130123
; CHECK: # %bb.0:
131-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
132-
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI11_0)
133-
; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
124+
; CHECK-NEXT: xvilvh.h $xr0, $xr1, $xr0
134125
; CHECK-NEXT: ret
135126
%r = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 9, i32 40, i32 41, i32 10, i32 11, i32 42, i32 43, i32 12, i32 13, i32 44, i32 45, i32 14, i32 15, i32 46, i32 47, i32 24, i32 25, i32 56, i32 57, i32 26, i32 27, i32 58, i32 59, i32 28, i32 29, i32 60, i32 61, i32 30, i32 31, i32 62, i32 63>
136127
ret <32 x i8> %r

llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v8i16(<16 x i8> %a, <16 x i8> %b)
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
88
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI0_0)
9-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
9+
; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
10+
; CHECK-NEXT: vori.b $vr0, $vr2, 0
1011
; CHECK-NEXT: ret
1112
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 24, i32 25, i32 26, i32 27, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 2, i32 3>
1213
ret <16 x i8> %r
@@ -17,7 +18,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b)
1718
; CHECK: # %bb.0:
1819
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
1920
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI1_0)
20-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
21+
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
22+
; CHECK-NEXT: vori.b $vr0, $vr2, 0
2123
; CHECK-NEXT: ret
2224
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
2325
ret <16 x i8> %r
@@ -28,7 +30,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b)
2830
; CHECK: # %bb.0:
2931
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
3032
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI2_0)
31-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
33+
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
34+
; CHECK-NEXT: vori.b $vr0, $vr2, 0
3235
; CHECK-NEXT: ret
3336
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
3437
ret <16 x i8> %r
@@ -39,7 +42,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b)
3942
; CHECK: # %bb.0:
4043
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
4144
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
42-
; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
45+
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
4346
; CHECK-NEXT: vori.b $vr0, $vr2, 0
4447
; CHECK-NEXT: ret
4548
%r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 12, i32 13, i32 14, i32 15, i32 2, i32 3>
@@ -51,7 +54,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b)
5154
; CHECK: # %bb.0:
5255
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
5356
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
54-
; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
57+
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
5558
; CHECK-NEXT: vori.b $vr0, $vr2, 0
5659
; CHECK-NEXT: ret
5760
%r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
@@ -63,7 +66,7 @@ define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b)
6366
; CHECK: # %bb.0:
6467
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
6568
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
66-
; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
69+
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
6770
; CHECK-NEXT: vori.b $vr0, $vr2, 0
6871
; CHECK-NEXT: ret
6972
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -73,9 +76,7 @@ define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b)
7376
define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackev_h(<16 x i8> %a, <16 x i8> %b) {
7477
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpackev_h:
7578
; CHECK: # %bb.0:
76-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
77-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_0)
78-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
79+
; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0
7980
; CHECK-NEXT: ret
8081
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 20, i32 21, i32 8, i32 9, i32 24, i32 25, i32 12, i32 13, i32 28, i32 29>
8182
ret <16 x i8> %r
@@ -84,9 +85,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackev_h(<16 x i8> %a, <16 x i8>
8485
define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackod_h(<16 x i8> %a, <16 x i8> %b) {
8586
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpackod_h:
8687
; CHECK: # %bb.0:
87-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
88-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_0)
89-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
88+
; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0
9089
; CHECK-NEXT: ret
9190
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 18, i32 19, i32 6, i32 7, i32 22, i32 23, i32 10, i32 11, i32 26, i32 27, i32 14, i32 15, i32 30, i32 31>
9291
ret <16 x i8> %r
@@ -95,9 +94,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpackod_h(<16 x i8> %a, <16 x i8>
9594
define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickev_h(<16 x i8> %a, <16 x i8> %b) {
9695
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpickev_h:
9796
; CHECK: # %bb.0:
98-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
99-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI8_0)
100-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
97+
; CHECK-NEXT: vpickev.h $vr0, $vr1, $vr0
10198
; CHECK-NEXT: ret
10299
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29>
103100
ret <16 x i8> %r
@@ -106,9 +103,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickev_h(<16 x i8> %a, <16 x i8>
106103
define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickod_h(<16 x i8> %a, <16 x i8> %b) {
107104
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vpickod_h:
108105
; CHECK: # %bb.0:
109-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
110-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI9_0)
111-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
106+
; CHECK-NEXT: vpickod.h $vr0, $vr1, $vr0
112107
; CHECK-NEXT: ret
113108
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 6, i32 7, i32 10, i32 11, i32 14, i32 15, i32 18, i32 19, i32 22, i32 23, i32 26, i32 27, i32 30, i32 31>
114109
ret <16 x i8> %r
@@ -117,9 +112,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vpickod_h(<16 x i8> %a, <16 x i8>
117112
define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvl_h(<16 x i8> %a, <16 x i8> %b) {
118113
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vilvl_h:
119114
; CHECK: # %bb.0:
120-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
121-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI10_0)
122-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
115+
; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
123116
; CHECK-NEXT: ret
124117
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23>
125118
ret <16 x i8> %r
@@ -128,9 +121,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvl_h(<16 x i8> %a, <16 x i8> %b
128121
define <16 x i8> @widen_shuffle_mask_v16i8_to_vilvh_h(<16 x i8> %a, <16 x i8> %b) {
129122
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_vilvh_h:
130123
; CHECK: # %bb.0:
131-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
132-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI11_0)
133-
; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
124+
; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0
134125
; CHECK-NEXT: ret
135126
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
136127
ret <16 x i8> %r

0 commit comments

Comments
 (0)