Skip to content

Commit 13a78fd

Browse files
committed
[AArch64][GlobalISel] Re-commit Legalize G_SHUFFLE_VECTOR for Odd-Sized Vectors (#83038)
Legalize smaller/larger than legal vectors with i8 and i16 element sizes. Vectors with elements smaller than i8 will get widened to i8 elements.
1 parent f407f2d commit 13a78fd

File tree

6 files changed

+268
-121
lines changed

6 files changed

+268
-121
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,18 @@ class LegalizeRuleSet {
908908
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
909909
}
910910

911+
/// Widen the scalar or vector element type to the next power of two that is
912+
/// at least MinSize. No effect if the scalar size is a power of two.
913+
LegalizeRuleSet &widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx,
914+
unsigned MinSize = 0) {
915+
using namespace LegalityPredicates;
916+
return actionIf(
917+
LegalizeAction::WidenScalar,
918+
any(scalarOrEltNarrowerThan(TypeIdx, MinSize),
919+
scalarOrEltSizeNotPow2(typeIdx(TypeIdx))),
920+
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
921+
}
922+
911923
LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) {
912924
using namespace LegalityPredicates;
913925
return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)),

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2495,6 +2495,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
24952495
case TargetOpcode::G_OR:
24962496
case TargetOpcode::G_XOR:
24972497
case TargetOpcode::G_SUB:
2498+
case TargetOpcode::G_SHUFFLE_VECTOR:
24982499
// Perform operation at larger width (any extension is fines here, high bits
24992500
// don't affect the result) and then truncate the result back to the
25002501
// original type.

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
956956
},
957957
changeTo(1, 0))
958958
.moreElementsToNextPow2(0)
959+
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
960+
.clampNumElements(0, v8s8, v16s8)
961+
.clampNumElements(0, v4s16, v8s16)
959962
.clampNumElements(0, v4s32, v4s32)
960963
.clampNumElements(0, v2s64, v2s64)
961964
.moreElementsIf(

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,8 @@ bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
780780
auto &Unmerge = cast<GUnmerge>(MI);
781781
Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
782782
const LLT SrcTy = MRI.getType(Src1Reg);
783+
if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
784+
return false;
783785
return SrcTy.isVector() && !SrcTy.isScalable() &&
784786
Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
785787
}

llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -287,39 +287,47 @@ body: |
287287
; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
288288
; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
289289
; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
290-
; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
291-
; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
290+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
291+
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
292292
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
293-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
294-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
295-
; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
296-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %cmp(s1)
297-
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXT]], 1
298-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[SEXT_INREG]](s32)
299-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
300-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
301-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C2]](s64)
302-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
303-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
304-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[C3]](s8)
305-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1)
306-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
307-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>)
308-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]]
309-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>)
310-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>)
311-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
312-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]]
313-
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>)
314-
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>)
315-
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>)
316-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]]
317-
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>)
318-
; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>)
319-
; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>)
320-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]]
321-
; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>)
322-
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
293+
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]]
294+
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1
295+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
296+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
297+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
298+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
299+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
300+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16)
301+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
302+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
303+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64)
304+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
305+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
306+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
307+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
308+
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
309+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
310+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
311+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
312+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
313+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
314+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
315+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
316+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
317+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
318+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16)
319+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
320+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]]
321+
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
322+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
323+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]]
324+
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
325+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]]
326+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
327+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
328+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
329+
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
330+
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]]
323331
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
324332
; CHECK-NEXT: RET_ReallyLR implicit $q0
325333
%w0:_(s32) = COPY $w0

0 commit comments

Comments
 (0)