Skip to content

Commit 31e2ec9

Browse files
committed
[EarlyCSE] Compare GEP instructions based on offset
This will provide more opportunities for constant propagation for subsequent optimizations.
1 parent ac1daad commit 31e2ec9

File tree

3 files changed

+134
-118
lines changed

3 files changed

+134
-118
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 133 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,11 @@ struct SimpleValue {
143143
!CI->getFunction()->isPresplitCoroutine();
144144
}
145145
return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146-
isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
147-
isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
148-
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
149-
isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
150-
isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
146+
isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
147+
isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
148+
isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
149+
isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
150+
isa<FreezeInst>(Inst);
151151
}
152152
};
153153

@@ -307,10 +307,9 @@ static unsigned getHashValueImpl(SimpleValue Val) {
307307
IVI->getOperand(1),
308308
hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
309309

310-
assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
311-
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
312-
isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
313-
isa<FreezeInst>(Inst)) &&
310+
assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
311+
isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
312+
isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
314313
"Invalid/unknown instruction");
315314

316315
// Handle intrinsics with commutative operands.
@@ -553,6 +552,77 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
553552
return LHSI->isIdenticalTo(RHSI);
554553
}
555554

555+
//===----------------------------------------------------------------------===//
556+
// GEPValue
557+
//===----------------------------------------------------------------------===//
558+
559+
namespace {
560+
561+
/// Hash-table key wrapping a getelementptr instruction, optionally paired
/// with a constant offset supplied by the caller.
///
/// DenseMapInfo<GEPValue> uses the offset (when present) together with the
/// GEP's pointer operand for hashing and equality, so the index operands are
/// not compared in that case.
struct GEPValue {
562+
// The wrapped instruction, or one of the DenseMapInfo<Instruction *>
// empty/tombstone sentinel pointers.
Instruction *Inst;
563+
// Offset associated with the GEP; only consulted by the hashing/equality
// code when HasConstantOffset is true.
APInt ConstantOffset;
564+
// True when ConstantOffset carries a meaningful value for Inst.
bool HasConstantOffset;
565+
566+
// Builds a key with no constant offset. Deliberately non-explicit:
// DenseMapInfo<GEPValue>::getEmptyKey()/getTombstoneKey() return a raw
// Instruction * that is converted through this constructor.
GEPValue(Instruction *I) : Inst(I), HasConstantOffset(false) {
567+
// isSentinel() is tested first so canHandle() is only evaluated for
// non-sentinel pointers.
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
568+
}
569+
// Builds a key from an instruction plus a caller-provided offset and a flag
// saying whether that offset is valid.
GEPValue(Instruction *I, APInt ConstantOffset, bool HasConstantOffset)
570+
: Inst(I), ConstantOffset(ConstantOffset),
571+
HasConstantOffset(HasConstantOffset) {
572+
assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
573+
}
574+
575+
// Returns true if Inst is the empty or tombstone key of
// DenseMapInfo<Instruction *> rather than a real instruction.
bool isSentinel() const {
576+
return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
577+
Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
578+
}
579+
580+
// Returns true if Inst may be wrapped in a GEPValue, i.e. it is a
// GetElementPtrInst.
static bool canHandle(Instruction *Inst) {
581+
return isa<GetElementPtrInst>(Inst);
582+
}
583+
};
584+
585+
} // namespace
586+
587+
namespace llvm {
588+
589+
/// DenseMapInfo specialization that lets GEPValue be used as a hash-table
/// key. Sentinel keys reuse the DenseMapInfo<Instruction *> sentinels;
/// hashing and equality are defined out of line.
template <> struct DenseMapInfo<GEPValue> {
590+
// The Instruction * sentinel converts implicitly to a GEPValue via its
// single-argument constructor.
static inline GEPValue getEmptyKey() {
591+
return DenseMapInfo<Instruction *>::getEmptyKey();
592+
}
593+
594+
// Same implicit Instruction * -> GEPValue conversion as getEmptyKey().
static inline GEPValue getTombstoneKey() {
595+
return DenseMapInfo<Instruction *>::getTombstoneKey();
596+
}
597+
598+
// Hash of a (non-sentinel) key; see the out-of-line definition.
static unsigned getHashValue(GEPValue Val);
599+
// Key equality, including sentinel handling; see the out-of-line definition.
static bool isEqual(GEPValue LHS, GEPValue RHS);
600+
};
601+
602+
} // end namespace llvm
603+
604+
/// Computes the hash of a GEPValue key.
///
/// With a constant offset, the hash covers the opcode, the pointer operand and
/// the offset. Without one, it covers the opcode and every operand of the GEP.
unsigned DenseMapInfo<GEPValue>::getHashValue(GEPValue Val) {
605+
// NOTE(review): cast<> is applied unconditionally, so this assumes Val is
// never an empty/tombstone key -- confirm against the hash-table callers.
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val.Inst);
606+
// Index operands are left out here so that keys which isEqual() treats as
// equal (same pointer operand, same constant offset) hash alike.
if (Val.HasConstantOffset)
607+
return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
608+
Val.ConstantOffset);
609+
// No constant offset: hash the full operand list.
return hash_combine(
610+
GEP->getOpcode(),
611+
hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
612+
}
613+
614+
/// Returns true if two GEPValue keys should be treated as the same entry.
///
/// Sentinel keys compare by pointer identity. Real GEPs must share the same
/// pointer operand; beyond that they are equal if both carry equal constant
/// offsets, or otherwise if isIdenticalToWhenDefined() says so.
bool DenseMapInfo<GEPValue>::isEqual(GEPValue LHS, GEPValue RHS) {
615+
// Handle empty/tombstone keys before any cast<> touches the pointers.
if (LHS.isSentinel() || RHS.isSentinel())
616+
return LHS.Inst == RHS.Inst;
617+
GetElementPtrInst *LGEP = cast<GetElementPtrInst>(LHS.Inst);
618+
GetElementPtrInst *RGEP = cast<GetElementPtrInst>(RHS.Inst);
619+
// Different pointer operands never match, whatever the offsets are.
if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
620+
return false;
621+
// Both offsets are known: compare them and ignore how the indices are
// spelled.
if (LHS.HasConstantOffset && RHS.HasConstantOffset)
622+
return LHS.ConstantOffset == RHS.ConstantOffset;
623+
// At least one side has no constant offset: fall back to comparing the
// instructions themselves.
return LGEP->isIdenticalToWhenDefined(RGEP);
624+
}
625+
556626
//===----------------------------------------------------------------------===//
557627
// EarlyCSE implementation
558628
//===----------------------------------------------------------------------===//
@@ -647,6 +717,13 @@ class EarlyCSE {
647717
ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
648718
CallHTType AvailableCalls;
649719

720+
using GEPMapAllocatorTy =
721+
RecyclingAllocator<BumpPtrAllocator,
722+
ScopedHashTableVal<GEPValue, Value *>>;
723+
using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
724+
GEPMapAllocatorTy>;
725+
GEPHTType AvailableGEPs;
726+
650727
/// This is the current generation of the memory value.
651728
unsigned CurrentGeneration = 0;
652729

@@ -667,9 +744,11 @@ class EarlyCSE {
667744
class NodeScope {
668745
public:
669746
NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
670-
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
671-
: Scope(AvailableValues), LoadScope(AvailableLoads),
672-
InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
747+
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
748+
GEPHTType &AvailableGEPs)
749+
: Scope(AvailableValues), LoadScope(AvailableLoads),
750+
InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
751+
GEPScope(AvailableGEPs) {}
673752
NodeScope(const NodeScope &) = delete;
674753
NodeScope &operator=(const NodeScope &) = delete;
675754

@@ -678,6 +757,7 @@ class EarlyCSE {
678757
LoadHTType::ScopeTy LoadScope;
679758
InvariantHTType::ScopeTy InvariantScope;
680759
CallHTType::ScopeTy CallScope;
760+
GEPHTType::ScopeTy GEPScope;
681761
};
682762

683763
// Contains all the needed information to create a stack for doing a depth
@@ -688,13 +768,13 @@ class EarlyCSE {
688768
public:
689769
StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
690770
InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
691-
unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
771+
GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
772+
DomTreeNode::const_iterator child,
692773
DomTreeNode::const_iterator end)
693774
: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
694775
EndIter(end),
695776
Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
696-
AvailableCalls)
697-
{}
777+
AvailableCalls, AvailableGEPs) {}
698778
StackNode(const StackNode &) = delete;
699779
StackNode &operator=(const StackNode &) = delete;
700780

@@ -1561,6 +1641,39 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
15611641
continue;
15621642
}
15631643

1644+
if (GEPValue::canHandle(&Inst)) {
1645+
GetElementPtrInst *GEP = cast<GetElementPtrInst>(&Inst);
1646+
APInt Offset(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1647+
bool HasConstantOffset = GEP->accumulateConstantOffset(SQ.DL, Offset);
1648+
GEPValue GEPVal(GEP, Offset, HasConstantOffset);
1649+
if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1650+
LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1651+
<< '\n');
1652+
if (auto *I = dyn_cast<Instruction>(V)) {
1653+
// If I being poison triggers UB, there is no need to drop those
1654+
// flags. Otherwise, only retain flags present on both I and Inst.
1655+
// TODO: Currently some fast-math flags are not treated as
1656+
// poison-generating even though they should. Until this is fixed,
1657+
// always retain flags present on both I and Inst for floating point
1658+
// instructions.
1659+
if (isa<FPMathOperator>(I) ||
1660+
(I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1661+
I->andIRFlags(&Inst);
1662+
}
1663+
Inst.replaceAllUsesWith(V);
1664+
salvageKnowledge(&Inst, &AC);
1665+
removeMSSA(Inst);
1666+
Inst.eraseFromParent();
1667+
Changed = true;
1668+
++NumCSE;
1669+
continue;
1670+
}
1671+
1672+
// Otherwise, just remember that this value is available.
1673+
AvailableGEPs.insert(GEPVal, &Inst);
1674+
continue;
1675+
}
1676+
15641677
// A release fence requires that all stores complete before it, but does
15651678
// not prevent the reordering of following loads 'before' the fence. As a
15661679
// result, we don't need to consider it as writing to memory and don't need
@@ -1675,7 +1788,7 @@ bool EarlyCSE::run() {
16751788
// Process the root node.
16761789
nodesToProcess.push_back(new StackNode(
16771790
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1678-
CurrentGeneration, DT.getRootNode(),
1791+
AvailableGEPs, CurrentGeneration, DT.getRootNode(),
16791792
DT.getRootNode()->begin(), DT.getRootNode()->end()));
16801793

16811794
assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
@@ -1698,10 +1811,10 @@ bool EarlyCSE::run() {
16981811
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
16991812
// Push the next child onto the stack.
17001813
DomTreeNode *child = NodeToProcess->nextChild();
1701-
nodesToProcess.push_back(
1702-
new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1703-
AvailableCalls, NodeToProcess->childGeneration(),
1704-
child, child->begin(), child->end()));
1814+
nodesToProcess.push_back(new StackNode(
1815+
AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1816+
AvailableGEPs, NodeToProcess->childGeneration(), child,
1817+
child->begin(), child->end()));
17051818
} else {
17061819
// It has been processed, and there are no more children to process,
17071820
// so delete it and pop it off the stack.

llvm/test/Transforms/EarlyCSE/gep.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
1010
; CHECK-LABEL: define void @foo(
1111
; CHECK-SAME: ptr [[A:%.*]], <4 x i64> [[B:%.*]], i64 [[I:%.*]]) {
1212
; CHECK-NEXT: [[S1A:%.*]] = getelementptr i8, ptr [[A]], i64 8
13-
; CHECK-NEXT: [[S1C:%.*]] = getelementptr [[T1:%.*]], ptr [[A]], i64 0, i32 1
1413
; CHECK-NEXT: [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
15-
; CHECK-NEXT: [[S1E:%.*]] = getelementptr i64, ptr [[A]], i64 1
16-
; CHECK-NEXT: [[S1F:%.*]] = getelementptr i32, ptr [[A]], i64 2
1714
; CHECK-NEXT: [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
1815
; CHECK-NEXT: [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
1916
; CHECK-NEXT: [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>

llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll

Lines changed: 1 addition & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -10,101 +10,7 @@ define void @foo(ptr %a, <32 x i8> %_0) #0 {
1010
; CHECK-LABEL: define void @foo(
1111
; CHECK-SAME: ptr nocapture writeonly [[A:%.*]], <32 x i8> [[_0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: start:
13-
; CHECK-NEXT: [[_34I:%.*]] = getelementptr i8, ptr [[A]], i64 1
14-
; CHECK-NEXT: [[Z_SROA_0_16_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 0
15-
; CHECK-NEXT: store i8 [[Z_SROA_0_16_VEC_EXTRACT]], ptr [[A]], align 1
16-
; CHECK-NEXT: [[_34I_1:%.*]] = getelementptr i8, ptr [[A]], i64 2
17-
; CHECK-NEXT: [[Z_SROA_0_17_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 1
18-
; CHECK-NEXT: store i8 [[Z_SROA_0_17_VEC_EXTRACT]], ptr [[_34I]], align 1
19-
; CHECK-NEXT: [[_34I_2:%.*]] = getelementptr i8, ptr [[A]], i64 3
20-
; CHECK-NEXT: [[Z_SROA_0_18_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 2
21-
; CHECK-NEXT: store i8 [[Z_SROA_0_18_VEC_EXTRACT]], ptr [[_34I_1]], align 1
22-
; CHECK-NEXT: [[_34I_3:%.*]] = getelementptr i8, ptr [[A]], i64 4
23-
; CHECK-NEXT: [[Z_SROA_0_19_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 3
24-
; CHECK-NEXT: store i8 [[Z_SROA_0_19_VEC_EXTRACT]], ptr [[_34I_2]], align 1
25-
; CHECK-NEXT: [[_34I_4:%.*]] = getelementptr i8, ptr [[A]], i64 5
26-
; CHECK-NEXT: [[Z_SROA_0_20_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 4
27-
; CHECK-NEXT: store i8 [[Z_SROA_0_20_VEC_EXTRACT]], ptr [[_34I_3]], align 1
28-
; CHECK-NEXT: [[_34I_5:%.*]] = getelementptr i8, ptr [[A]], i64 6
29-
; CHECK-NEXT: [[Z_SROA_0_21_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 5
30-
; CHECK-NEXT: store i8 [[Z_SROA_0_21_VEC_EXTRACT]], ptr [[_34I_4]], align 1
31-
; CHECK-NEXT: [[_34I_6:%.*]] = getelementptr i8, ptr [[A]], i64 7
32-
; CHECK-NEXT: [[Z_SROA_0_22_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 6
33-
; CHECK-NEXT: store i8 [[Z_SROA_0_22_VEC_EXTRACT]], ptr [[_34I_5]], align 1
34-
; CHECK-NEXT: [[_34I_7:%.*]] = getelementptr i8, ptr [[A]], i64 8
35-
; CHECK-NEXT: [[Z_SROA_0_23_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 7
36-
; CHECK-NEXT: store i8 [[Z_SROA_0_23_VEC_EXTRACT]], ptr [[_34I_6]], align 1
37-
; CHECK-NEXT: [[_34I_8:%.*]] = getelementptr i8, ptr [[A]], i64 9
38-
; CHECK-NEXT: [[Z_SROA_0_24_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 8
39-
; CHECK-NEXT: store i8 [[Z_SROA_0_24_VEC_EXTRACT]], ptr [[_34I_7]], align 1
40-
; CHECK-NEXT: [[_34I_9:%.*]] = getelementptr i8, ptr [[A]], i64 10
41-
; CHECK-NEXT: [[Z_SROA_0_25_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 9
42-
; CHECK-NEXT: store i8 [[Z_SROA_0_25_VEC_EXTRACT]], ptr [[_34I_8]], align 1
43-
; CHECK-NEXT: [[_34I_10:%.*]] = getelementptr i8, ptr [[A]], i64 11
44-
; CHECK-NEXT: [[Z_SROA_0_26_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 10
45-
; CHECK-NEXT: store i8 [[Z_SROA_0_26_VEC_EXTRACT]], ptr [[_34I_9]], align 1
46-
; CHECK-NEXT: [[_34I_11:%.*]] = getelementptr i8, ptr [[A]], i64 12
47-
; CHECK-NEXT: [[Z_SROA_0_27_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 11
48-
; CHECK-NEXT: store i8 [[Z_SROA_0_27_VEC_EXTRACT]], ptr [[_34I_10]], align 1
49-
; CHECK-NEXT: [[_34I_12:%.*]] = getelementptr i8, ptr [[A]], i64 13
50-
; CHECK-NEXT: [[Z_SROA_0_28_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 12
51-
; CHECK-NEXT: store i8 [[Z_SROA_0_28_VEC_EXTRACT]], ptr [[_34I_11]], align 1
52-
; CHECK-NEXT: [[_34I_13:%.*]] = getelementptr i8, ptr [[A]], i64 14
53-
; CHECK-NEXT: [[Z_SROA_0_29_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 13
54-
; CHECK-NEXT: store i8 [[Z_SROA_0_29_VEC_EXTRACT]], ptr [[_34I_12]], align 1
55-
; CHECK-NEXT: [[_34I_14:%.*]] = getelementptr i8, ptr [[A]], i64 15
56-
; CHECK-NEXT: [[Z_SROA_0_30_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 14
57-
; CHECK-NEXT: store i8 [[Z_SROA_0_30_VEC_EXTRACT]], ptr [[_34I_13]], align 1
58-
; CHECK-NEXT: [[_34I_15:%.*]] = getelementptr i8, ptr [[A]], i64 16
59-
; CHECK-NEXT: [[Z_SROA_0_31_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 15
60-
; CHECK-NEXT: store i8 [[Z_SROA_0_31_VEC_EXTRACT]], ptr [[_34I_14]], align 1
61-
; CHECK-NEXT: [[_34I_16:%.*]] = getelementptr i8, ptr [[A]], i64 17
62-
; CHECK-NEXT: [[Z_SROA_0_32_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 16
63-
; CHECK-NEXT: store i8 [[Z_SROA_0_32_VEC_EXTRACT]], ptr [[_34I_15]], align 1
64-
; CHECK-NEXT: [[_34I_17:%.*]] = getelementptr i8, ptr [[A]], i64 18
65-
; CHECK-NEXT: [[Z_SROA_0_33_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 17
66-
; CHECK-NEXT: store i8 [[Z_SROA_0_33_VEC_EXTRACT]], ptr [[_34I_16]], align 1
67-
; CHECK-NEXT: [[_34I_18:%.*]] = getelementptr i8, ptr [[A]], i64 19
68-
; CHECK-NEXT: [[Z_SROA_0_34_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 18
69-
; CHECK-NEXT: store i8 [[Z_SROA_0_34_VEC_EXTRACT]], ptr [[_34I_17]], align 1
70-
; CHECK-NEXT: [[_34I_19:%.*]] = getelementptr i8, ptr [[A]], i64 20
71-
; CHECK-NEXT: [[Z_SROA_0_35_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 19
72-
; CHECK-NEXT: store i8 [[Z_SROA_0_35_VEC_EXTRACT]], ptr [[_34I_18]], align 1
73-
; CHECK-NEXT: [[_34I_20:%.*]] = getelementptr i8, ptr [[A]], i64 21
74-
; CHECK-NEXT: [[Z_SROA_0_36_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 20
75-
; CHECK-NEXT: store i8 [[Z_SROA_0_36_VEC_EXTRACT]], ptr [[_34I_19]], align 1
76-
; CHECK-NEXT: [[_34I_21:%.*]] = getelementptr i8, ptr [[A]], i64 22
77-
; CHECK-NEXT: [[Z_SROA_0_37_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 21
78-
; CHECK-NEXT: store i8 [[Z_SROA_0_37_VEC_EXTRACT]], ptr [[_34I_20]], align 1
79-
; CHECK-NEXT: [[_34I_22:%.*]] = getelementptr i8, ptr [[A]], i64 23
80-
; CHECK-NEXT: [[Z_SROA_0_38_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 22
81-
; CHECK-NEXT: store i8 [[Z_SROA_0_38_VEC_EXTRACT]], ptr [[_34I_21]], align 1
82-
; CHECK-NEXT: [[_34I_23:%.*]] = getelementptr i8, ptr [[A]], i64 24
83-
; CHECK-NEXT: [[Z_SROA_0_39_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 23
84-
; CHECK-NEXT: store i8 [[Z_SROA_0_39_VEC_EXTRACT]], ptr [[_34I_22]], align 1
85-
; CHECK-NEXT: [[_34I_24:%.*]] = getelementptr i8, ptr [[A]], i64 25
86-
; CHECK-NEXT: [[Z_SROA_0_40_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 24
87-
; CHECK-NEXT: store i8 [[Z_SROA_0_40_VEC_EXTRACT]], ptr [[_34I_23]], align 1
88-
; CHECK-NEXT: [[_34I_25:%.*]] = getelementptr i8, ptr [[A]], i64 26
89-
; CHECK-NEXT: [[Z_SROA_0_41_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 25
90-
; CHECK-NEXT: store i8 [[Z_SROA_0_41_VEC_EXTRACT]], ptr [[_34I_24]], align 1
91-
; CHECK-NEXT: [[_34I_26:%.*]] = getelementptr i8, ptr [[A]], i64 27
92-
; CHECK-NEXT: [[Z_SROA_0_42_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 26
93-
; CHECK-NEXT: store i8 [[Z_SROA_0_42_VEC_EXTRACT]], ptr [[_34I_25]], align 1
94-
; CHECK-NEXT: [[_34I_27:%.*]] = getelementptr i8, ptr [[A]], i64 28
95-
; CHECK-NEXT: [[Z_SROA_0_43_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 27
96-
; CHECK-NEXT: store i8 [[Z_SROA_0_43_VEC_EXTRACT]], ptr [[_34I_26]], align 1
97-
; CHECK-NEXT: [[_34I_28:%.*]] = getelementptr i8, ptr [[A]], i64 29
98-
; CHECK-NEXT: [[Z_SROA_0_44_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 28
99-
; CHECK-NEXT: store i8 [[Z_SROA_0_44_VEC_EXTRACT]], ptr [[_34I_27]], align 1
100-
; CHECK-NEXT: [[_34I_29:%.*]] = getelementptr i8, ptr [[A]], i64 30
101-
; CHECK-NEXT: [[Z_SROA_0_45_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 29
102-
; CHECK-NEXT: store i8 [[Z_SROA_0_45_VEC_EXTRACT]], ptr [[_34I_28]], align 1
103-
; CHECK-NEXT: [[_34I_30:%.*]] = getelementptr i8, ptr [[A]], i64 31
104-
; CHECK-NEXT: [[Z_SROA_0_46_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 30
105-
; CHECK-NEXT: store i8 [[Z_SROA_0_46_VEC_EXTRACT]], ptr [[_34I_29]], align 1
106-
; CHECK-NEXT: [[Z_SROA_0_47_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[_0]], i64 31
107-
; CHECK-NEXT: store i8 [[Z_SROA_0_47_VEC_EXTRACT]], ptr [[_34I_30]], align 1
13+
; CHECK-NEXT: store <32 x i8> [[_0]], ptr [[A]], align 1
10814
; CHECK-NEXT: ret void
10915
;
11016
start:

0 commit comments

Comments
 (0)