Skip to content

Commit 43dbb98

Browse files
committed
[InstCombine] Canonicalize more geps with constant gep bases and constant offsets.
This is another small, and hopefully not performance-negative, step towards canonicalizing to i8 geps. We look for geps with a constant-offset base pointer of the form `gep (gep @glob, C1), x, C2` and expand the gep instruction, so that the constants can hopefully be combined together (or the offset can be computed in common).
1 parent 092d47b commit 43dbb98

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+11-5
Original file line numberDiff line numberDiff line change
@@ -2822,11 +2822,17 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
28222822
// This has better support in BasicAA.
28232823
// - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
28242824
// multiplies together.
2825-
if (GEPEltType->isScalableTy() ||
2826-
(!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
2827-
match(GEP.getOperand(1),
2828-
m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
2829-
m_Shl(m_Value(), m_ConstantInt())))))) {
2825+
// - gep (gep @global, C1), %x, C2 is expanded so the two constants can
2826+
// possibly be merged together.
2827+
if (!GEPEltType->isIntegerTy(8) &&
2828+
(GEPEltType->isScalableTy() ||
2829+
(GEP.getNumIndices() == 1 &&
2830+
match(GEP.getOperand(1),
2831+
m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
2832+
m_Shl(m_Value(), m_ConstantInt()))))) ||
2833+
(isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
2834+
any_of(drop_begin(GEP.indices()),
2835+
[](Value *V) { return isa<Constant>(V); })))) {
28302836
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
28312837
return replaceInstUsesWith(
28322838
GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));

llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

+10-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
77
; CHECK-LABEL: define ptr @x12(
88
; CHECK-SAME: i64 [[X:%.*]]) {
99
; CHECK-NEXT: entry:
10-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
10+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
11+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
1112
; CHECK-NEXT: ret ptr [[GEP]]
1213
;
1314
entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
1920
; CHECK-LABEL: define ptr @x1y(
2021
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
2122
; CHECK-NEXT: entry:
22-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
23+
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
24+
; CHECK-NEXT: [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
25+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
26+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
2327
; CHECK-NEXT: ret ptr [[GEP]]
2428
;
2529
entry:
@@ -43,8 +47,10 @@ define i32 @twoloads(i64 %x) {
4347
; CHECK-LABEL: define i32 @twoloads(
4448
; CHECK-SAME: i64 [[X:%.*]]) {
4549
; CHECK-NEXT: entry:
46-
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
47-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
50+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
51+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
52+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
53+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
4854
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[GEP1]], align 4
4955
; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[GEP2]], align 4
5056
; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]

0 commit comments

Comments
 (0)