-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Canonicalize more geps with constant gep bases and constant offsets. #110033
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) ChangesThis is another small but hopefully not performance negative step to canonicalizing towards i8 geps. We looks for geps with a constant offset base pointer of the form Full diff: https://github.com/llvm/llvm-project/pull/110033.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5740285675eba8..ef478bf106f731 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2822,11 +2822,17 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// This has better support in BasicAA.
// - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
// multiplies together.
- if (GEPEltType->isScalableTy() ||
- (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
- match(GEP.getOperand(1),
- m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
- m_Shl(m_Value(), m_ConstantInt())))))) {
+ // - gep (gep @global, C1), %x, C2 is expanded so the two constants can
+ // possibly be merged together.
+ if (!GEPEltType->isIntegerTy(8) &&
+ (GEPEltType->isScalableTy() ||
+ (GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1),
+ m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+ m_Shl(m_Value(), m_ConstantInt()))))) ||
+ (isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
+ any_of(drop_begin(GEP.indices()),
+ [](Value *V) { return isa<Constant>(V); })))) {
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
return replaceInstUsesWith(
GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
new file mode 100644
index 00000000000000..f0dbd1b0a49a55
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+@glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+
+define ptr @x12(i64 %x) {
+; CHECK-LABEL: define ptr @x12(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 1, i64 2
+ ret ptr %gep
+}
+
+define ptr @x1y(i64 %x, i64 %y) {
+; CHECK-LABEL: define ptr @x1y(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 %y
+ ret ptr %gep
+}
+
+define ptr @xzy(i64 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: define ptr @xzy(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 40), i64 0, i64 [[X]], i64 [[Z]], i64 [[Y]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 40), i64 0, i64 %x, i64 %z, i64 %y
+ ret ptr %gep
+}
+
+define i32 @twoloads(i64 %x) {
+; CHECK-LABEL: define i32 @twoloads(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+ %gep1 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 50), i64 0, i64 %x, i64 2, i64 1
+ %gep2 = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 2, i64 4
+ %a = load i32, ptr %gep1
+ %b = load i32, ptr %gep2
+ %c = add i32 %a, %b
+ ret i32 %c
+}
|
m_Shl(m_Value(), m_ConstantInt()))))) || | ||
(isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) && | ||
any_of(drop_begin(GEP.indices()), | ||
[](Value *V) { return isa<Constant>(V); })))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this condition has reached the point where it wants to live in its own function :)
I'm also wondering whether zero constants should be excluded from this heuristic, as they don't add any actual offset. And then you wouldn't need the drop_begin? Or at least I don't fully get why the drop_begin() is there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - I've pulled out the condition so that it can be more structured. And yeah, the intent for the drop_begin was to rule our the first zero offsets so that is now more explicit too.
43dbb98
to
8d20eac
Compare
@@ -238,6 +238,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final | |||
std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>> | |||
convertOrOfShiftsToFunnelShift(Instruction &Or); | |||
|
|||
Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did this move?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh - This was a relic of a previous version that moved the creation of geps into (a differently named) shouldCanonicalizeGEPToPtrAdd, but it was simpler to just have the condition in that function.
|
||
// gep (gep @global, C1), %x, C2 is expanded so the two constants can | ||
// possibly be merged together. | ||
return isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, should this one be checking for a constant expression, or that the inner GEPOperator hasAllConstantIndices()? Is there a reason why we want to apply this only to GEPs of globals in particular, rather than all constant GEPs?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had an older version that was canonicalizing more generally (in a different way IIRC). It was causing some relatively small perf issues so I was trying to avoid making this over-broad. I think that this case should hopefully be OK though, to do the same thing if the geps indices are all constant.
Looking at https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1382/files it seems like we often don't end up folding the offsets. I think this is because of multi-use limitation in canonicalizeGEPOfConstGEPI8. Possibly it could be relaxed (I think we generally don't mind creating extra constant offset geps...) |
8d20eac
to
3ac50be
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…tant offsets. This is another small but hopefully not performance negative step to canonicalizing towards i8 geps. We looks for geps with a constant offset base pointer of the form `gep (gep @glob, C1), x, C2` and expand the gep instruction, so that the constant can hopefully be combined together (or the offset can be computed in common).
3ac50be
to
149efb8
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/4989 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/136/builds/1148 Here is the relevant piece of the build log for the reference
|
* commit 'FETCH_HEAD': [X86] combineAndLoadToBZHI - don't do an return early return if we fail to match a load [X86] replace-load-and-with-bzhi.ll - add commuted test cases to show failure to fold [X86] replace-load-and-with-bzhi.ll - cleanup check-prefixes to use X86/X64 for 32/64-bit targets [ExecutionEngine] Avoid repeated hash lookups (NFC) (llvm#111275) [ByteCode] Avoid repeated hash lookups (NFC) (llvm#111273) [StaticAnalyzer] Avoid repeated hash lookups (NFC) (llvm#111272) [CodeGen] Avoid repeated hash lookups (NFC) (llvm#111274) [RISCV] Simplify fixed-vector-fp.ll run lines. NFC [libc++][format][1/3] Adds more benchmarks. (llvm#101803) [X86] combineOrXorWithSETCC - avoid duplicate SDLoc/operands code. NFC. [X86] convertIntLogicToFPLogic - avoid duplicate SDLoc/operands code. NFC. [libc] Clean up some include in `libc`. (llvm#110980) [X86] combineBitOpWithPACK - avoid duplicate SDLoc/operands code. NFC. [X86] combineBitOpWithMOVMSK - avoid duplicate SDLoc/operands code. NFC. [X86] combineBitOpWithShift - avoid duplicate SDLoc/operands code. NFC. [x86] combineMul - use computeKnownBits directly to find MUL_IMM constant splat. [X86] combineSubABS - avoid duplicate SDLoc. NFC. [ValueTypes][RISCV] Add v1bf16 type (llvm#111112) [VPlan] Add additional FOR hoisting test. [clang-tidy] Create bugprone-bitwise-pointer-cast check (llvm#108083) [InstCombine] Canonicalize more geps with constant gep bases and constant offsets. (llvm#110033) [LV] Honor uniform-after-vectorization in setVectorizedCallDecision. [ELF] Pass Ctx & to Arch/ [ELF] Pass Ctx & to Arch/ [libc++] Fix a typo (llvm#111239) [X86] For minsize memset/memcpy, use byte or double-word accesses (llvm#87003) [RISCV] Unify RVBShift_ri and RVBShiftW_ri with Shift_ri and ShiftW_ri. NFC (llvm#111263) Revert "Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (llvm#110714)" (llvm#111059)" [libc] Add missing include to __support/StringUtil/tables/stdc_errors.h. (llvm#111271) [libc] remove errno.h includes (llvm#110934) [NFC][rtsan] Update docs to include [[clang::blocking]] (llvm#111249) [RISCV] Give ZEXT_H_RV32 and ZEXT_H_RV64 R-type format to match PACK. NFC [mlir][SPIRV] Fix build (2) (llvm#111265) [mlir][SPIRV] Fix build error (llvm#111264) [mlir][NFC] Mark type converter in `populate...` functions as `const` (llvm#111250) [Basic] Avoid repeated hash lookups (NFC) (llvm#111228) [RISCV] Use THShift_ri class instead of RVBShift_ri for TH_TST instruction. NFC [VPlan] Only generate first lane for VPPredInstPHI if no others used. [ELF] Don't call getPPC64TargetInfo outside Driver. NFC [GISel] Don't preserve NSW flag when converting G_MUL of INT_MIN to G_SHL. (llvm#111230) [APInt] Slightly simplify APInt::ashrSlowCase. NFC (llvm#111220) [Sema] Avoid repeated hash lookups (NFC) (llvm#111227) [Affine] Avoid repeated hash lookups (NFC) (llvm#111226) [Driver] Avoid repeated hash lookups (NFC) (llvm#111225) [clang][test] Remove a broken bytecode test [ELF] Pass Ctx & [ELF] Pass Ctx & to Relocations Signed-off-by: kyvangka1610 <[email protected]>
This is another small but hopefully not performance negative step to canonicalizing towards i8 geps. We looks for geps with a constant offset base pointer of the form
gep (gep @glob, C1), x, C2
and expand the gepinstruction, so that the constant can hopefully be combined together (or the x offset can be computed in common).