-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Don't fold offsets into auipc if offset is larger than the reference global variable. #135297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ference global variable. The global variable should be within 2GB of the PC, but an offset to an address outside the global might not be. Fixes llvm#134525.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesThe global variable should be within 2GB of the PC, but an offset to an address outside the global might not be. Fixes #134525. Full diff: https://github.com/llvm/llvm-project/pull/135297.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index bbbb1e1595982..eb3d43c9af7c2 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -35,7 +35,7 @@ class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);
bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
- void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
+ bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
int64_t Offset);
bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &TailAdd, Register GSReg);
@@ -142,9 +142,21 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
// Update the offset in Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
-void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
+bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &Tail, int64_t Offset) {
assert(isInt<32>(Offset) && "Unexpected offset");
+
+ // If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
+ // the global object. The object may be within 2GB of the PC, but addresses
+ // outside of the object might not be.
+ if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
+ const GlobalValue *GV = Hi.getOperand(1).getGlobal();
+ Type *Ty = GV->getValueType();
+ if (!Ty->isSized() || Offset < 0 ||
+ (uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty))
+ return false;
+ }
+
// Put the offset back in Hi and the Lo
Hi.getOperand(1).setOffset(Offset);
if (Hi.getOpcode() != RISCV::AUIPC)
@@ -156,6 +168,7 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
Tail.eraseFromParent();
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
<< " " << Hi << " " << Lo;);
+ return true;
}
// Detect patterns for large offsets that are passed into an ADD instruction.
@@ -205,7 +218,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
// Handle rs1 of ADDI is X0.
if (AddiReg == RISCV::X0) {
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
- foldOffset(Hi, Lo, TailAdd, OffLo);
+ if (!foldOffset(Hi, Lo, TailAdd, OffLo))
+ return false;
OffsetTail.eraseFromParent();
return true;
}
@@ -226,7 +240,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
return false;
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
<< " " << OffsetLui);
- foldOffset(Hi, Lo, TailAdd, Offset);
+ if (!foldOffset(Hi, Lo, TailAdd, Offset))
+ return false;
OffsetTail.eraseFromParent();
OffsetLui.eraseFromParent();
return true;
@@ -235,7 +250,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
// exists.
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
- foldOffset(Hi, Lo, TailAdd, Offset);
+ if (!foldOffset(Hi, Lo, TailAdd, Offset))
+ return false;
OffsetTail.eraseFromParent();
return true;
}
@@ -294,7 +310,8 @@ bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
Offset = (uint64_t)Offset << ShAmt;
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
- foldOffset(Hi, Lo, TailShXAdd, Offset);
+ if (!foldOffset(Hi, Lo, TailShXAdd, Offset))
+ return false;
OffsetTail.eraseFromParent();
return true;
}
@@ -327,15 +344,15 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
if (TailTail.getOpcode() == RISCV::ADDI) {
Offset += TailTail.getOperand(2).getImm();
LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
- foldOffset(Hi, Lo, TailTail, Offset);
+ if (!foldOffset(Hi, Lo, TailTail, Offset))
+ return false;
Tail.eraseFromParent();
return true;
}
}
LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
- foldOffset(Hi, Lo, Tail, Offset);
- return true;
+ return foldOffset(Hi, Lo, Tail, Offset);
}
case RISCV::ADD:
// The offset is too large to fit in the immediate field of ADDI.
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index b8dc7804c4908..255e65490195c 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -1227,3 +1227,72 @@ for.inc.peel: ; preds = %entry
store i32 %spec.select, ptr null, align 4
ret i32 0
}
+
+@ki_end = external dso_local global [0 x i8], align 1
+
+define i1 @pr134525() nounwind {
+; RV32I-LABEL: pr134525:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: lui a0, %hi(ki_end)
+; RV32I-NEXT: addi a0, a0, %lo(ki_end)
+; RV32I-NEXT: lui a1, 523776
+; RV32I-NEXT: lui a2, 32
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: addi a2, a2, 1
+; RV32I-NEXT: sltu a2, a1, a2
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32I-MEDIUM-LABEL: pr134525:
+; RV32I-MEDIUM: # %bb.0: # %entry
+; RV32I-MEDIUM-NEXT: .Lpcrel_hi15:
+; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(ki_end)
+; RV32I-MEDIUM-NEXT: lui a1, 523776
+; RV32I-MEDIUM-NEXT: lui a2, 32
+; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi15)
+; RV32I-MEDIUM-NEXT: addi a2, a2, 1
+; RV32I-MEDIUM-NEXT: add a1, a0, a1
+; RV32I-MEDIUM-NEXT: sltu a2, a1, a2
+; RV32I-MEDIUM-NEXT: sltu a0, a1, a0
+; RV32I-MEDIUM-NEXT: not a0, a0
+; RV32I-MEDIUM-NEXT: and a0, a0, a2
+; RV32I-MEDIUM-NEXT: ret
+;
+; RV64I-LABEL: pr134525:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: lui a0, %hi(ki_end+2145386496)
+; RV64I-NEXT: addi a0, a0, %lo(ki_end+2145386496)
+; RV64I-NEXT: lui a1, 32
+; RV64I-NEXT: addiw a1, a1, 1
+; RV64I-NEXT: sltu a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64I-MEDIUM-LABEL: pr134525:
+; RV64I-MEDIUM: # %bb.0: # %entry
+; RV64I-MEDIUM-NEXT: .Lpcrel_hi15:
+; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(ki_end)
+; RV64I-MEDIUM-NEXT: lui a1, 523776
+; RV64I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi15)
+; RV64I-MEDIUM-NEXT: add a0, a0, a1
+; RV64I-MEDIUM-NEXT: lui a1, 32
+; RV64I-MEDIUM-NEXT: addiw a1, a1, 1
+; RV64I-MEDIUM-NEXT: sltu a0, a0, a1
+; RV64I-MEDIUM-NEXT: ret
+;
+; RV64I-LARGE-LABEL: pr134525:
+; RV64I-LARGE: # %bb.0: # %entry
+; RV64I-LARGE-NEXT: .Lpcrel_hi16:
+; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI22_0)
+; RV64I-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi16)(a0)
+; RV64I-LARGE-NEXT: lui a1, 523776
+; RV64I-LARGE-NEXT: add a0, a0, a1
+; RV64I-LARGE-NEXT: lui a1, 32
+; RV64I-LARGE-NEXT: addiw a1, a1, 1
+; RV64I-LARGE-NEXT: sltu a0, a0, a1
+; RV64I-LARGE-NEXT: ret
+entry:
+ %cmp = icmp ult i64 sub (i64 ptrtoint (ptr @ki_end to i64), i64 -2145386496), 131073
+ ret i1 %cmp
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
// If Hi is an AUIPC, don't fold the offset if it is outside the bounds of | ||
// the global object. The object may be within 2GB of the PC, but addresses | ||
// outside of the object might not be. | ||
if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, actually, one minor question here: if Hi.getOperand(1).isGlobal()
is false, what is the symbol? Do we need to just unconditionally refuse to fold?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will also try fold ConstantPool and BlockAddress offsets in this pass. Though I think the main reason for them to be in this pass is because this pass also folds the ADDI into load/store instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably we don't want to fold offsets into blockaddress.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. I'll commit this to fix the known issue and I'll add separate PR for block address.
* origin/main: [mlir][vector] Prevent folding non memref-type gather into maskedload (llvm#135371) [mlir][SMT] remove custom forall/exists builder because of asan memory leak [bazel] Fix a typo (llvm#135460) [bazel] Add support for SMT Dialect (llvm#135454) [clang] ASTImporter: fix SubstNonTypeTemplateParmExpr source location (llvm#135450) [RISCV] Don't fold offsets into auipc if offset is larger than the reference global variable. (llvm#135297) [gn] port d1fd977 [NFC][LLVM] Apply std::move to object being pushed back in findSymbolCommon (llvm#135290) [AMDGPU] Teach iterative schedulers about IGLP (llvm#134953)
…ference global variable. (llvm#135297) The global variable should be within 2GB of the PC, but an offset to an address outside the global might not be. Fixes llvm#134525.
The global variable should be within 2GB of the PC, but an offset to an address outside the global might not be.
Fixes #134525.