Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 32 additions & 71 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,73 +112,42 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
!GV->getValueType()->isArrayTy() || !GV->isConstant() ||
!GV->hasDefinitiveInitializer())
return nullptr;

Type *GEPSrcEltTy = GEP->getSourceElementType();
if (GEPSrcEltTy->isArrayTy())
GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;

Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
TypeSize GlobalSize = DL.getTypeAllocSize(Init->getType());
Type *EltTy = LI->getType();
TypeSize EltSize = DL.getTypeStoreSize(EltTy);
if (EltSize.isScalable())
return nullptr;

uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
// Don't blow up on huge arrays.
if (ArrayElementCount > MaxArraySizeForCombine)
unsigned IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
SmallMapVector<Value *, APInt, 4> VarOffsets;
APInt ConstOffset(IndexBW, 0);
if (!GEP->collectOffset(DL, IndexBW, VarOffsets, ConstOffset) ||
VarOffsets.size() != 1 || IndexBW > 64)
return nullptr;

// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array or elements of equal size.
//
// Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
// Or: GEP i8 GV, Idx

unsigned GEPIdxOp = 1;
if (GEP->getSourceElementType()->isArrayTy()) {
GEPIdxOp = 2;
if (!match(GEP->getOperand(1), m_ZeroInt()))
return nullptr;
}
if (GEP->getNumOperands() < GEPIdxOp + 1 ||
isa<Constant>(GEP->getOperand(GEPIdxOp)))
Value *Idx = VarOffsets.front().first;
const APInt &Stride = VarOffsets.front().second;
// If the index type is non-canonical, wait for it to be canonicalized.
if (Idx->getType()->getScalarSizeInBits() != IndexBW)
return nullptr;

// Check that indices after the variable are constants and in-range for the
// type they index. Collect the indices. This is typically for arrays of
// structs.
SmallVector<unsigned, 4> LaterIndices;

Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Idx)
return nullptr; // Variable index.

uint64_t IdxVal = Idx->getZExtValue();
if ((unsigned)IdxVal != IdxVal)
return nullptr; // Too large array index.

if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
if (IdxVal >= ATy->getNumElements())
return nullptr;
EltTy = ATy->getElementType();
} else {
return nullptr; // Unknown type.
}
// Allow an additional constant offset, but only within the stride.
if (!ConstOffset.ult(Stride))
return nullptr;

LaterIndices.push_back(IdxVal);
}
// Don't handle overlapping loads for now.
if (!Stride.uge(EltSize.getFixedValue()))
return nullptr;

Value *Idx = GEP->getOperand(GEPIdxOp);
// If the index type is non-canonical, wait for it to be canonicalized.
if (Idx->getType() != DL.getIndexType(GEP->getType()))
// Don't blow up on huge arrays.
uint64_t ArrayElementCount =
divideCeil((GlobalSize.getFixedValue() - ConstOffset.getZExtValue()),
Stride.getZExtValue());
if (ArrayElementCount > MaxArraySizeForCombine)
return nullptr;

enum { Overdefined = -3, Undefined = -2 };
Expand Down Expand Up @@ -211,18 +180,12 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(

// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
APInt Offset = ConstOffset;
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i, Offset += Stride) {
Constant *Elt = ConstantFoldLoadFromConst(Init, EltTy, Offset, DL);
if (!Elt)
return nullptr;

// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty()) {
Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
if (!Elt)
return nullptr;
}

// If the element is masked, handle it.
if (AndCst) {
Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
Expand Down Expand Up @@ -309,19 +272,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.

// If inbounds keyword is not present, Idx * ElementSize can overflow.
// Let's assume that ElementSize is 2 and the wanted value is at offset 0.
// If inbounds keyword is not present, Idx * Stride can overflow.
// Let's assume that Stride is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(Stride) bits of Idx.
unsigned ElementSize =
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value *Idx) {
if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
if (!GEP->isInBounds() && Stride.countr_zero() != 0) {
Value *Mask = Constant::getAllOnesValue(Idx->getType());
Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
Mask = Builder.CreateLShr(Mask, Stride.countr_zero());
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
Expand Down
183 changes: 183 additions & 0 deletions llvm/test/Transforms/InstCombine/load-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,186 @@ define i1 @pr93017(i64 %idx) {
%cmp = icmp ne ptr %v, null
ret i1 %cmp
}

@g_i32_lo = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

; The [4 x i32] global is indexed with an i16 GEP and read with i16 loads, so
; the load type differs from the array element type. The expected output tests
; bit %idx of the mask 170 (0b10101010), i.e. the compare is true for the odd
; i16 indices 1, 3, 5 and 7.
define i1 @load_vs_array_type_mismatch1(i32 %idx) {
; CHECK-LABEL: @load_vs_array_type_mismatch1(
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 170
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr inbounds i16, ptr @g_i32_lo, i32 %idx
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

@g_i32_hi = internal constant [4 x i32] [i32 u0x00010000, i32 u0x00020000, i32 u0x00030000, i32 u0x00040000]

; Same shape as the i16-over-i32 mismatch case, but every i32 element of
; @g_i32_hi only has bits set in its upper 16 bits. The expected output tests
; bit %idx of the mask 85 (0b01010101), i.e. the compare is true for the even
; i16 indices 0, 2, 4 and 6.
define i1 @load_vs_array_type_mismatch2(i32 %idx) {
; CHECK-LABEL: @load_vs_array_type_mismatch2(
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 85
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr inbounds i16, ptr @g_i32_hi, i32 %idx
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

@g_i16_1 = internal constant [8 x i16] [i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1, i16 0]

; The GEP steps over the i16 array one {i16, i16} pair at a time; the trailing
; constant index 1 selects the second i16 of each pair, i.e. a constant offset
; inside the stride. The loaded values are 1, 0, 1, 0, so the compare is true
; for idx == 1 || idx == 3, which the expected output encodes as
; (idx & -3) == 1.
define i1 @load_vs_array_type_mismatch_offset1(i32 %idx) {
; CHECK-LABEL: @load_vs_array_type_mismatch_offset1(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr inbounds {i16, i16}, ptr @g_i16_1, i32 %idx, i32 1
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

@g_i16_2 = internal constant [8 x i16] [i16 1, i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1]

; The GEP steps over the i16 array one {i16, i16} pair at a time; the trailing
; constant index 1 selects the second i16 of each pair, i.e. a constant offset
; inside the stride. The loaded values are 0, 1, 0, 1, so the compare is true
; for idx == 0 || idx == 2, which the expected output encodes as
; (idx & -3) == 0.
define i1 @load_vs_array_type_mismatch_offset2(i32 %idx) {
; CHECK-LABEL: @load_vs_array_type_mismatch_offset2(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr inbounds {i16, i16}, ptr @g_i16_2, i32 %idx, i32 1
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

; Negative test: the leading constant index 1 over [2 x i16] contributes a
; constant offset of one whole [2 x i16] (4 bytes), which is not smaller than
; the stride of the variable i16 index (2 bytes). The fold only allows constant
; offsets within the stride, so the load and compare are expected to remain;
; only the type of the constant index is canonicalized from i64 to i32.
define i1 @offset_larger_than_stride(i32 %idx) {
; CHECK-LABEL: @offset_larger_than_stride(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr @g_i16_1, i32 1, i32 [[TMP1:%.*]]
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr [2 x i16], ptr @g_i16_1, i64 1, i32 %idx
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

; Negative test: the GEP advances one i8 (1 byte) per index while the load
; reads an i16 (2 bytes), so loads at consecutive indices would overlap.
; Overlapping loads are not handled, and the IR is expected to stay unchanged
; apart from the explicit alignment on the load.
define i1 @load_size_larger_stride(i32 %idx) {
; CHECK-LABEL: @load_size_larger_stride(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g_i16_1, i32 [[IDX:%.*]]
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
%gep = getelementptr i8, ptr @g_i16_1, i32 %idx
%load = load i16, ptr %gep
%cmp = icmp eq i16 %load, 0
ret i1 %cmp
}

@CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]

; Elements of @CG_MESSY that are signed-less-than 5 sit at indices 0, 2, 4, 5,
; 6 and 8, which gives the expected bitmask 373 (0b101110101). The GEP is not
; inbounds, so the index is first masked with 1073741823 (0x3FFFFFFF, the top
; two bits cleared for the 4-byte stride) before it is used as a shift amount.
define i1 @cmp_load_constant_array_messy(i32 %x){
; CHECK-LABEL: @cmp_load_constant_array_messy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0:%.*]], 1073741823
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 373
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[COND]]
;

entry:
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
%isOK = load i32, ptr %isOK_ptr
%cond = icmp slt i32 %isOK, 5
ret i1 %cond
}

; The GEP steps by i32 (4 bytes) but the load is only i16, so the load is
; narrower than the stride and reads part of each i32 element. The expected
; bitmask is 373 (0b101110101), selecting indices 0, 2, 4, 5, 6 and 8; the
; index is masked with 1073741823 first because the GEP is not inbounds.
; NOTE(review): this presumably relies on a little-endian datalayout so that
; the i16 load sees the low half of each element -- the datalayout line is not
; visible in this excerpt; confirm.
define i1 @cmp_diff_load_constant_array_messy0(i32 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1:%.*]], 1073741823
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 1, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 373
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: ret i1 [[COND]]
;
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
%isOK = load i16, ptr %isOK_ptr
%cond = icmp slt i16 %isOK, 5
ret i1 %cond
}

; Negative test: load size larger than the GEP stride is currently not
; supported. The GEP steps by i6 while the load reads an i16, so loads at
; consecutive indices would overlap; the IR is expected to stay unchanged
; apart from the explicit alignment on the load.
define i1 @cmp_diff_load_constant_array_messy1(i32 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1:%.*]]
; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[ISOK]], 5
; CHECK-NEXT: ret i1 [[COND]]
;
%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i32 %x
%isOK = load i16, ptr %isOK_ptr
%cond = icmp slt i16 %isOK, 5
ret i1 %cond
}

; Negative test: the right-hand side of the compare is the variable %y rather
; than a constant, so there is no fixed value to evaluate each array element
; against. The GEP, load and compare are expected to stay unchanged apart from
; the explicit alignment on the load.
define i1 @cmp_load_constant_array_variable_icmp(i32 %x, i32 %y) {
; CHECK-LABEL: @cmp_load_constant_array_variable_icmp(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG_MESSY, i32 [[X:%.*]]
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[COND]]
;
entry:
%isOK_ptr = getelementptr inbounds i32, ptr @CG_MESSY, i32 %x
%isOK = load i32, ptr %isOK_ptr
%cond = icmp ult i32 %isOK, %y
ret i1 %cond
}

@CG_CLEAR = constant [10 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9]

; Constant offsets are not supported if they are negative or not smaller than
; the stride.
; Negative test: the leading constant index 5 over [1 x i32] adds a constant
; offset of five i32 elements, which is not smaller than the one-element stride
; of the variable index %x. The IR is expected to stay unchanged apart from the
; explicit alignment on the load.
define i1 @cmp_load_constant_additional_positive_offset(i32 %x) {
; CHECK-LABEL: @cmp_load_constant_additional_positive_offset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 [[X:%.*]]
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
; CHECK-NEXT: ret i1 [[COND]]
;
entry:
%isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 %x
%isOK = load i32, ptr %isOK_ptr
%cond = icmp ult i32 %isOK, 5
ret i1 %cond
}

; Negative test: the trailing constant index -5 gives the address a negative
; constant offset relative to the variable index %x. Negative constant offsets
; are not supported, so the IR is expected to stay unchanged apart from the
; explicit alignment on the load.
define i1 @cmp_load_constant_additional_negative_offset(i32 %x) {
; CHECK-LABEL: @cmp_load_constant_additional_negative_offset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 [[X:%.*]], i32 -5
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
; CHECK-NEXT: ret i1 [[COND]]
;
entry:
%isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 %x, i32 -5
%isOK = load i32, ptr %isOK_ptr
%cond = icmp ult i32 %isOK, 5
ret i1 %cond
}
5 changes: 1 addition & 4 deletions llvm/test/Transforms/InstCombine/opaque-ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -543,10 +543,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {

define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr @ary, i64 [[IDX:%.*]]
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
; CHECK-NEXT: ret i1 [[CMP]]
; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep
Expand Down