Skip to content

Commit 2116921

Browse files
[InstCombine] Fold select of srem and conditional add
Simplify a pattern that may show up when computing the remainder of Euclidean division. In particular, when the divisor is a power of two and never negative, the signed remainder can be folded with a bitwise and. Fixes 64305. Proofs: https://alive2.llvm.org/ce/z/9_KG6c Differential Revision: https://reviews.llvm.org/D156811
1 parent f5cb626 commit 2116921

File tree

2 files changed

+52
-21
lines changed

2 files changed

+52
-21
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2584,6 +2584,48 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
25842584
return nullptr;
25852585
}
25862586

2587+
/// Tries to reduce a pattern that arises when calculating the remainder of the
2588+
/// Euclidean division. When the divisor is a power of two and is guaranteed not
2589+
/// to be negative, a signed remainder can be folded with a bitwise and.
2590+
///
2591+
/// (x % n) < 0 ? (x % n) + n : (x % n)
2592+
/// -> x & (n - 1)
2593+
static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
2594+
IRBuilderBase &Builder) {
2595+
Value *CondVal = SI.getCondition();
2596+
Value *TrueVal = SI.getTrueValue();
2597+
Value *FalseVal = SI.getFalseValue();
2598+
2599+
ICmpInst::Predicate Pred;
2600+
Value *Op, *RemRes, *Remainder;
2601+
const APInt *C;
2602+
bool TrueIfSigned = false;
2603+
2604+
if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_APInt(C))) &&
2605+
IC.isSignBitCheck(Pred, *C, TrueIfSigned)))
2606+
return nullptr;
2607+
2608+
// If the sign bit is not set, we have a SGE/SGT comparison, and the operands
2609+
// of the select are inverted.
2610+
if (!TrueIfSigned)
2611+
std::swap(TrueVal, FalseVal);
2612+
2613+
// We are matching a quite specific pattern here:
2614+
// %rem = srem i32 %x, %n
2615+
// %cnd = icmp slt i32 %rem, 0
2616+
// %add = add i32 %rem, %n
2617+
// %sel = select i1 %cnd, i32 %add, i32 %rem
2618+
if (!(match(TrueVal, m_Add(m_Value(RemRes), m_Value(Remainder))) &&
2619+
match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&
2620+
IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&
2621+
FalseVal == RemRes))
2622+
return nullptr;
2623+
2624+
Value *Add = Builder.CreateAdd(Remainder,
2625+
Constant::getAllOnesValue(RemRes->getType()));
2626+
return BinaryOperator::CreateAnd(Op, Add);
2627+
}
2628+
25872629
static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
25882630
FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
25892631
if (!FI)
@@ -3430,6 +3472,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
34303472
if (Instruction *I = foldSelectExtConst(SI))
34313473
return I;
34323474

3475+
if (Instruction *I = foldSelectWithSRem(SI, *this, Builder))
3476+
return I;
3477+
34333478
// Fold (select C, (gep Ptr, Idx), Ptr) -> (gep Ptr, (select C, Idx, 0))
34343479
// Fold (select C, Ptr, (gep Ptr, Idx)) -> (gep Ptr, (select C, 0, Idx))
34353480
auto SelectGepWithBase = [&](GetElementPtrInst *Gep, Value *Base,

llvm/test/Transforms/InstCombine/select-divrem.ll

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,7 @@ define i5 @urem_common_dividend_defined_cond(i1 noundef %b, i5 %x, i5 %y, i5 %z)
216216

217217
define i32 @rem_euclid_1(i32 %0) {
218218
; CHECK-LABEL: @rem_euclid_1(
219-
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0:%.*]], 8
220-
; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[REM]], 0
221-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[REM]], 8
222-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i32 [[ADD]], i32 [[REM]]
219+
; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7
223220
; CHECK-NEXT: ret i32 [[SEL]]
224221
;
225222
%rem = srem i32 %0, 8
@@ -231,10 +228,7 @@ define i32 @rem_euclid_1(i32 %0) {
231228

232229
define i32 @rem_euclid_2(i32 %0) {
233230
; CHECK-LABEL: @rem_euclid_2(
234-
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0:%.*]], 8
235-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[REM]], 8
236-
; CHECK-NEXT: [[COND1:%.*]] = icmp slt i32 [[REM]], 0
237-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND1]], i32 [[ADD]], i32 [[REM]]
231+
; CHECK-NEXT: [[SEL:%.*]] = and i32 [[TMP0:%.*]], 7
238232
; CHECK-NEXT: ret i32 [[SEL]]
239233
;
240234
%rem = srem i32 %0, 8
@@ -291,10 +285,7 @@ define i32 @rem_euclid_wrong_operands_select(i32 %0) {
291285

292286
define <2 x i32> @rem_euclid_vec(<2 x i32> %0) {
293287
; CHECK-LABEL: @rem_euclid_vec(
294-
; CHECK-NEXT: [[REM:%.*]] = srem <2 x i32> [[TMP0:%.*]], <i32 8, i32 8>
295-
; CHECK-NEXT: [[COND:%.*]] = icmp slt <2 x i32> [[REM]], zeroinitializer
296-
; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i32> [[REM]], <i32 8, i32 8>
297-
; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[ADD]], <2 x i32> [[REM]]
288+
; CHECK-NEXT: [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 7, i32 7>
298289
; CHECK-NEXT: ret <2 x i32> [[SEL]]
299290
;
300291
%rem = srem <2 x i32> %0, <i32 8, i32 8>
@@ -306,10 +297,7 @@ define <2 x i32> @rem_euclid_vec(<2 x i32> %0) {
306297

307298
define i128 @rem_euclid_i128(i128 %0) {
308299
; CHECK-LABEL: @rem_euclid_i128(
309-
; CHECK-NEXT: [[REM:%.*]] = srem i128 [[TMP0:%.*]], 8
310-
; CHECK-NEXT: [[COND:%.*]] = icmp slt i128 [[REM]], 0
311-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i128 [[REM]], 8
312-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i128 [[ADD]], i128 [[REM]]
300+
; CHECK-NEXT: [[SEL:%.*]] = and i128 [[TMP0:%.*]], 7
313301
; CHECK-NEXT: ret i128 [[SEL]]
314302
;
315303
%rem = srem i128 %0, 8
@@ -321,11 +309,9 @@ define i128 @rem_euclid_i128(i128 %0) {
321309

322310
define i8 @rem_euclid_non_const_pow2(i8 %0, i8 %1) {
323311
; CHECK-LABEL: @rem_euclid_non_const_pow2(
324-
; CHECK-NEXT: [[POW2:%.*]] = shl nuw i8 1, [[TMP0:%.*]]
325-
; CHECK-NEXT: [[REM:%.*]] = srem i8 [[TMP1:%.*]], [[POW2]]
326-
; CHECK-NEXT: [[COND:%.*]] = icmp slt i8 [[REM]], 0
327-
; CHECK-NEXT: [[ADD:%.*]] = select i1 [[COND]], i8 [[POW2]], i8 0
328-
; CHECK-NEXT: [[SEL:%.*]] = add i8 [[REM]], [[ADD]]
312+
; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[TMP0:%.*]]
313+
; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[NOTMASK]], -1
314+
; CHECK-NEXT: [[SEL:%.*]] = and i8 [[TMP3]], [[TMP1:%.*]]
329315
; CHECK-NEXT: ret i8 [[SEL]]
330316
;
331317
%pow2 = shl i8 1, %0

0 commit comments

Comments
 (0)