Skip to content

Commit a47c8e4

Browse files
committed
[InstCombine] fold lshr(trunc(lshr X, C1)) C2
Only the multi-use cases change here because another fold already catches the simpler patterns. However, that other fold is the source of infinite loops when we try to add D110170, so removing it is planned as a follow-up. An attempt at a general proof in Alive2: https://alive2.llvm.org/ce/z/Ns1uS2 Note that the overshift fold-to-zero tests are not currently handled by InstSimplify. If they were, we could assert that the sum of the shift amounts is less than the source bit width.
1 parent 29c09c7 commit a47c8e4

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,14 +1149,26 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
11491149
}
11501150
}
11511151

1152+
// (X >>u C1) >>u C --> X >>u (C1 + C)
11521153
if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
1153-
unsigned AmtSum = ShAmtC + C1->getZExtValue();
11541154
// Oversized shifts are simplified to zero in InstSimplify.
1155+
unsigned AmtSum = ShAmtC + C1->getZExtValue();
11551156
if (AmtSum < BitWidth)
1156-
// (X >>u C1) >>u C --> X >>u (C1 + C)
11571157
return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
11581158
}
11591159

1160+
// If the first shift covers the number of bits truncated and the combined
1161+
// shift fits in the source width:
1162+
// (trunc (X >>u C1)) >>u C --> trunc (X >>u (C1 + C))
1163+
if (match(Op0, m_OneUse(m_Trunc(m_LShr(m_Value(X), m_APInt(C1)))))) {
1164+
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
1165+
unsigned AmtSum = ShAmtC + C1->getZExtValue();
1166+
if (C1->uge(SrcWidth - BitWidth) && AmtSum < SrcWidth) {
1167+
Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
1168+
return new TruncInst(SumShift, Ty);
1169+
}
1170+
}
1171+
11601172
// Look for a "splat" mul pattern - it replicates bits across each half of
11611173
// a value, so a right shift is just a mask of the low bits:
11621174
// lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,8 @@ define i12 @trunc_sandwich_use1(i32 %x) {
487487
; CHECK-LABEL: @trunc_sandwich_use1(
488488
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 28
489489
; CHECK-NEXT: call void @use(i32 [[SH]])
490-
; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
491-
; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 2
490+
; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 30
491+
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
492492
; CHECK-NEXT: ret i12 [[R]]
493493
;
494494
%sh = lshr i32 %x, 28
@@ -502,8 +502,8 @@ define <3 x i9> @trunc_sandwich_splat_vec_use1(<3 x i14> %x) {
502502
; CHECK-LABEL: @trunc_sandwich_splat_vec_use1(
503503
; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], <i14 6, i14 6, i14 6>
504504
; CHECK-NEXT: call void @usevec(<3 x i14> [[SH]])
505-
; CHECK-NEXT: [[TR:%.*]] = trunc <3 x i14> [[SH]] to <3 x i9>
506-
; CHECK-NEXT: [[R:%.*]] = lshr <3 x i9> [[TR]], <i9 5, i9 5, i9 5>
505+
; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], <i14 11, i14 11, i14 11>
506+
; CHECK-NEXT: [[R:%.*]] = trunc <3 x i14> [[SUM_SHIFT]] to <3 x i9>
507507
; CHECK-NEXT: ret <3 x i9> [[R]]
508508
;
509509
%sh = lshr <3 x i14> %x, <i14 6, i14 6, i14 6>
@@ -517,8 +517,8 @@ define i12 @trunc_sandwich_min_shift1_use1(i32 %x) {
517517
; CHECK-LABEL: @trunc_sandwich_min_shift1_use1(
518518
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20
519519
; CHECK-NEXT: call void @use(i32 [[SH]])
520-
; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
521-
; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 1
520+
; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 21
521+
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
522522
; CHECK-NEXT: ret i12 [[R]]
523523
;
524524
%sh = lshr i32 %x, 20
@@ -528,6 +528,8 @@ define i12 @trunc_sandwich_min_shift1_use1(i32 %x) {
528528
ret i12 %r
529529
}
530530

531+
; negative test - the trunc removes more bits than the first shift amount
532+
531533
define i12 @trunc_sandwich_small_shift1_use1(i32 %x) {
532534
; CHECK-LABEL: @trunc_sandwich_small_shift1_use1(
533535
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 19
@@ -547,8 +549,8 @@ define i12 @trunc_sandwich_max_sum_shift_use1(i32 %x) {
547549
; CHECK-LABEL: @trunc_sandwich_max_sum_shift_use1(
548550
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20
549551
; CHECK-NEXT: call void @use(i32 [[SH]])
550-
; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
551-
; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 11
552+
; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31
553+
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
552554
; CHECK-NEXT: ret i12 [[R]]
553555
;
554556
%sh = lshr i32 %x, 20
@@ -562,8 +564,8 @@ define i12 @trunc_sandwich_max_sum_shift2_use1(i32 %x) {
562564
; CHECK-LABEL: @trunc_sandwich_max_sum_shift2_use1(
563565
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 30
564566
; CHECK-NEXT: call void @use(i32 [[SH]])
565-
; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
566-
; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 1
567+
; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31
568+
; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
567569
; CHECK-NEXT: ret i12 [[R]]
568570
;
569571
%sh = lshr i32 %x, 30
@@ -573,6 +575,8 @@ define i12 @trunc_sandwich_max_sum_shift2_use1(i32 %x) {
573575
ret i12 %r
574576
}
575577

578+
; negative test - but overshift is simplified to zero by another fold
579+
576580
define i12 @trunc_sandwich_big_sum_shift1_use1(i32 %x) {
577581
; CHECK-LABEL: @trunc_sandwich_big_sum_shift1_use1(
578582
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 21
@@ -586,6 +590,8 @@ define i12 @trunc_sandwich_big_sum_shift1_use1(i32 %x) {
586590
ret i12 %r
587591
}
588592

593+
; negative test - but overshift is simplified to zero by another fold
594+
589595
define i12 @trunc_sandwich_big_sum_shift2_use1(i32 %x) {
590596
; CHECK-LABEL: @trunc_sandwich_big_sum_shift2_use1(
591597
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 31

0 commit comments

Comments
 (0)