Skip to content

Commit cf12830

Browse files
authored
[SDAG] Don't treat ISD::SHL as a uniform binary operator in ShrinkDemandedOp (#92753)
In `TargetLowering::ShrinkDemandedOp`, the types of the lhs and rhs operands may differ before legalization. In the original test case, `VT` is `i64` and `SmallVT` is `i32`, but the type of the rhs (the shift amount) is `i8`, so invalid truncate nodes are created. See the description of ISD::SHL for further information: "After legalization, the type of the shift amount is known to be TLI.getShiftAmountTy(). Before legalization, the shift amount can be any type, but care must be taken to ensure it is large enough." (https://github.com/llvm/llvm-project/blob/605ae4e93be8976095c7eedf5c08bfdb9ff71257/llvm/include/llvm/CodeGen/ISDOpcodes.h#L691-L712) This patch stops handling ISD::SHL in `TargetLowering::ShrinkDemandedOp` and duplicates the narrowing logic in `TargetLowering::SimplifyDemandedBits`. It also adds extra checks, such as `isNarrowingProfitable` and `isTypeDesirableForOp`, to improve the codegen on AArch64. Fixes #92720.
1 parent ba0e871 commit cf12830

File tree

4 files changed

+105
-71
lines changed

4 files changed

+105
-71
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
587587
if (VT.isVector())
588588
return false;
589589

590+
assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
591+
Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
592+
"ShrinkDemandedOp only supports operands that have the same size!");
593+
590594
// Don't do this if the node has another user, which may require the
591595
// full value.
592596
if (!Op.getNode()->hasOneUse())
@@ -1832,11 +1836,33 @@ bool TargetLowering::SimplifyDemandedBits(
18321836
}
18331837
}
18341838

1839+
// TODO: Can we merge this fold with the one below?
18351840
// Try shrinking the operation as long as the shift amount will still be
18361841
// in range.
1837-
if ((ShAmt < DemandedBits.getActiveBits()) &&
1838-
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1839-
return true;
1842+
if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1843+
Op.getNode()->hasOneUse()) {
1844+
// Search for the smallest integer type with free casts to and from
1845+
// Op's type. For expedience, just check power-of-2 integer types.
1846+
unsigned DemandedSize = DemandedBits.getActiveBits();
1847+
for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1848+
SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1849+
EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1850+
if (isNarrowingProfitable(VT, SmallVT) &&
1851+
isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1852+
isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1853+
(!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1854+
assert(DemandedSize <= SmallVTBits &&
1855+
"Narrowed below demanded bits?");
1856+
// We found a type with free casts.
1857+
SDValue NarrowShl = TLO.DAG.getNode(
1858+
ISD::SHL, dl, SmallVT,
1859+
TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1860+
TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1861+
return TLO.CombineTo(
1862+
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1863+
}
1864+
}
1865+
}
18401866

18411867
// Narrow shift to lower half - similar to ShrinkDemandedOp.
18421868
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))

llvm/test/CodeGen/AArch64/bitfield-insert.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,11 +193,10 @@ define void @test_64bit_badmask(ptr %existing, ptr %new) {
193193
; CHECK: // %bb.0:
194194
; CHECK-NEXT: ldr x8, [x0]
195195
; CHECK-NEXT: ldr x9, [x1]
196-
; CHECK-NEXT: mov w10, #135 // =0x87
197-
; CHECK-NEXT: mov w11, #664 // =0x298
198-
; CHECK-NEXT: lsl w9, w9, #3
199-
; CHECK-NEXT: and x8, x8, x10
200-
; CHECK-NEXT: and x9, x9, x11
196+
; CHECK-NEXT: mov w10, #664 // =0x298
197+
; CHECK-NEXT: mov w11, #135 // =0x87
198+
; CHECK-NEXT: and x9, x10, x9, lsl #3
199+
; CHECK-NEXT: and x8, x8, x11
201200
; CHECK-NEXT: orr x8, x8, x9
202201
; CHECK-NEXT: str x8, [x0]
203202
; CHECK-NEXT: ret
@@ -579,7 +578,6 @@ define <2 x i32> @test_complex_type(ptr %addr, i64 %in, ptr %bf ) {
579578
define i64 @test_truncated_shift(i64 %x, i64 %y) {
580579
; CHECK-LABEL: test_truncated_shift:
581580
; CHECK: // %bb.0: // %entry
582-
; CHECK-NEXT: // kill: def $w1 killed $w1 killed $x1 def $x1
583581
; CHECK-NEXT: bfi x0, x1, #25, #5
584582
; CHECK-NEXT: ret
585583
entry:
@@ -593,7 +591,6 @@ entry:
593591
define i64 @test_and_extended_shift_with_imm(i64 %0) {
594592
; CHECK-LABEL: test_and_extended_shift_with_imm:
595593
; CHECK: // %bb.0:
596-
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0
597594
; CHECK-NEXT: ubfiz x0, x0, #7, #8
598595
; CHECK-NEXT: ret
599596
%2 = shl i64 %0, 7

llvm/test/CodeGen/AArch64/trunc-to-tbl.ll

Lines changed: 57 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -571,29 +571,27 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
571571
; CHECK-NEXT: mov x8, xzr
572572
; CHECK-NEXT: LBB5_1: ; %loop
573573
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
574-
; CHECK-NEXT: ldp x10, x9, [x0]
575-
; CHECK-NEXT: ldrb w13, [x0, #18]
576-
; CHECK-NEXT: ldrh w14, [x0, #16]
574+
; CHECK-NEXT: ldp x9, x10, [x0]
575+
; CHECK-NEXT: ldrb w14, [x0, #18]
576+
; CHECK-NEXT: ldrh w15, [x0, #16]
577577
; CHECK-NEXT: add x0, x0, #32
578-
; CHECK-NEXT: ubfx x12, x9, #12, #20
579-
; CHECK-NEXT: fmov s0, w10
580-
; CHECK-NEXT: lsr x11, x10, #19
581-
; CHECK-NEXT: lsr x15, x9, #31
582-
; CHECK-NEXT: fmov s1, w12
583-
; CHECK-NEXT: lsr x12, x9, #50
584-
; CHECK-NEXT: mov.s v0[1], w11
585-
; CHECK-NEXT: orr w11, w14, w13, lsl #16
586-
; CHECK-NEXT: lsr x13, x10, #38
587-
; CHECK-NEXT: lsr x10, x10, #57
588-
; CHECK-NEXT: mov.s v1[1], w15
589-
; CHECK-NEXT: orr w12, w12, w11, lsl #14
590-
; CHECK-NEXT: orr w9, w10, w9, lsl #7
591-
; CHECK-NEXT: lsr w10, w11, #5
592-
; CHECK-NEXT: mov.s v0[2], w13
578+
; CHECK-NEXT: ubfx x12, x10, #12, #20
579+
; CHECK-NEXT: fmov s1, w9
580+
; CHECK-NEXT: lsr x11, x9, #19
581+
; CHECK-NEXT: lsr x13, x10, #31
582+
; CHECK-NEXT: fmov s0, w12
583+
; CHECK-NEXT: lsr x12, x9, #38
584+
; CHECK-NEXT: extr x9, x10, x9, #57
585+
; CHECK-NEXT: mov.s v1[1], w11
586+
; CHECK-NEXT: orr x11, x15, x14, lsl #16
587+
; CHECK-NEXT: mov.s v0[1], w13
588+
; CHECK-NEXT: extr x13, x11, x10, #50
589+
; CHECK-NEXT: ubfx x10, x11, #5, #27
593590
; CHECK-NEXT: mov.s v1[2], w12
594-
; CHECK-NEXT: mov.s v0[3], w9
595-
; CHECK-NEXT: mov.s v1[3], w10
596-
; CHECK-NEXT: uzp1.8h v0, v0, v1
591+
; CHECK-NEXT: mov.s v0[2], w13
592+
; CHECK-NEXT: mov.s v1[3], w9
593+
; CHECK-NEXT: mov.s v0[3], w10
594+
; CHECK-NEXT: uzp1.8h v0, v1, v0
597595
; CHECK-NEXT: xtn.8b v0, v0
598596
; CHECK-NEXT: str d0, [x1, x8, lsl #3]
599597
; CHECK-NEXT: add x8, x8, #1
@@ -608,35 +606,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
608606
; CHECK-BE-NEXT: .LBB5_1: // %loop
609607
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
610608
; CHECK-BE-NEXT: ldp x10, x9, [x0]
611-
; CHECK-BE-NEXT: ldrb w16, [x0, #18]
612-
; CHECK-BE-NEXT: lsr x11, x9, #40
613-
; CHECK-BE-NEXT: ubfx x12, x9, #33, #7
614-
; CHECK-BE-NEXT: lsr x15, x10, #45
615-
; CHECK-BE-NEXT: lsr x13, x10, #40
616-
; CHECK-BE-NEXT: ubfx x14, x10, #26, #14
617-
; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7
618-
; CHECK-BE-NEXT: ldrh w12, [x0, #16]
619-
; CHECK-BE-NEXT: fmov s0, w15
620-
; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14
621-
; CHECK-BE-NEXT: ubfx x14, x9, #14, #18
609+
; CHECK-BE-NEXT: ldrh w16, [x0, #16]
610+
; CHECK-BE-NEXT: ldrb w17, [x0, #18]
622611
; CHECK-BE-NEXT: add x0, x0, #32
623-
; CHECK-BE-NEXT: fmov s1, w11
624-
; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8
625-
; CHECK-BE-NEXT: lsl x12, x9, #24
626-
; CHECK-BE-NEXT: mov v0.s[1], w13
612+
; CHECK-BE-NEXT: lsl x11, x9, #24
613+
; CHECK-BE-NEXT: lsr x12, x9, #40
614+
; CHECK-BE-NEXT: lsr x13, x10, #45
615+
; CHECK-BE-NEXT: lsl x14, x10, #24
616+
; CHECK-BE-NEXT: lsr x15, x10, #40
617+
; CHECK-BE-NEXT: extr x12, x12, x11, #57
618+
; CHECK-BE-NEXT: fmov s0, w13
627619
; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
620+
; CHECK-BE-NEXT: extr x14, x15, x14, #50
621+
; CHECK-BE-NEXT: ubfx x15, x9, #14, #18
628622
; CHECK-BE-NEXT: extr x9, x10, x9, #40
629-
; CHECK-BE-NEXT: orr w12, w11, w12
630-
; CHECK-BE-NEXT: mov v1.s[1], w14
631-
; CHECK-BE-NEXT: lsr w12, w12, #19
623+
; CHECK-BE-NEXT: fmov s1, w12
624+
; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
625+
; CHECK-BE-NEXT: mov v0.s[1], w14
632626
; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
627+
; CHECK-BE-NEXT: orr w11, w12, w11
628+
; CHECK-BE-NEXT: mov v1.s[1], w15
629+
; CHECK-BE-NEXT: lsr w11, w11, #19
633630
; CHECK-BE-NEXT: mov v0.s[2], w13
634-
; CHECK-BE-NEXT: mov v1.s[2], w12
631+
; CHECK-BE-NEXT: mov v1.s[2], w11
635632
; CHECK-BE-NEXT: mov v0.s[3], w9
636633
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
637634
; CHECK-BE-NEXT: add x8, x8, #1
638635
; CHECK-BE-NEXT: cmp x8, #1000
639-
; CHECK-BE-NEXT: mov v1.s[3], w11
636+
; CHECK-BE-NEXT: mov v1.s[3], w12
640637
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
641638
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
642639
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
@@ -650,35 +647,34 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
650647
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
651648
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
652649
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
653-
; CHECK-DISABLE-NEXT: ldrb w16, [x0, #18]
654-
; CHECK-DISABLE-NEXT: lsr x11, x9, #40
655-
; CHECK-DISABLE-NEXT: ubfx x12, x9, #33, #7
656-
; CHECK-DISABLE-NEXT: lsr x15, x10, #45
657-
; CHECK-DISABLE-NEXT: lsr x13, x10, #40
658-
; CHECK-DISABLE-NEXT: ubfx x14, x10, #26, #14
659-
; CHECK-DISABLE-NEXT: orr w11, w12, w11, lsl #7
660-
; CHECK-DISABLE-NEXT: ldrh w12, [x0, #16]
661-
; CHECK-DISABLE-NEXT: fmov s0, w15
662-
; CHECK-DISABLE-NEXT: orr w13, w14, w13, lsl #14
663-
; CHECK-DISABLE-NEXT: ubfx x14, x9, #14, #18
650+
; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
651+
; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
664652
; CHECK-DISABLE-NEXT: add x0, x0, #32
665-
; CHECK-DISABLE-NEXT: fmov s1, w11
666-
; CHECK-DISABLE-NEXT: orr w11, w16, w12, lsl #8
667-
; CHECK-DISABLE-NEXT: lsl x12, x9, #24
668-
; CHECK-DISABLE-NEXT: mov v0.s[1], w13
653+
; CHECK-DISABLE-NEXT: lsl x11, x9, #24
654+
; CHECK-DISABLE-NEXT: lsr x12, x9, #40
655+
; CHECK-DISABLE-NEXT: lsr x13, x10, #45
656+
; CHECK-DISABLE-NEXT: lsl x14, x10, #24
657+
; CHECK-DISABLE-NEXT: lsr x15, x10, #40
658+
; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
659+
; CHECK-DISABLE-NEXT: fmov s0, w13
669660
; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
661+
; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
662+
; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18
670663
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
671-
; CHECK-DISABLE-NEXT: orr w12, w11, w12
672-
; CHECK-DISABLE-NEXT: mov v1.s[1], w14
673-
; CHECK-DISABLE-NEXT: lsr w12, w12, #19
664+
; CHECK-DISABLE-NEXT: fmov s1, w12
665+
; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
666+
; CHECK-DISABLE-NEXT: mov v0.s[1], w14
674667
; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
668+
; CHECK-DISABLE-NEXT: orr w11, w12, w11
669+
; CHECK-DISABLE-NEXT: mov v1.s[1], w15
670+
; CHECK-DISABLE-NEXT: lsr w11, w11, #19
675671
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
676-
; CHECK-DISABLE-NEXT: mov v1.s[2], w12
672+
; CHECK-DISABLE-NEXT: mov v1.s[2], w11
677673
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
678674
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
679675
; CHECK-DISABLE-NEXT: add x8, x8, #1
680676
; CHECK-DISABLE-NEXT: cmp x8, #1000
681-
; CHECK-DISABLE-NEXT: mov v1.s[3], w11
677+
; CHECK-DISABLE-NEXT: mov v1.s[3], w12
682678
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
683679
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
684680
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]

llvm/test/CodeGen/X86/pr92720.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
3+
4+
; Make sure we don't crash when shrinking the shift amount before legalization.
5+
define i64 @pr92720(i64 %x) {
6+
; CHECK-LABEL: pr92720:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: movabsq $8589934592, %rax # imm = 0x200000000
9+
; CHECK-NEXT: retq
10+
%or = or i64 %x, 255
11+
%sub = sub i64 0, %or
12+
%shl = shl i64 1, %sub
13+
%sext = shl i64 %shl, 32
14+
ret i64 %sext
15+
}

0 commit comments

Comments
 (0)