[DAGCombine] Remove oneuse restrictions for RISCV in folding (shl (add_nsw x, c1), c2) and folding (shl (sext (add x, c1)), c2) in some scenarios #101294
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-backend-aarch64

Author: LiqinWeng (LiqinWeng)

Changes

Patch is 22.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101294.diff

14 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b35d08b327ef3..e6d0bd2495f7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Shl1 =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1e9da9b819bdd..9bcf6a2f67056 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17518,6 +17518,9 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // if (!ShiftLHS->hasOneUse())
+ // return false;
+
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
// combine it with shift 'N' to let it be lowered to UBFX except:
// ((x >> C) & mask) << C.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2ad91de566323..7eeb4b71b5d43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1036,6 +1036,11 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+
// Always commute pre-type legalization and right shifts.
// We're looking for shl(or(x,y),z) patterns.
if (Level < CombineLevel::AfterLegalizeTypes ||
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 7aeaebc584c64..1a8e123246a07 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2156,6 +2156,16 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'tstbit'
}
+bool HexagonTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3fd961f5a7462..a6bd57630031c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -155,6 +155,9 @@ class HexagonTargetLowering : public TargetLowering {
bool hasBitTest(SDValue X, SDValue Y) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1686ec572c855..5de33627886a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17207,6 +17207,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+ return true;
+}
+
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0bdfdcd15441f..2d42353adafa3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1064,6 +1064,9 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b971afda4229a..fc6d90543ef86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3490,6 +3490,17 @@ X86TargetLowering::preferredShiftLegalizationStrategy(
ExpansionFactor);
}
+bool X86TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
+
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 362daa98e1f8e..4dccb9903df5d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1181,6 +1181,9 @@ namespace llvm {
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
unsigned ExpansionFactor) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool shouldSplatInsEltVarIndex(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index 5de03a92afeb4..f723713e77d08 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -249,27 +249,28 @@ entry:
define i32 @multiuse(i32 %i, ptr %x, ptr %y) {
; CHECK-T1-LABEL: multiuse:
; CHECK-T1: @ %bb.0: @ %entry
+; CHECK-T1-NEXT: lsls r2, r0, #3
+; CHECK-T1-NEXT: adds r1, r1, r2
+; CHECK-T1-NEXT: ldr r1, [r1, #4]
; CHECK-T1-NEXT: lsls r0, r0, #1
+; CHECK-T1-NEXT: adds r0, r1, r0
; CHECK-T1-NEXT: adds r0, r0, #1
-; CHECK-T1-NEXT: lsls r2, r0, #2
-; CHECK-T1-NEXT: ldr r1, [r1, r2]
-; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: multiuse:
; CHECK-T2: @ %bb.0: @ %entry
-; CHECK-T2-NEXT: lsls r0, r0, #1
+; CHECK-T2-NEXT: add.w r1, r1, r0, lsl #3
+; CHECK-T2-NEXT: ldr r1, [r1, #4]
+; CHECK-T2-NEXT: add.w r0, r1, r0, lsl #1
; CHECK-T2-NEXT: adds r0, #1
-; CHECK-T2-NEXT: ldr.w r1, [r1, r0, lsl #2]
-; CHECK-T2-NEXT: add r0, r1
; CHECK-T2-NEXT: bx lr
;
; CHECK-A-LABEL: multiuse:
; CHECK-A: @ %bb.0: @ %entry
-; CHECK-A-NEXT: mov r2, #1
-; CHECK-A-NEXT: orr r0, r2, r0, lsl #1
-; CHECK-A-NEXT: ldr r1, [r1, r0, lsl #2]
-; CHECK-A-NEXT: add r0, r0, r1
+; CHECK-A-NEXT: add r1, r1, r0, lsl #3
+; CHECK-A-NEXT: ldr r1, [r1, #4]
+; CHECK-A-NEXT: add r0, r1, r0, lsl #1
+; CHECK-A-NEXT: add r0, r0, #1
; CHECK-A-NEXT: bx lr
entry:
%mul = shl i32 %i, 1
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669d..28cf3cb597478 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -70,11 +70,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
; RV64-LABEL: test2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: slli a4, a3, 3
-; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a2, 40(a0)
; RV64-NEXT: sd a2, 48(a0)
; RV64-NEXT: sd a3, 280(a0)
; RV64-NEXT: ret
@@ -100,11 +98,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB3_2: # %entry
-; RV64-NEXT: slli a2, a4, 3
-; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a5, 40(a0)
; RV64-NEXT: sd a5, 48(a0)
; RV64-NEXT: sd a4, 280(a0)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 253cfb040308b..d313f188568d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -7,14 +7,14 @@
define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: srem_eq_fold_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: li a0, -85
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a1, -85
-; CHECK-NEXT: vmacc.vx v9, a1, v8
-; CHECK-NEXT: vsll.vi v8, v9, 7
-; CHECK-NEXT: vsrl.vi v9, v9, 1
-; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v9, v8, 7
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vmsleu.vx v0, v8, a0
; CHECK-NEXT: ret
%rem = srem <vscale x 4 x i8> %va, splat (i8 6)
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a8..55de5f011a620 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -314,64 +314,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 12(a0)
-; RV32-NEXT: lw a1, 8(s0)
-; RV32-NEXT: slli a2, a0, 30
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: srli s1, a1, 2
-; RV32-NEXT: or s1, s1, a2
-; RV32-NEXT: slli a2, a1, 31
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or s2, a4, a2
-; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: lw a0, 8(a0)
+; RV32-NEXT: lw a1, 4(s0)
+; RV32-NEXT: lbu a2, 12(s0)
+; RV32-NEXT: slli a3, a0, 31
+; RV32-NEXT: srli s1, a1, 1
+; RV32-NEXT: or s1, s1, a3
+; RV32-NEXT: slli a3, a2, 30
+; RV32-NEXT: srli a4, a0, 2
+; RV32-NEXT: or s2, a4, a3
+; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai s3, a0, 31
-; RV32-NEXT: srli a1, a1, 1
-; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: slli a2, a2, 31
; RV32-NEXT: lw a0, 0(s0)
-; RV32-NEXT: srai s4, a1, 31
-; RV32-NEXT: slli a1, a3, 31
+; RV32-NEXT: srai s4, a2, 31
+; RV32-NEXT: slli a1, a1, 31
; RV32-NEXT: srai a1, a1, 31
; RV32-NEXT: li a2, 6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: li a2, 7
+; RV32-NEXT: li a2, -5
+; RV32-NEXT: li a3, -1
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s4
-; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s2, a0
; RV32-NEXT: mv s4, a1
-; RV32-NEXT: li a2, -5
-; RV32-NEXT: li a3, -1
+; RV32-NEXT: li a2, 7
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s3
+; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: or a2, s5, s6
; RV32-NEXT: snez a2, a2
-; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: xori a1, s2, 1
+; RV32-NEXT: xori a1, s2, 2
; RV32-NEXT: or a1, a1, s4
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: neg a3, a2
+; RV32-NEXT: slli a4, a1, 2
+; RV32-NEXT: addi a5, a0, -1
+; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: andi a3, a0, 7
-; RV32-NEXT: sb a3, 12(s0)
-; RV32-NEXT: slli a3, a1, 1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: srli a2, a1, 31
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: or a0, a2, a0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: andi a1, a1, 7
+; RV32-NEXT: sb a1, 12(s0)
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: addi a0, a0, -2
+; RV32-NEXT: sw a0, 4(s0)
+; RV32-NEXT: srli a0, a5, 31
+; RV32-NEXT: andi a5, a5, 1
+; RV32-NEXT: slli a5, a5, 1
+; RV32-NEXT: or a0, a4, a0
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: addi a0, a0, -4
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -393,23 +395,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lbu a0, 12(a0)
-; RV64-NEXT: lwu a1, 8(s0)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: ld a2, 0(s0)
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ld a1, 0(a0)
+; RV64-NEXT: lwu a0, 8(a0)
+; RV64-NEXT: srli a2, a1, 2
+; RV64-NEXT: lbu a3, 12(s0)
+; RV64-NEXT: slli a4, a0, 62
+; RV64-NEXT: or a2, a4, a2
+; RV64-NEXT: srai s1, a2, 31
+; RV64-NEXT: slli a3, a3, 32
+; RV64-NEXT: or a0, a0, a3
; RV64-NEXT: slli a0, a0, 29
-; RV64-NEXT: srai s1, a0, 31
-; RV64-NEXT: srli a0, a2, 2
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: srai a0, a0, 31
-; RV64-NEXT: slli a2, a2, 31
-; RV64-NEXT: srai s2, a2, 31
-; RV64-NEXT: li a1, 7
+; RV64-NEXT: slli a1, a1, 31
+; RV64-NEXT: srai s2, a1, 31
+; RV64-NEXT: li a1, -5
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s3, a0
-; RV64-NEXT: li a1, -5
+; RV64-NEXT: li a1, 7
; RV64-NEXT: mv a0, s1
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s1, a0
@@ -426,25 +428,26 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: sltu a0, a1, a0
-; RV64-NEXT: addi s1, s1, -2
+; RV64-NEXT: addi s1, s1, -1
; RV64-NEXT: seqz a1, s1
-; RV64-NEXT: addi s3, s3, -1
+; RV64-NEXT: addi s3, s3, -2
; RV64-NEXT: seqz a2, s3
; RV64-NEXT: neg a0, a0
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a3, a2, 2
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: slli a3, a1, 2
-; RV64-NEXT: slli a4, a2, 31
-; RV64-NEXT: srli a4, a4, 62
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: sw a3, 8(s0)
-; RV64-NEXT: slli a1, a1, 29
-; RV64-NEXT: srli a1, a1, 61
-; RV64-NEXT: sb a1, 12(s0)
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a2, a2, 29
+; RV64-NEXT: srli a2, a2, 61
+; RV64-NEXT: sb a2, 12(s0)
+; RV64-NEXT: slli a2, a1, 31
+; RV64-NEXT: srli a2, a2, 62
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: addi a2, a2, -4
+; RV64-NEXT: sw a2, 8(s0)
; RV64-NEXT: slli a0, a0, 31
; RV64-NEXT: srli a0, a0, 31
-; RV64-NEXT: slli a2, a2, 33
-; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: sd a0, 0(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -466,64 +469,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lbu a0, 12(a0)
-; RV32M-NEXT: lw a1, 8(s0)
-; RV32M-NEXT: slli a2, a0, 30
-; RV32M-NEXT: lw a3, 4(s0)
-; RV32M-NEXT: srli s1, a1, 2
-; RV32M-NEXT: or s1, s1, a2
-; RV32M-NEXT: slli a2, a1, 31
-; RV32M-NEXT: srli a4, a3, 1
-; RV32M-NEXT: or s2, a4, a2
-; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: lw a0, 8(a0)
+; RV32M-NEXT: lw a1, 4(s0)
+; RV32M-NEXT: lbu a2, 12(s0)
+; RV32M-NEXT: slli a3, a0, 31
+; RV32M-NEXT: srli s1, a1, 1
+; RV32M-NEXT: or s1, s1, a3
+; RV32M-NEXT: slli a3, a2, 30
+; RV32M-NEXT: srli a4, a0, 2
+; RV32M-NEXT: or s2, a4, a3
+; RV32M-NEXT: srli a0, a0, 1
; RV32M-NEXT: slli a0, a0, 31
; RV32M-NEXT: srai s3, a0, 31
-; RV32M-NEXT: srli a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 31
+; RV32M-NEXT: srli a2, a2, 2
+; RV32M-NEXT: slli a2, a2, 31
; RV32M-NEXT: lw a0, 0(s0)
-; RV32M-NEXT: srai s4, a1, 31
-; RV32M-NEXT: slli a1, a3, 31
+; RV32M-NEXT: srai s4, a2, 31
+; RV32M-NEXT: slli a1, a1, 31
; RV32M-NEXT: srai a1, a1, 31
; RV32M-NEXT: li a2, 6
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s5, a0
; RV32M-NEXT: mv s6, a1
-; RV32M-NEXT: li a2, 7
+; RV32M-NEXT: li a2, -5
+; RV32M-NEXT: li a3, -1
; RV32M-NEXT: mv a0, s2
; RV32M-NEXT: mv a1, s4
-; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s2, a0
; RV32M-NEXT: mv s4, a1
-; RV32M-NEXT: li a2, -5
-; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: li a2, 7
; RV32M-NEXT: mv a0, s1
; RV32M-NEXT: mv a1, s3
+; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: or a2, s5, s6
; RV32M-NEXT: snez a2, a2
-; RV32M-NEXT: xori a0, a0, 2
+; RV32M-NEXT: xori a0, a0, 1
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: seqz a0, a0
-; RV32M-NEXT: xori a1, s2, 1
+; RV32M-NEXT: xori a1, s2, 2
; RV32M-NEXT: or a1, a1, s4
; RV32M-NEXT: seqz a1, a1
; RV32M-NEXT: neg a3, a2
+; RV32M-NEXT: slli a4, a1, 2
+; RV32M-NEXT: addi a5, a0, -1
+; RV32M-NEXT: slli a0, a0, 1
; RV32M-NEXT: addi a1, a1, -1
-; RV32M-NEXT: addi a0, a0, -1
; RV32M-NEXT: sw a3, 0(s0)
-; RV32M-NEXT: andi a3, a0, 7
-; RV32M-NEXT: sb a3, 12(s0)
-; RV32M-NEXT: slli a3, a1, 1
-; RV32M-NEXT: or a2, a3, a2
-; RV32M-NEXT: sw a2, 4(s0)
-; RV32M-NEXT: srli a2, a1, 31
-; RV32M-NEXT: andi a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 1
-; RV32M-NEXT: slli a0, a0, 2
-; RV32M-NEXT: or a0, a2, a0
-; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: andi a1, a1, 7
+; RV32M-NEXT: sb a1, 12(s0)
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: addi a0, a0, -2
+; RV32M-NEXT: sw a0, 4(s0)
+; RV32M-NEXT: srli a0, a5, 31
+; RV32M-NEXT: andi a5, a5, 1
+; RV32M-NEXT: slli a5, a5, 1
+; RV32M-NEXT: or a0, a4, a0
+; RV32M-NEXT: or a0, a0, a5
+; RV32M-NEXT: addi a0, a0, -4
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -585,22 +590,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M-NEXT: srli a1, a1, 1
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: sltu a1, a5, a1
+; RV64M-NEXT: slli a4, a2, 2
; RV64M-NEXT: addi a2, a2, -1
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: neg a1, a1
-; RV64M-NEXT: slli a4, a3, 33
+; RV64M-NEXT: slli a5, a3, 33
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srli a1, a1, 31
-; RV64M-NEXT: or a1, a1, a4
+; RV64M-NEXT: or a1, a1, a5
; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
-; RV64M-NEXT: slli a3, a3, 31
-; RV64M-NEXT: srl...
[truncated]
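In short, the generic change drops the N0->hasOneUse() check from the visitSHL fold that rewrites (shl (add/or x, c1), c2) into (add/or (shl x, c2), c1 << c2), leaving the decision to each target's isDesirableToCommuteWithShift hook; in this version of the patch the corresponding target-side one-use checks are still commented out. As a rough illustration only (the function and value names below are hypothetical and not taken from the patch), the RISC-V tests exercise patterns like this, where the add nsw feeding a scaled address has a second use:

; Minimal sketch, assuming RV64 and i64 elements; names are made up for illustration.
define void @multiuse_add(ptr %array, i64 %a, i64 %v) {
entry:
  %idx = add nsw i64 %a, 5                                 ; %idx has two uses
  %p0 = getelementptr inbounds i64, ptr %array, i64 %idx   ; (shl (add %a, 5), 3) in the DAG
  store i64 %v, ptr %p0
  %p1 = getelementptr inbounds i64, ptr %array, i64 %a
  store i64 %idx, ptr %p1                                   ; second use of %idx
  ret void
}

With the one-use restriction gone, the address of the first store can be formed as (add (shl %a, 3), 40) and the constant folded into the store's immediate offset, which is what the updated riscv-shifted-extend.ll checks (e.g. sd a2, 40(a0)) reflect.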
@llvm/pr-subscribers-backend-arm

Author: LiqinWeng (LiqinWeng)
@llvm/pr-subscribers-llvm-selectiondag Author: LiqinWeng (LiqinWeng) ChangesPatch is 22.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101294.diff 14 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b35d08b327ef3..e6d0bd2495f7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Shl1 =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1e9da9b819bdd..9bcf6a2f67056 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17518,6 +17518,9 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // if (!ShiftLHS->hasOneUse())
+ // return false;
+
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
// combine it with shift 'N' to let it be lowered to UBFX except:
// ((x >> C) & mask) << C.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 2ad91de566323..7eeb4b71b5d43 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1036,6 +1036,11 @@ bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+
// Always commute pre-type legalization and right shifts.
// We're looking for shl(or(x,y),z) patterns.
if (Level < CombineLevel::AfterLegalizeTypes ||
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 7aeaebc584c64..1a8e123246a07 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2156,6 +2156,16 @@ bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return X.getValueType().isScalarInteger(); // 'tstbit'
}
+bool HexagonTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3fd961f5a7462..a6bd57630031c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -155,6 +155,9 @@ class HexagonTargetLowering : public TargetLowering {
bool hasBitTest(SDValue X, SDValue Y) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1686ec572c855..5de33627886a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17207,6 +17207,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0).hasOneUse()) {
+ // return false;
+ // }
+ return true;
+}
+
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0bdfdcd15441f..2d42353adafa3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1064,6 +1064,9 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b971afda4229a..fc6d90543ef86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3490,6 +3490,17 @@ X86TargetLowering::preferredShiftLegalizationStrategy(
ExpansionFactor);
}
+bool X86TargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ // if (!N->getOperand(0)->hasOneUse())
+ // return false;
+ return true;
+}
+
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
// Any legal vector type can be splatted more efficiently than
// loading/spilling from memory.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 362daa98e1f8e..4dccb9903df5d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1181,6 +1181,9 @@ namespace llvm {
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
unsigned ExpansionFactor) const override;
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
bool shouldSplatInsEltVarIndex(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll
index 5de03a92afeb4..f723713e77d08 100644
--- a/llvm/test/CodeGen/ARM/add-like-or.ll
+++ b/llvm/test/CodeGen/ARM/add-like-or.ll
@@ -249,27 +249,28 @@ entry:
define i32 @multiuse(i32 %i, ptr %x, ptr %y) {
; CHECK-T1-LABEL: multiuse:
; CHECK-T1: @ %bb.0: @ %entry
+; CHECK-T1-NEXT: lsls r2, r0, #3
+; CHECK-T1-NEXT: adds r1, r1, r2
+; CHECK-T1-NEXT: ldr r1, [r1, #4]
; CHECK-T1-NEXT: lsls r0, r0, #1
+; CHECK-T1-NEXT: adds r0, r1, r0
; CHECK-T1-NEXT: adds r0, r0, #1
-; CHECK-T1-NEXT: lsls r2, r0, #2
-; CHECK-T1-NEXT: ldr r1, [r1, r2]
-; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: multiuse:
; CHECK-T2: @ %bb.0: @ %entry
-; CHECK-T2-NEXT: lsls r0, r0, #1
+; CHECK-T2-NEXT: add.w r1, r1, r0, lsl #3
+; CHECK-T2-NEXT: ldr r1, [r1, #4]
+; CHECK-T2-NEXT: add.w r0, r1, r0, lsl #1
; CHECK-T2-NEXT: adds r0, #1
-; CHECK-T2-NEXT: ldr.w r1, [r1, r0, lsl #2]
-; CHECK-T2-NEXT: add r0, r1
; CHECK-T2-NEXT: bx lr
;
; CHECK-A-LABEL: multiuse:
; CHECK-A: @ %bb.0: @ %entry
-; CHECK-A-NEXT: mov r2, #1
-; CHECK-A-NEXT: orr r0, r2, r0, lsl #1
-; CHECK-A-NEXT: ldr r1, [r1, r0, lsl #2]
-; CHECK-A-NEXT: add r0, r0, r1
+; CHECK-A-NEXT: add r1, r1, r0, lsl #3
+; CHECK-A-NEXT: ldr r1, [r1, #4]
+; CHECK-A-NEXT: add r0, r1, r0, lsl #1
+; CHECK-A-NEXT: add r0, r0, #1
; CHECK-A-NEXT: bx lr
entry:
%mul = shl i32 %i, 1
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669d..28cf3cb597478 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -70,11 +70,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
; RV64-LABEL: test2:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: slli a4, a3, 3
-; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a2, 40(a0)
; RV64-NEXT: sd a2, 48(a0)
; RV64-NEXT: sd a3, 280(a0)
; RV64-NEXT: ret
@@ -100,11 +98,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB3_2: # %entry
-; RV64-NEXT: slli a2, a4, 3
-; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a5, 40(a0)
; RV64-NEXT: sd a5, 48(a0)
; RV64-NEXT: sd a4, 280(a0)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 253cfb040308b..d313f188568d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -7,14 +7,14 @@
define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: srem_eq_fold_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: li a0, -85
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a1, -85
-; CHECK-NEXT: vmacc.vx v9, a1, v8
-; CHECK-NEXT: vsll.vi v8, v9, 7
-; CHECK-NEXT: vsrl.vi v9, v9, 1
-; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vmul.vx v8, v8, a0
+; CHECK-NEXT: vsll.vi v9, v8, 7
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: vsrl.vi v8, v8, 1
+; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vmsleu.vx v0, v8, a0
; CHECK-NEXT: ret
%rem = srem <vscale x 4 x i8> %va, splat (i8 6)
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 457d0380ca8a8..55de5f011a620 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -314,64 +314,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 12(a0)
-; RV32-NEXT: lw a1, 8(s0)
-; RV32-NEXT: slli a2, a0, 30
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: srli s1, a1, 2
-; RV32-NEXT: or s1, s1, a2
-; RV32-NEXT: slli a2, a1, 31
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or s2, a4, a2
-; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: lw a0, 8(a0)
+; RV32-NEXT: lw a1, 4(s0)
+; RV32-NEXT: lbu a2, 12(s0)
+; RV32-NEXT: slli a3, a0, 31
+; RV32-NEXT: srli s1, a1, 1
+; RV32-NEXT: or s1, s1, a3
+; RV32-NEXT: slli a3, a2, 30
+; RV32-NEXT: srli a4, a0, 2
+; RV32-NEXT: or s2, a4, a3
+; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai s3, a0, 31
-; RV32-NEXT: srli a1, a1, 1
-; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: slli a2, a2, 31
; RV32-NEXT: lw a0, 0(s0)
-; RV32-NEXT: srai s4, a1, 31
-; RV32-NEXT: slli a1, a3, 31
+; RV32-NEXT: srai s4, a2, 31
+; RV32-NEXT: slli a1, a1, 31
; RV32-NEXT: srai a1, a1, 31
; RV32-NEXT: li a2, 6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: li a2, 7
+; RV32-NEXT: li a2, -5
+; RV32-NEXT: li a3, -1
; RV32-NEXT: mv a0, s2
; RV32-NEXT: mv a1, s4
-; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: mv s2, a0
; RV32-NEXT: mv s4, a1
-; RV32-NEXT: li a2, -5
-; RV32-NEXT: li a3, -1
+; RV32-NEXT: li a2, 7
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s3
+; RV32-NEXT: li a3, 0
; RV32-NEXT: call __moddi3
; RV32-NEXT: or a2, s5, s6
; RV32-NEXT: snez a2, a2
-; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: xori a1, s2, 1
+; RV32-NEXT: xori a1, s2, 2
; RV32-NEXT: or a1, a1, s4
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: neg a3, a2
+; RV32-NEXT: slli a4, a1, 2
+; RV32-NEXT: addi a5, a0, -1
+; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: andi a3, a0, 7
-; RV32-NEXT: sb a3, 12(s0)
-; RV32-NEXT: slli a3, a1, 1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: srli a2, a1, 31
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: or a0, a2, a0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: andi a1, a1, 7
+; RV32-NEXT: sb a1, 12(s0)
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: addi a0, a0, -2
+; RV32-NEXT: sw a0, 4(s0)
+; RV32-NEXT: srli a0, a5, 31
+; RV32-NEXT: andi a5, a5, 1
+; RV32-NEXT: slli a5, a5, 1
+; RV32-NEXT: or a0, a4, a0
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: addi a0, a0, -4
; RV32-NEXT: sw a0, 8(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -393,23 +395,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lbu a0, 12(a0)
-; RV64-NEXT: lwu a1, 8(s0)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: ld a2, 0(s0)
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ld a1, 0(a0)
+; RV64-NEXT: lwu a0, 8(a0)
+; RV64-NEXT: srli a2, a1, 2
+; RV64-NEXT: lbu a3, 12(s0)
+; RV64-NEXT: slli a4, a0, 62
+; RV64-NEXT: or a2, a4, a2
+; RV64-NEXT: srai s1, a2, 31
+; RV64-NEXT: slli a3, a3, 32
+; RV64-NEXT: or a0, a0, a3
; RV64-NEXT: slli a0, a0, 29
-; RV64-NEXT: srai s1, a0, 31
-; RV64-NEXT: srli a0, a2, 2
-; RV64-NEXT: slli a1, a1, 62
-; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: srai a0, a0, 31
-; RV64-NEXT: slli a2, a2, 31
-; RV64-NEXT: srai s2, a2, 31
-; RV64-NEXT: li a1, 7
+; RV64-NEXT: slli a1, a1, 31
+; RV64-NEXT: srai s2, a1, 31
+; RV64-NEXT: li a1, -5
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s3, a0
-; RV64-NEXT: li a1, -5
+; RV64-NEXT: li a1, 7
; RV64-NEXT: mv a0, s1
; RV64-NEXT: call __moddi3
; RV64-NEXT: mv s1, a0
@@ -426,25 +428,26 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: sltu a0, a1, a0
-; RV64-NEXT: addi s1, s1, -2
+; RV64-NEXT: addi s1, s1, -1
; RV64-NEXT: seqz a1, s1
-; RV64-NEXT: addi s3, s3, -1
+; RV64-NEXT: addi s3, s3, -2
; RV64-NEXT: seqz a2, s3
; RV64-NEXT: neg a0, a0
-; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a3, a2, 2
; RV64-NEXT: addi a1, a1, -1
-; RV64-NEXT: slli a3, a1, 2
-; RV64-NEXT: slli a4, a2, 31
-; RV64-NEXT: srli a4, a4, 62
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: sw a3, 8(s0)
-; RV64-NEXT: slli a1, a1, 29
-; RV64-NEXT: srli a1, a1, 61
-; RV64-NEXT: sb a1, 12(s0)
+; RV64-NEXT: addi a2, a2, -1
+; RV64-NEXT: slli a2, a2, 29
+; RV64-NEXT: srli a2, a2, 61
+; RV64-NEXT: sb a2, 12(s0)
+; RV64-NEXT: slli a2, a1, 31
+; RV64-NEXT: srli a2, a2, 62
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: addi a2, a2, -4
+; RV64-NEXT: sw a2, 8(s0)
; RV64-NEXT: slli a0, a0, 31
; RV64-NEXT: srli a0, a0, 31
-; RV64-NEXT: slli a2, a2, 33
-; RV64-NEXT: or a0, a0, a2
+; RV64-NEXT: slli a1, a1, 33
+; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: sd a0, 0(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -466,64 +469,66 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lbu a0, 12(a0)
-; RV32M-NEXT: lw a1, 8(s0)
-; RV32M-NEXT: slli a2, a0, 30
-; RV32M-NEXT: lw a3, 4(s0)
-; RV32M-NEXT: srli s1, a1, 2
-; RV32M-NEXT: or s1, s1, a2
-; RV32M-NEXT: slli a2, a1, 31
-; RV32M-NEXT: srli a4, a3, 1
-; RV32M-NEXT: or s2, a4, a2
-; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: lw a0, 8(a0)
+; RV32M-NEXT: lw a1, 4(s0)
+; RV32M-NEXT: lbu a2, 12(s0)
+; RV32M-NEXT: slli a3, a0, 31
+; RV32M-NEXT: srli s1, a1, 1
+; RV32M-NEXT: or s1, s1, a3
+; RV32M-NEXT: slli a3, a2, 30
+; RV32M-NEXT: srli a4, a0, 2
+; RV32M-NEXT: or s2, a4, a3
+; RV32M-NEXT: srli a0, a0, 1
; RV32M-NEXT: slli a0, a0, 31
; RV32M-NEXT: srai s3, a0, 31
-; RV32M-NEXT: srli a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 31
+; RV32M-NEXT: srli a2, a2, 2
+; RV32M-NEXT: slli a2, a2, 31
; RV32M-NEXT: lw a0, 0(s0)
-; RV32M-NEXT: srai s4, a1, 31
-; RV32M-NEXT: slli a1, a3, 31
+; RV32M-NEXT: srai s4, a2, 31
+; RV32M-NEXT: slli a1, a1, 31
; RV32M-NEXT: srai a1, a1, 31
; RV32M-NEXT: li a2, 6
; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s5, a0
; RV32M-NEXT: mv s6, a1
-; RV32M-NEXT: li a2, 7
+; RV32M-NEXT: li a2, -5
+; RV32M-NEXT: li a3, -1
; RV32M-NEXT: mv a0, s2
; RV32M-NEXT: mv a1, s4
-; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: mv s2, a0
; RV32M-NEXT: mv s4, a1
-; RV32M-NEXT: li a2, -5
-; RV32M-NEXT: li a3, -1
+; RV32M-NEXT: li a2, 7
; RV32M-NEXT: mv a0, s1
; RV32M-NEXT: mv a1, s3
+; RV32M-NEXT: li a3, 0
; RV32M-NEXT: call __moddi3
; RV32M-NEXT: or a2, s5, s6
; RV32M-NEXT: snez a2, a2
-; RV32M-NEXT: xori a0, a0, 2
+; RV32M-NEXT: xori a0, a0, 1
; RV32M-NEXT: or a0, a0, a1
; RV32M-NEXT: seqz a0, a0
-; RV32M-NEXT: xori a1, s2, 1
+; RV32M-NEXT: xori a1, s2, 2
; RV32M-NEXT: or a1, a1, s4
; RV32M-NEXT: seqz a1, a1
; RV32M-NEXT: neg a3, a2
+; RV32M-NEXT: slli a4, a1, 2
+; RV32M-NEXT: addi a5, a0, -1
+; RV32M-NEXT: slli a0, a0, 1
; RV32M-NEXT: addi a1, a1, -1
-; RV32M-NEXT: addi a0, a0, -1
; RV32M-NEXT: sw a3, 0(s0)
-; RV32M-NEXT: andi a3, a0, 7
-; RV32M-NEXT: sb a3, 12(s0)
-; RV32M-NEXT: slli a3, a1, 1
-; RV32M-NEXT: or a2, a3, a2
-; RV32M-NEXT: sw a2, 4(s0)
-; RV32M-NEXT: srli a2, a1, 31
-; RV32M-NEXT: andi a1, a1, 1
-; RV32M-NEXT: slli a1, a1, 1
-; RV32M-NEXT: slli a0, a0, 2
-; RV32M-NEXT: or a0, a2, a0
-; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: andi a1, a1, 7
+; RV32M-NEXT: sb a1, 12(s0)
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: addi a0, a0, -2
+; RV32M-NEXT: sw a0, 4(s0)
+; RV32M-NEXT: srli a0, a5, 31
+; RV32M-NEXT: andi a5, a5, 1
+; RV32M-NEXT: slli a5, a5, 1
+; RV32M-NEXT: or a0, a4, a0
+; RV32M-NEXT: or a0, a0, a5
+; RV32M-NEXT: addi a0, a0, -4
; RV32M-NEXT: sw a0, 8(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -585,22 +590,23 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M-NEXT: srli a1, a1, 1
; RV64M-NEXT: or a1, a1, a4
; RV64M-NEXT: sltu a1, a5, a1
+; RV64M-NEXT: slli a4, a2, 2
; RV64M-NEXT: addi a2, a2, -1
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: neg a1, a1
-; RV64M-NEXT: slli a4, a3, 33
+; RV64M-NEXT: slli a5, a3, 33
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srli a1, a1, 31
-; RV64M-NEXT: or a1, a1, a4
+; RV64M-NEXT: or a1, a1, a5
; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
-; RV64M-NEXT: slli a3, a3, 31
-; RV64M-NEXT: srl...
[truncated]
@llvm/pr-subscribers-backend-powerpc
@@ -10070,7 +10070,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
How is this beneficial if you have to duplicate the inner add?
Please see the before/after comparisons; because the inner add is also used as a load/store address, the shifted constant folds into the memory offset, so duplicating the add does not cost an extra instruction:
riscv32: https://godbolt.org/z/o8GdMKrae
riscv64: https://godbolt.org/z/Yh5bPz56z
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32 %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64 %s
Don't need -verify-machineinstrs
fixed
// LD/ST will optimize constant Offset extraction, so when AddNode is used by
// LD/ST, it can still complete the folding optimization operation performed
// above.
auto isLDST = [&]() {
This function name isn't great. Should it be isUsedByLdSt?
done
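For readers following the thread, here is a minimal sketch of how the renamed helper sits inside the RISC-V isDesirableToCommuteWithShift override. It is reconstructed from the hunks quoted above; the surrounding function body and the final profitability condition are assumptions, not the committed code.

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  SDValue N0 = N->getOperand(0);

  // LD/ST folds the constant offset, so if the add feeding the shift is only
  // otherwise used by loads/stores, commuting still pays off even though the
  // add is kept alive.
  auto isUsedByLdSt = [&]() {
    for (SDNode *Use : N0->uses()) {
      // Skip the shift currently being combined (selects are handled
      // separately in the real code).
      if (Use == N || Use->getOpcode() == ISD::SELECT)
        continue;
      if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
        return false;
    }
    return true;
  };

  // Simplified decision for this sketch: allow the commute when the add is
  // single-use or only feeds loads/stores besides this shift.
  return N0.hasOneUse() || isUsedByLdSt();
}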
// This use is the one we're on right now. Skip it
if (Use == N || Use->getOpcode() == ISD::SELECT)
continue;
if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use)) {
Do we need to check that c1 << c2 fits in 12 bits so it will fold into the load/store?
There may be no need for a check. The address calculation will generate LUI + addi to ensure that the LD/ST offset falls within the 12-bit range.
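As noted, the patch relies on address materialization (LUI + addi) when the offset is large. If an explicit guard were ever wanted, a rough sketch (an assumption for discussion, not code from this PR) could look like:

// Hypothetical guard, not part of this patch: only treat load/store users as
// "free" when the folded constant (c1 << c2) is still a legal 12-bit signed
// immediate for the RISC-V load/store addressing mode.
if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  if (auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    int64_t Folded = C1->getSExtValue() << C2->getZExtValue();
    if (!isInt<12>(Folded))
      return false; // A large offset needs LUI + ADDI anyway.
  }
}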
LGTM
LGTM
Regression:
Before: https://godbolt.org/z/6vKhhxrWr
After:
Has there been a fix for this yet? If not, it would be good to create an issue so we don't forget about it.
No. Thank you for doing this!
Stacked on llvm#119526. This fixes a regression from llvm#101294 by checking if we might be clobbering a sh{1,2,3}add pattern. Only do this if the underlying add isn't going to be folded away into an address offset.
Opened #119527 to address this.
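For context, a heavily simplified sketch of the guard that the follow-up describes (names and conditions are assumptions, not the merged #119527 code): with Zba, a shift amount of 1-3 feeding an add can be selected as sh1add/sh2add/sh3add, so the commute is only kept when the inner add's constant would fold into a load/store offset anyway.

// Sketch only: ShAmt is the shl amount and isUsedByLdSt() is the helper from
// the review thread above; both are assumed to already be in scope.
bool MightClobberShXAdd =
    Subtarget.hasStdExtZba() && ShAmt >= 1 && ShAmt <= 3;
if (MightClobberShXAdd && !isUsedByLdSt())
  return false; // Keep (shl (add X, C1), C2) selectable as shXadd.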
This patch removes the restriction on folding (shl (add_nsw x, c1), c2) and (shl (sext (add x, c1)), c2); the motivating test case comes from Dhrystone, see these links:
riscv32: https://godbolt.org/z/o8GdMKrae
riscv64: https://godbolt.org/z/Yh5bPz56z
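For illustration, a reduced C++ source pattern in the spirit of the linked snippets (written for exposition; not copied from the godbolt links or from Dhrystone itself): the add i + 5 has several users, so the old hasOneUse() bailout blocked rewriting (shl (add x, 5), 3) as (add (shl x, 3), 40), even though the shifted constant folds straight into the store offsets, as in the test2 diff above.

// Reduced illustration; the repeated 'i + 5' style indexing gives the add
// multiple users, matching the multi-use case this patch targets.
void store_pattern(long *a, long i, long v) {
  a[i + 5] = v;      // address = a + ((i + 5) << 3), becomes offset 40
  a[i + 6] = v;      // a second user keeps the add alive
  a[i + 35] = i + 5; // the add's value itself is also stored
}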