-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV] Strength reduce mul by 2^n + 2/4/8 + 1 #88911
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
With zba, we can expand this to (add (shl X, C1), (shXadd X, X)). Note that this is our first expansion to a three instruction sequence. I believe this to general be a reasonable tradeoff for most architectures, but we may want to (someday) consider a tuning flag here. I plan to support 2^n + (2/4/8 + 1) eventually as well, but that comes behind 2^N - 2^M. Both are also three instruction sequences.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) ChangesWith zba, we can expand this to (add (shl X, C1), (shXadd X, X)). Note that this is our first expansion to a three instruction sequence. I believe this to general be a reasonable tradeoff for most architectures, but we may want to (someday) consider a tuning flag here. I plan to support 2^n + (2/4/8 + 1) eventually as well, but that comes behind 2^N - 2^M. Both are also three instruction sequences. Full diff: https://github.com/llvm/llvm-project/pull/88911.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 765838aafb58d2..388a67a0575f63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13431,6 +13431,34 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
}
}
+
+ // (2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
+ // Matched in tablegen, avoid perturbing patterns.
+ switch (MulAmt) {
+ case 11: case 13: case 19: case 21: case 25: case 27: case 29:
+ case 37: case 41: case 45: case 73: case 91:
+ return SDValue();
+ default:
+ break;
+ }
+
+ // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
+ if (MulAmt > 2 &&
+ isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
+ unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
+ if (ScaleShift >= 1 && ScaleShift < 4) {
+ unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
+ SDLoc DL(N);
+ SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(ScaleShift, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Shift1,
+ DAG.getNode(ISD::ADD, DL, VT, Shift2,
+ N->getOperand(0)));
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a84b9e5e7962f6..d452c9d1b0f0c5 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -598,31 +598,52 @@ define i64 @mul125(i64 %a) {
}
define i64 @mul131(i64 %a) {
-; CHECK-LABEL: mul131:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 131
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul131:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 131
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul131:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a1, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 7
+; RV64ZBA-NEXT: add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 131
ret i64 %c
}
define i64 @mul133(i64 %a) {
-; CHECK-LABEL: mul133:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 133
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul133:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 133
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul133:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a1, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 7
+; RV64ZBA-NEXT: add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 133
ret i64 %c
}
define i64 @mul137(i64 %a) {
-; CHECK-LABEL: mul137:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 137
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: mul137:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 137
+; RV64I-NEXT: mul a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: mul137:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a1, a0, a0
+; RV64ZBA-NEXT: slli a0, a0, 7
+; RV64ZBA-NEXT: add a0, a0, a1
+; RV64ZBA-NEXT: ret
%c = mul i64 %a, 137
ret i64 %c
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
// (2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x) | ||
// Matched in tablegen, avoid perturbing patterns. | ||
switch (MulAmt) { | ||
case 11: case 13: case 19: case 21: case 25: case 27: case 29: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess clang-format did this, is this your desired formatting as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not clang-format's preferred format, I'd hand formatted. I'm going to push the clang-format as this isn't worth the time to bother debating.
Co-authored-by: Min-Yih Hsu <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
With zba, we can expand this to (add (shl X, C1), (shXadd X, X)).
Note that this is our first expansion to a three instruction sequence. I believe this to general be a reasonable tradeoff for most architectures, but we may want to (someday) consider a tuning flag here.
I plan to support 2^n + (2/4/8 + 1) eventually as well, but that comes behind 2^N - 2^M. Both are also three instruction sequences.