-
Notifications
You must be signed in to change notification settings - Fork 13.6k
LICM: hoist BO assoc for FAdd and FMul #108415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Extend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions.
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesExtend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions. Full diff: https://github.com/llvm/llvm-project/pull/108415.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 23e9c70b62642a..4b1650b93cc1d1 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2819,10 +2819,17 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
if (!BO || !BO->isAssociative())
return false;
- // TODO: Only hoist ADDs and MULs for now.
+ // TODO: Only hoist ADDs, MULs, FADDs, and FMULs for now.
Instruction::BinaryOps Opcode = BO->getOpcode();
- if (Opcode != Instruction::Add && Opcode != Instruction::Mul)
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ break;
+ default:
return false;
+ }
bool LVInRHS = L.isLoopInvariant(BO->getOperand(0));
auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(LVInRHS));
@@ -2857,6 +2864,12 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
if (auto *I = dyn_cast<Instruction>(Inv))
I->setHasNoUnsignedWrap(true);
NewBO->setHasNoUnsignedWrap(true);
+ } else if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) {
+ // Intersect FMF flags for FADD and FMUL.
+ FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
+ if (auto *I = dyn_cast<Instruction>(Inv))
+ I->setFastMathFlags(Intersect);
+ NewBO->setFastMathFlags(Intersect);
}
BO->replaceAllUsesWith(NewBO);
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index a840e247578840..74e2b7a2caf4a5 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -437,17 +437,17 @@ loop:
br label %loop
}
-; Don't hoist floating-point ops, even if they are associative. This would be
-; valid, but is currently disabled.
-define void @fadd(float %c1, float %c2) {
-; CHECK-LABEL: @fadd(
+; The simple case. Hoist if fast is present on both instructions.
+define void @fadd_fast(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fast(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd fast float [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd fast float [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -461,6 +461,226 @@ loop:
br label %loop
}
+; The simple case. Hoist if fast is present on both instructions.
+define void @fmul_fast(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fast(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul fast float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul fast float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul fast float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fmul fast float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fmul fast float %step.add, %c2
+ br label %loop
+}
+
+; The minimum case.
+; Hoist if reasassoc and nsz are present on both instructions.
+define void @fadd_reassoc_nsz(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_reassoc_nsz(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fadd reassoc nsz float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fadd reassoc nsz float %step.add, %c2
+ br label %loop
+}
+
+; The minimum case.
+; Hoist if reasassoc and nsz are present on both instructions.
+define void @fmul_reassoc_nsz(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_reassoc_nsz(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fmul reassoc nsz float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fmul reassoc nsz float %step.add, %c2
+ br label %loop
+}
+
+; Don't hoist if both reassoc and nsz aren't present on both instructions.
+define void @fadd_nonassoc(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_nonassoc(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = fadd reassoc nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fadd reassoc float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fadd reassoc nsz float %step.add, %c2
+ br label %loop
+}
+
+; Don't hoist if both reassoc and nsz aren't present on both instructions.
+define void @fmul_noassoc(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_noassoc(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fmul reassoc nsz float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fmul nsz float %step.add, %c2
+ br label %loop
+}
+
+; No intersection in flags present on both instructions,
+; except reassoc and nsz.
+define void @fadd_fmf_nointersect(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fmf_nointersect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fadd reassoc nsz nnan float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fadd reassoc nsz ninf float %step.add, %c2
+ br label %loop
+}
+
+; No intersection in flags present on both instructions,
+; except reassoc and nsz.
+define void @fmul_fmf_nointersect(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fmf_nointersect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz contract float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fmul reassoc nsz contract float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fmul reassoc nnan nsz float %step.add, %c2
+ br label %loop
+}
+
+; Non-empty intersection in flags present on both instructions,
+; including reassoc and nsz.
+define void @fadd_fmf_intersect(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fmf_intersect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc ninf nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan ninf nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc ninf nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fadd reassoc nnan nsz ninf float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fadd reassoc ninf nsz float %step.add, %c2
+ br label %loop
+}
+
+; Non-empty intersection in flags present on both instructions,
+; including reassoc and nsz.
+define void @fmul_fmf_intersect(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fmf_intersect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz afn float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz arcp afn float [[INDEX]], [[C1]]
+; CHECK-NEXT: call void @use(float [[STEP_ADD]])
+; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz afn float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+ %step.add = fmul reassoc afn nsz arcp float %index, %c1
+ call void @use(float %step.add)
+ %index.next = fmul reassoc nsz afn float %step.add, %c2
+ br label %loop
+}
+
; Don't hoist if the intermediate op has more than two uses. This is an
; heuristic that can be adjusted if warranted. Currently we are being
; conservative to minimise potential impact in code size.
|
Is it OK to reassociate |
We're actually reassociating |
It should be noted that I wasn't able to play with this in Alive2 (since Alive2 doesn't have support for the |
On the other hand: we have some flexibility to define the meaning of As a side note: |
Gentle ping. Is the conclusion here that we can take an intersection of the flags, and that the patch is okay? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I think we have a lot of precedent for preserving the flag intersection for this kind of reassociation, so I don't think it would make sense to make a different decision here. See e.g. https://llvm.godbolt.org/z/T3KKPK8vP for InstCombine butchering the simplest possible variant of this pattern.
Extend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions.
Extend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions.