Skip to content

Commit 6fe7234

Browse files
authored
LICM: hoist BO assoc for FAdd and FMul (#108415)
Extend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions.
1 parent 3f8380f commit 6fe7234

File tree

2 files changed

+242
-9
lines changed

2 files changed

+242
-9
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2819,10 +2819,17 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
28192819
if (!BO || !BO->isAssociative())
28202820
return false;
28212821

2822-
// TODO: Only hoist ADDs and MULs for now.
2822+
// TODO: Only hoist ADDs, MULs, FADDs, and FMULs for now.
28232823
Instruction::BinaryOps Opcode = BO->getOpcode();
2824-
if (Opcode != Instruction::Add && Opcode != Instruction::Mul)
2824+
switch (Opcode) {
2825+
case Instruction::Add:
2826+
case Instruction::Mul:
2827+
case Instruction::FAdd:
2828+
case Instruction::FMul:
2829+
break;
2830+
default:
28252831
return false;
2832+
}
28262833

28272834
bool LVInRHS = L.isLoopInvariant(BO->getOperand(0));
28282835
auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(LVInRHS));
@@ -2857,6 +2864,12 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
28572864
if (auto *I = dyn_cast<Instruction>(Inv))
28582865
I->setHasNoUnsignedWrap(true);
28592866
NewBO->setHasNoUnsignedWrap(true);
2867+
} else if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) {
2868+
// Intersect FMF flags for FADD and FMUL.
2869+
FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
2870+
if (auto *I = dyn_cast<Instruction>(Inv))
2871+
I->setFastMathFlags(Intersect);
2872+
NewBO->setFastMathFlags(Intersect);
28602873
}
28612874

28622875
BO->replaceAllUsesWith(NewBO);

llvm/test/Transforms/LICM/hoist-binop.ll

Lines changed: 227 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -437,17 +437,17 @@ loop:
437437
br label %loop
438438
}
439439

440-
; Don't hoist floating-point ops, even if they are associative. This would be
441-
; valid, but is currently disabled.
442-
define void @fadd(float %c1, float %c2) {
443-
; CHECK-LABEL: @fadd(
440+
; The simple case. Hoist if fast is present on both instructions.
441+
define void @fadd_fast(float %c1, float %c2) {
442+
; CHECK-LABEL: @fadd_fast(
444443
; CHECK-NEXT: entry:
444+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd fast float [[C1:%.*]], [[C2:%.*]]
445445
; CHECK-NEXT: br label [[LOOP:%.*]]
446446
; CHECK: loop:
447-
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
448-
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1:%.*]]
447+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
448+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1]]
449449
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
450-
; CHECK-NEXT: [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C2:%.*]]
450+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd fast float [[INDEX]], [[INVARIANT_OP]]
451451
; CHECK-NEXT: br label [[LOOP]]
452452
;
453453
entry:
@@ -461,6 +461,226 @@ loop:
461461
br label %loop
462462
}
463463

464+
; The simple case. Hoist if fast is present on both instructions.
465+
define void @fmul_fast(float %c1, float %c2) {
466+
; CHECK-LABEL: @fmul_fast(
467+
; CHECK-NEXT: entry:
468+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul fast float [[C1:%.*]], [[C2:%.*]]
469+
; CHECK-NEXT: br label [[LOOP:%.*]]
470+
; CHECK: loop:
471+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
472+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul fast float [[INDEX]], [[C1]]
473+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
474+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul fast float [[INDEX]], [[INVARIANT_OP]]
475+
; CHECK-NEXT: br label [[LOOP]]
476+
;
477+
entry:
478+
br label %loop
479+
480+
loop:
481+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
482+
%step.add = fmul fast float %index, %c1
483+
call void @use(float %step.add)
484+
%index.next = fmul fast float %step.add, %c2
485+
br label %loop
486+
}
487+
488+
; The minimum case.
489+
; Hoist if reassoc and nsz are present on both instructions.
490+
define void @fadd_reassoc_nsz(float %c1, float %c2) {
491+
; CHECK-LABEL: @fadd_reassoc_nsz(
492+
; CHECK-NEXT: entry:
493+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
494+
; CHECK-NEXT: br label [[LOOP:%.*]]
495+
; CHECK: loop:
496+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
497+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nsz float [[INDEX]], [[C1]]
498+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
499+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
500+
; CHECK-NEXT: br label [[LOOP]]
501+
;
502+
entry:
503+
br label %loop
504+
505+
loop:
506+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
507+
%step.add = fadd reassoc nsz float %index, %c1
508+
call void @use(float %step.add)
509+
%index.next = fadd reassoc nsz float %step.add, %c2
510+
br label %loop
511+
}
512+
513+
; The minimum case.
514+
; Hoist if reassoc and nsz are present on both instructions.
515+
define void @fmul_reassoc_nsz(float %c1, float %c2) {
516+
; CHECK-LABEL: @fmul_reassoc_nsz(
517+
; CHECK-NEXT: entry:
518+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
519+
; CHECK-NEXT: br label [[LOOP:%.*]]
520+
; CHECK: loop:
521+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
522+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
523+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
524+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
525+
; CHECK-NEXT: br label [[LOOP]]
526+
;
527+
entry:
528+
br label %loop
529+
530+
loop:
531+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
532+
%step.add = fmul reassoc nsz float %index, %c1
533+
call void @use(float %step.add)
534+
%index.next = fmul reassoc nsz float %step.add, %c2
535+
br label %loop
536+
}
537+
538+
; Don't hoist unless both reassoc and nsz are present on both instructions.
539+
define void @fadd_nonassoc(float %c1, float %c2) {
540+
; CHECK-LABEL: @fadd_nonassoc(
541+
; CHECK-NEXT: entry:
542+
; CHECK-NEXT: br label [[LOOP:%.*]]
543+
; CHECK: loop:
544+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
545+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc float [[INDEX]], [[C1:%.*]]
546+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
547+
; CHECK-NEXT: [[INDEX_NEXT]] = fadd reassoc nsz float [[STEP_ADD]], [[C2:%.*]]
548+
; CHECK-NEXT: br label [[LOOP]]
549+
;
550+
entry:
551+
br label %loop
552+
553+
loop:
554+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
555+
%step.add = fadd reassoc float %index, %c1
556+
call void @use(float %step.add)
557+
%index.next = fadd reassoc nsz float %step.add, %c2
558+
br label %loop
559+
}
560+
561+
; Don't hoist unless both reassoc and nsz are present on both instructions.
562+
define void @fmul_noassoc(float %c1, float %c2) {
563+
; CHECK-LABEL: @fmul_noassoc(
564+
; CHECK-NEXT: entry:
565+
; CHECK-NEXT: br label [[LOOP:%.*]]
566+
; CHECK: loop:
567+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
568+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
569+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
570+
; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
571+
; CHECK-NEXT: br label [[LOOP]]
572+
;
573+
entry:
574+
br label %loop
575+
576+
loop:
577+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
578+
%step.add = fmul reassoc nsz float %index, %c1
579+
call void @use(float %step.add)
580+
%index.next = fmul nsz float %step.add, %c2
581+
br label %loop
582+
}
583+
584+
; No intersection in flags present on both instructions,
585+
; except reassoc and nsz.
586+
define void @fadd_fmf_nointersect(float %c1, float %c2) {
587+
; CHECK-LABEL: @fadd_fmf_nointersect(
588+
; CHECK-NEXT: entry:
589+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
590+
; CHECK-NEXT: br label [[LOOP:%.*]]
591+
; CHECK: loop:
592+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
593+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan nsz float [[INDEX]], [[C1]]
594+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
595+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
596+
; CHECK-NEXT: br label [[LOOP]]
597+
;
598+
entry:
599+
br label %loop
600+
601+
loop:
602+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
603+
%step.add = fadd reassoc nsz nnan float %index, %c1
604+
call void @use(float %step.add)
605+
%index.next = fadd reassoc nsz ninf float %step.add, %c2
606+
br label %loop
607+
}
608+
609+
; No intersection in flags present on both instructions,
610+
; except reassoc and nsz.
611+
define void @fmul_fmf_nointersect(float %c1, float %c2) {
612+
; CHECK-LABEL: @fmul_fmf_nointersect(
613+
; CHECK-NEXT: entry:
614+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
615+
; CHECK-NEXT: br label [[LOOP:%.*]]
616+
; CHECK: loop:
617+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
618+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz contract float [[INDEX]], [[C1]]
619+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
620+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
621+
; CHECK-NEXT: br label [[LOOP]]
622+
;
623+
entry:
624+
br label %loop
625+
626+
loop:
627+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
628+
%step.add = fmul reassoc nsz contract float %index, %c1
629+
call void @use(float %step.add)
630+
%index.next = fmul reassoc nnan nsz float %step.add, %c2
631+
br label %loop
632+
}
633+
634+
; Non-empty intersection in flags present on both instructions,
635+
; including reassoc and nsz.
636+
define void @fadd_fmf_intersect(float %c1, float %c2) {
637+
; CHECK-LABEL: @fadd_fmf_intersect(
638+
; CHECK-NEXT: entry:
639+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fadd reassoc ninf nsz float [[C1:%.*]], [[C2:%.*]]
640+
; CHECK-NEXT: br label [[LOOP:%.*]]
641+
; CHECK: loop:
642+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
643+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fadd reassoc nnan ninf nsz float [[INDEX]], [[C1]]
644+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
645+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fadd reassoc ninf nsz float [[INDEX]], [[INVARIANT_OP]]
646+
; CHECK-NEXT: br label [[LOOP]]
647+
;
648+
entry:
649+
br label %loop
650+
651+
loop:
652+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
653+
%step.add = fadd reassoc nnan nsz ninf float %index, %c1
654+
call void @use(float %step.add)
655+
%index.next = fadd reassoc ninf nsz float %step.add, %c2
656+
br label %loop
657+
}
658+
659+
; Non-empty intersection in flags present on both instructions,
660+
; including reassoc and nsz.
661+
define void @fmul_fmf_intersect(float %c1, float %c2) {
662+
; CHECK-LABEL: @fmul_fmf_intersect(
663+
; CHECK-NEXT: entry:
664+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul reassoc nsz afn float [[C1:%.*]], [[C2:%.*]]
665+
; CHECK-NEXT: br label [[LOOP:%.*]]
666+
; CHECK: loop:
667+
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
668+
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz arcp afn float [[INDEX]], [[C1]]
669+
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
670+
; CHECK-NEXT: [[INDEX_NEXT_REASS]] = fmul reassoc nsz afn float [[INDEX]], [[INVARIANT_OP]]
671+
; CHECK-NEXT: br label [[LOOP]]
672+
;
673+
entry:
674+
br label %loop
675+
676+
loop:
677+
%index = phi float [ 0., %entry ], [ %index.next, %loop ]
678+
%step.add = fmul reassoc afn nsz arcp float %index, %c1
679+
call void @use(float %step.add)
680+
%index.next = fmul reassoc nsz afn float %step.add, %c2
681+
br label %loop
682+
}
683+
464684
; Don't hoist if the intermediate op has more than two uses. This is a
465685
; heuristic that can be adjusted if warranted. Currently we are being
466686
; conservative to minimise potential impact in code size.

0 commit comments

Comments
 (0)