@@ -55,6 +55,8 @@ using namespace llvm;
55
55
/// Metadata attribute names
56
56
static const char *const LLVMLoopInterchangeFollowupAll =
57
57
" llvm.loop.interchange.followup_all" ;
58
+ static const char *const LLVMLoopInterchangeFollowupNextOuter =
59
+ " llvm.loop.interchange.followup_next_outer" ;
58
60
static const char *const LLVMLoopInterchangeFollowupOuter =
59
61
" llvm.loop.interchange.followup_outer" ;
60
62
static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
533
535
}
534
536
}
535
537
538
+ // If OnlyWhenForced is true, only process loops for which interchange is
539
+ // explicitly enabled.
536
540
if (OnlyWhenForced)
537
541
return processEnabledLoop (LoopList, DependencyMatrix, CostMap);
538
542
@@ -564,8 +568,10 @@ struct LoopInterchange {
564
568
Loop *InnerLoop = LoopList[InnerLoopId];
565
569
LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
566
570
<< " and OuterLoopId = " << OuterLoopId << " \n " );
567
- if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false )
571
+ if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false ) {
572
+ LLVM_DEBUG (dbgs () << " Not interchanging loops. It is disabled.\n " );
568
573
return false ;
574
+ }
569
575
LoopInterchangeLegality LIL (OuterLoop, InnerLoop, SE, ORE);
570
576
if (!LIL.canInterchangeLoops (InnerLoopId, OuterLoopId, DependencyMatrix)) {
571
577
LLVM_DEBUG (dbgs () << " Not interchanging loops. Cannot prove legality.\n " );
@@ -608,41 +614,145 @@ struct LoopInterchange {
608
614
std::vector<std::vector<char >> &DependencyMatrix,
609
615
const DenseMap<const Loop *, unsigned > &CostMap) {
610
616
bool Changed = false ;
611
- for (unsigned InnerLoopId = LoopList.size () - 1 ; InnerLoopId > 0 ;
612
- InnerLoopId--) {
613
- unsigned OuterLoopId = InnerLoopId - 1 ;
614
- if (findMetadata (LoopList[OuterLoopId]) != true )
615
- continue ;
616
617
617
- MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID ();
618
- bool Interchanged =
619
- processLoop (LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
620
- OuterLoopId, DependencyMatrix, CostMap);
621
-
622
- // TODO: Consolidate the duplicate code in `processLoopList`.
623
- if (Interchanged) {
624
- std::swap (LoopList[OuterLoopId], LoopList[InnerLoopId]);
625
- // Update the DependencyMatrix
626
- interChangeDependencies (DependencyMatrix, InnerLoopId, OuterLoopId);
618
+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
619
+ DenseMap<Loop *, unsigned > Loop2Index;
620
+ for (unsigned I = 0 ; I != LoopList.size (); I++)
621
+ Loop2Index[LoopList[I]] = I;
622
+
623
+ // Hold outer loops to be exchanged (i.e., loops that have
624
+ // "llvm.loop.interchange.enable" set to true), in the current nest order.
625
+ SmallVector<Loop *, 4 > Worklist;
626
+
627
+ // Helper function to try to add a new loop into the Worklist. Return false
628
+ // if the loops to be interchanged would overlap.
629
+ auto AddLoopIfEnabled = [&](Loop *L) {
630
+ if (findMetadata (L) == true ) {
631
+ if (!Worklist.empty ()) {
632
+ // Because the loops are sorted in the order of the current nest, it
633
+ // is sufficient to compare with the last element.
634
+ unsigned InnerLoopId = Loop2Index[Worklist.back ()] + 1 ;
635
+ unsigned OuterLoopId = Loop2Index[L];
636
+ if (OuterLoopId <= InnerLoopId) {
637
+ ORE->emit ([&]() {
638
+ return OptimizationRemarkMissed (DEBUG_TYPE, " AmbiguousOrder" ,
639
+ L->getStartLoc (), L->getHeader ())
640
+ << " The loops to be interchanged are overlapping." ;
641
+ });
642
+ return false ;
643
+ }
644
+ }
645
+ Worklist.push_back (L);
646
+ }
647
+ return true ;
648
+ };
627
649
628
- LLVM_DEBUG (dbgs () << " Dependency matrix after interchange:\n " ;
629
- printDepMatrix (DependencyMatrix));
650
+ // Initialize Worklist. To process the loops in inner-loop-first order, add
651
+ // them to the worklist in the outer-loop-first order.
652
+ for (unsigned I = 0 ; I != LoopList.size (); I++)
653
+ if (!AddLoopIfEnabled (LoopList[I]))
654
+ return Changed;
655
+
656
+ // Set an upper bound on the number of transformations to avoid an infinite
657
+ // loop. There is no deep meaning behind the current value (square of the
658
+ // size of LoopList).
659
+ // TODO: Is this really necessary?
660
+ const unsigned MaxAttemptsCount = LoopList.size () * LoopList.size ();
661
+ unsigned Attempts = 0 ;
662
+
663
+ // Process the loops. An exchange is applied to two loops, but a metadata
664
+ // replacement can be applied to three loops: the two loops plus the next
665
+ // outer loop, if it exists. This is because it's necessary to express the
666
+ // information about the order of the application of interchanges in cases
667
+ // where the target loops to be exchanged are overlapping, e.g.,
668
+ //
669
+ // #pragma clang loop interchange(enable)
670
+ // for(int i=0;i<N;i++)
671
+ // #pragma clang loop interchange(enable)
672
+ // for (int j=0;j<N;j++)
673
+ // for (int k=0;k<N;k++)
674
+ // ...
675
+ //
676
+ // In this case we first exchange the innermost two loops. The follow-up
677
+ // metadata, which includes enabling interchange, is then attached to the
678
+ // outermost loop, and that loop is enqueued as the next candidate.
679
+ while (!Worklist.empty () && Attempts < MaxAttemptsCount) {
680
+ Loop *TargetLoop = Worklist.pop_back_val ();
681
+ assert (findMetadata (TargetLoop) == true &&
682
+ " Some metadata was unexpectedlly removed" );
683
+ unsigned OuterLoopId = Loop2Index[TargetLoop];
684
+ unsigned InnerLoopId = OuterLoopId + 1 ;
685
+ if (InnerLoopId >= LoopList.size ()) {
686
+ ORE->emit ([&]() {
687
+ return OptimizationRemarkMissed (DEBUG_TYPE, " InnermostLoop" ,
688
+ TargetLoop->getStartLoc (),
689
+ TargetLoop->getHeader ())
690
+ << " The metadata is invalid with an innermost loop." ;
691
+ });
692
+ break ;
693
+ }
694
+ MDNode *LoopID = TargetLoop->getLoopID ();
695
+ bool Interchanged = processLoop (LoopList, InnerLoopId, OuterLoopId,
696
+ DependencyMatrix, CostMap);
697
+ if (!Interchanged) {
698
+ ORE->emit ([&]() {
699
+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotInterchanged" ,
700
+ TargetLoop->getStartLoc (),
701
+ TargetLoop->getHeader ())
702
+ << " Failed to perform explicitly specified loop interchange." ;
703
+ });
704
+ break ;
630
705
}
631
706
632
- std::optional<MDNode *> MDOuterLoopID =
633
- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
634
- LLVMLoopInterchangeFollowupOuter});
635
- if (MDOuterLoopID)
636
- LoopList[OuterLoopId]->setLoopID (*MDOuterLoopID);
707
+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
708
+ Loop *NextOuterLoop = nullptr ;
709
+ if (0 < OuterLoopId)
710
+ NextOuterLoop = LoopList[OuterLoopId - 1 ];
711
+ Loop *OuterLoop = LoopList[OuterLoopId];
712
+ Loop *InnerLoop = LoopList[InnerLoopId];
713
+ Attempts++;
714
+ Changed = true ;
715
+ Loop2Index[OuterLoop] = OuterLoopId;
716
+ Loop2Index[InnerLoop] = InnerLoopId;
637
717
718
+ // Update the metadata.
719
+ std::optional<MDNode *> MDNextOuterLoopID =
720
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
721
+ LLVMLoopInterchangeFollowupNextOuter});
722
+ std::optional<MDNode *> MDOuterLoopID =
723
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
724
+ LLVMLoopInterchangeFollowupOuter});
638
725
std::optional<MDNode *> MDInnerLoopID =
639
- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
640
- LLVMLoopInterchangeFollowupInner});
726
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
727
+ LLVMLoopInterchangeFollowupInner});
728
+ if (MDNextOuterLoopID) {
729
+ if (NextOuterLoop) {
730
+ NextOuterLoop->setLoopID (*MDNextOuterLoopID);
731
+ } else {
732
+ LLVM_DEBUG (dbgs ()
733
+ << " New metadata for the next outer loop is ignored.\n " );
734
+ }
735
+ }
736
+ if (MDOuterLoopID)
737
+ OuterLoop->setLoopID (*MDOuterLoopID);
641
738
if (MDInnerLoopID)
642
- LoopList[InnerLoopId]->setLoopID (*MDInnerLoopID);
643
-
644
- Changed |= Interchanged;
739
+ InnerLoop->setLoopID (*MDInnerLoopID);
740
+
741
+ // Add new elements, paying attention to the order.
742
+ bool Valid = true ;
743
+ if (NextOuterLoop)
744
+ Valid &= AddLoopIfEnabled (NextOuterLoop);
745
+ Valid &= AddLoopIfEnabled (OuterLoop);
746
+ Valid &= AddLoopIfEnabled (InnerLoop);
747
+ if (!Valid)
748
+ break ;
645
749
}
750
+
751
+ LLVM_DEBUG ({
752
+ if (!Worklist.empty ())
753
+ dbgs () << " Some metadata was ignored because the maximum number of "
754
+ " attempts was reached.\n " ;
755
+ });
646
756
return Changed;
647
757
}
648
758
};
0 commit comments