@@ -55,6 +55,8 @@ using namespace llvm;
55
55
/// Metadata attribute names
56
56
static const char *const LLVMLoopInterchangeFollowupAll =
57
57
" llvm.loop.interchange.followup_all" ;
58
+ static const char *const LLVMLoopInterchangeFollowupNextOuter =
59
+ " llvm.loop.interchange.followup_next_outer" ;
58
60
static const char *const LLVMLoopInterchangeFollowupOuter =
59
61
" llvm.loop.interchange.followup_outer" ;
60
62
static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
533
535
}
534
536
}
535
537
538
+ // If OnlyWhenForced is true, only process loops for which interchange is
539
+ // explicitly enabled.
536
540
if (OnlyWhenForced)
537
541
return processEnabledLoop (LoopList, DependencyMatrix, CostMap);
538
542
@@ -564,8 +568,10 @@ struct LoopInterchange {
564
568
Loop *InnerLoop = LoopList[InnerLoopId];
565
569
LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
566
570
<< " and OuterLoopId = " << OuterLoopId << " \n " );
567
- if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false )
571
+ if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false ) {
572
+ LLVM_DEBUG (dbgs () << " Not interchanging loops. It is disabled.\n " );
568
573
return false ;
574
+ }
569
575
LoopInterchangeLegality LIL (OuterLoop, InnerLoop, SE, ORE);
570
576
if (!LIL.canInterchangeLoops (InnerLoopId, OuterLoopId, DependencyMatrix)) {
571
577
LLVM_DEBUG (dbgs () << " Not interchanging loops. Cannot prove legality.\n " );
@@ -608,41 +614,144 @@ struct LoopInterchange {
608
614
std::vector<std::vector<char >> &DependencyMatrix,
609
615
const DenseMap<const Loop *, unsigned > &CostMap) {
610
616
bool Changed = false ;
611
- for (unsigned InnerLoopId = LoopList.size () - 1 ; InnerLoopId > 0 ;
612
- InnerLoopId--) {
613
- unsigned OuterLoopId = InnerLoopId - 1 ;
614
- if (findMetadata (LoopList[OuterLoopId]) != true )
615
- continue ;
616
617
617
- MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID ();
618
- bool Interchanged =
619
- processLoop (LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
620
- OuterLoopId, DependencyMatrix, CostMap);
621
-
622
- // TODO: Consolidate the duplicate code in `processLoopList`.
623
- if (Interchanged) {
624
- std::swap (LoopList[OuterLoopId], LoopList[InnerLoopId]);
625
- // Update the DependencyMatrix
626
- interChangeDependencies (DependencyMatrix, InnerLoopId, OuterLoopId);
618
+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
619
+ DenseMap<Loop *, unsigned > Loop2Index;
620
+ for (unsigned I = 0 ; I != LoopList.size (); I++)
621
+ Loop2Index[LoopList[I]] = I;
622
+
623
+ // Hold outer loops to be exchanged, in the current nest order.
624
+ SmallVector<Loop *, 4 > Worklist;
625
+
626
+ // Helper function to try to add a new loop into the Worklist. Return false
627
+ // if the loop overlaps with the last loop already in the Worklist.
628
+ auto AddLoopIfEnabled = [&](Loop *L) {
629
+ if (findMetadata (L) == true ) {
630
+ if (!Worklist.empty ()) {
631
+ // Because the loops are sorted in the order of the current nest, it
632
+ // is sufficient to compare with the last element.
633
+ unsigned InnerLoopId = Loop2Index[Worklist.back ()] + 1 ;
634
+ unsigned OuterLoopId = Loop2Index[L];
635
+ if (OuterLoopId <= InnerLoopId) {
636
+ ORE->emit ([&]() {
637
+ return OptimizationRemarkMissed (DEBUG_TYPE, " AmbiguousOrder" ,
638
+ L->getStartLoc (), L->getHeader ())
639
+ << " The loops to be interchanged are overlapping." ;
640
+ });
641
+ return false ;
642
+ }
643
+ }
644
+ Worklist.push_back (L);
645
+ }
646
+ return true ;
647
+ };
627
648
628
- LLVM_DEBUG (dbgs () << " Dependency matrix after interchange:\n " ;
629
- printDepMatrix (DependencyMatrix));
649
+ // Initialize Worklist. To process the loops in inner-loop-first order, add
650
+ // them to the worklist in the outer-loop-first order.
651
+ for (unsigned I = 0 ; I != LoopList.size (); I++)
652
+ if (!AddLoopIfEnabled (LoopList[I]))
653
+ return Changed;
654
+
655
+ // Set an upper bound on the number of transformations to avoid an infinite
656
+ // loop. There is no deep meaning behind the current value (square of the
657
+ // size of LoopList).
658
+ // TODO: Is this really necessary?
659
+ const unsigned MaxAttemptsCount = LoopList.size () * LoopList.size ();
660
+ unsigned Attempts = 0 ;
661
+
662
+ // Process the loops. An exchange is applied to two loops, but a metadata
663
+ // replacement can be applied to three loops: the two loops plus the next
664
+ // outer loop, if it exists. This is because it's necessary to express the
665
+ // information about the order of the application of interchanges in cases
666
+ // where the target loops to be exchanged are overlapping, e.g.,
667
+ //
668
+ // #pragma clang loop interchange(enable)
669
+ // for(int i=0;i<N;i++)
670
+ // #pragma clang loop interchange(enable)
671
+ // for (int j=0;j<N;j++)
672
+ // for (int k=0;k<N;k++)
673
+ // ...
674
+ //
675
+ // In this case we will exchange the innermost two loops first; then the
676
+ // follow-up metadata including enabling interchange is attached on the
677
+ // outermost loop, and it is enqueued as the next candidate to be processed.
678
+ while (!Worklist.empty () && Attempts < MaxAttemptsCount) {
679
+ Loop *TargetLoop = Worklist.pop_back_val ();
680
+ assert (findMetadata (TargetLoop) == true &&
681
+ " Some metadata was unexpectedlly removed" );
682
+ unsigned OuterLoopId = Loop2Index[TargetLoop];
683
+ unsigned InnerLoopId = OuterLoopId + 1 ;
684
+ if (InnerLoopId >= LoopList.size ()) {
685
+ ORE->emit ([&]() {
686
+ return OptimizationRemarkMissed (DEBUG_TYPE, " InnermostLoop" ,
687
+ TargetLoop->getStartLoc (),
688
+ TargetLoop->getHeader ())
689
+ << " The metadata is invalid with an innermost loop." ;
690
+ });
691
+ break ;
692
+ }
693
+ MDNode *LoopID = TargetLoop->getLoopID ();
694
+ bool Interchanged = processLoop (LoopList, InnerLoopId, OuterLoopId,
695
+ DependencyMatrix, CostMap);
696
+ if (!Interchanged) {
697
+ ORE->emit ([&]() {
698
+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotInterchanged" ,
699
+ TargetLoop->getStartLoc (),
700
+ TargetLoop->getHeader ())
701
+ << " Failed to perform explicitly specified loop interchange." ;
702
+ });
703
+ break ;
630
704
}
631
705
632
- std::optional<MDNode *> MDOuterLoopID =
633
- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
634
- LLVMLoopInterchangeFollowupOuter});
635
- if (MDOuterLoopID)
636
- LoopList[OuterLoopId]->setLoopID (*MDOuterLoopID);
706
+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
707
+ Loop *NextOuterLoop = nullptr ;
708
+ if (0 < OuterLoopId)
709
+ NextOuterLoop = LoopList[OuterLoopId - 1 ];
710
+ Loop *OuterLoop = LoopList[OuterLoopId];
711
+ Loop *InnerLoop = LoopList[InnerLoopId];
712
+ Attempts++;
713
+ Changed = true ;
714
+ Loop2Index[OuterLoop] = OuterLoopId;
715
+ Loop2Index[InnerLoop] = InnerLoopId;
637
716
717
+ // Update the metadata.
718
+ std::optional<MDNode *> MDNextOuterLoopID =
719
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
720
+ LLVMLoopInterchangeFollowupNextOuter});
721
+ std::optional<MDNode *> MDOuterLoopID =
722
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
723
+ LLVMLoopInterchangeFollowupOuter});
638
724
std::optional<MDNode *> MDInnerLoopID =
639
- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
640
- LLVMLoopInterchangeFollowupInner});
725
+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
726
+ LLVMLoopInterchangeFollowupInner});
727
+ if (MDNextOuterLoopID) {
728
+ if (NextOuterLoop) {
729
+ NextOuterLoop->setLoopID (*MDNextOuterLoopID);
730
+ } else {
731
+ LLVM_DEBUG (dbgs ()
732
+ << " New metadata for the next outer loop is ignored.\n " );
733
+ }
734
+ }
735
+ if (MDOuterLoopID)
736
+ OuterLoop->setLoopID (*MDOuterLoopID);
641
737
if (MDInnerLoopID)
642
- LoopList[InnerLoopId]->setLoopID (*MDInnerLoopID);
643
-
644
- Changed |= Interchanged;
738
+ InnerLoop->setLoopID (*MDInnerLoopID);
739
+
740
+ // Add new elements, paying attention to the order.
741
+ bool Valid = true ;
742
+ if (NextOuterLoop)
743
+ Valid &= AddLoopIfEnabled (NextOuterLoop);
744
+ Valid &= AddLoopIfEnabled (OuterLoop);
745
+ Valid &= AddLoopIfEnabled (InnerLoop);
746
+ if (!Valid)
747
+ break ;
645
748
}
749
+
750
+ LLVM_DEBUG ({
751
+ if (!Worklist.empty ())
752
+ dbgs () << " Some metadata was ignored because the maximum number of "
753
+ " attempts was reached.\n " ;
754
+ });
646
755
return Changed;
647
756
}
648
757
};
0 commit comments