35
35
#include " llvm/Support/Debug.h"
36
36
#include " llvm/Support/ErrorHandling.h"
37
37
#include " llvm/Support/raw_ostream.h"
38
+ #include < algorithm>
38
39
#include < cassert>
39
40
#include < cstdint>
40
41
@@ -80,8 +81,10 @@ class HexagonOptAddrMode : public MachineFunctionPass {
80
81
private:
81
82
using MISetType = DenseSet<MachineInstr *>;
82
83
using InstrEvalMap = DenseMap<MachineInstr *, bool >;
84
+ DenseSet<MachineInstr *> ProcessedAddiInsts;
83
85
84
86
MachineRegisterInfo *MRI = nullptr ;
87
+ const TargetRegisterInfo *TRI = nullptr ;
85
88
const HexagonInstrInfo *HII = nullptr ;
86
89
const HexagonRegisterInfo *HRI = nullptr ;
87
90
MachineDominatorTree *MDT = nullptr ;
@@ -93,6 +96,15 @@ class HexagonOptAddrMode : public MachineFunctionPass {
93
96
bool processBlock (NodeAddr<BlockNode *> BA);
94
97
bool xformUseMI (MachineInstr *TfrMI, MachineInstr *UseMI,
95
98
NodeAddr<UseNode *> UseN, unsigned UseMOnum);
99
+ bool processAddBases (NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI);
100
+ bool usedInLoadStore (NodeAddr<StmtNode *> CurrentInstSN, int64_t NewOffset);
101
+ bool findFirstReachedInst (
102
+ MachineInstr *AddMI,
103
+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>>
104
+ &AddiList,
105
+ NodeAddr<StmtNode *> &UseSN);
106
+ bool updateAddBases (MachineInstr *CurrentMI, MachineInstr *FirstReachedMI,
107
+ int64_t NewOffset);
96
108
bool processAddUses (NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI,
97
109
const NodeList &UNodeList);
98
110
bool updateAddUses (MachineInstr *AddMI, MachineInstr *UseMI);
@@ -207,8 +219,17 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
207
219
return false ;
208
220
209
221
for (auto &Mo : UseMI.operands ())
222
+ // Is it a frame index?
210
223
if (Mo.isFI ())
211
224
return false ;
225
+ // Does the OffsetReg definition actually reach UseMI?
226
+ if (!UseMI.getParent ()->isLiveIn (OffsetReg) &&
227
+ MI.getParent () != UseMI.getParent ()) {
228
+ LLVM_DEBUG (dbgs () << " The offset reg " << printReg (OffsetReg, TRI)
229
+ << " is NOT live in to MBB "
230
+ << UseMI.getParent ()->getName () << " \n " );
231
+ return false ;
232
+ }
212
233
}
213
234
return true ;
214
235
}
@@ -327,6 +348,14 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
327
348
if ((LRExtRegDN.Addr ->getFlags () & NodeAttrs::PhiRef) &&
328
349
MI->getParent () != UseMI->getParent ())
329
350
return false ;
351
+ // Does the LRExtReg definition actually reach UseMI?
352
+ if (!UseMI->getParent ()->isLiveIn (LRExtReg) &&
353
+ MI->getParent () != UseMI->getParent ()) {
354
+ LLVM_DEBUG (dbgs () << " The LRExtReg reg " << printReg (LRExtReg, TRI)
355
+ << " is NOT live in to MBB "
356
+ << UseMI->getParent ()->getName () << " \n " );
357
+ return false ;
358
+ }
330
359
}
331
360
return true ;
332
361
}
@@ -344,6 +373,12 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
344
373
case Hexagon::V6_vgathermhwq_pseudo:
345
374
return HII->isValidOffset (MI->getOpcode (), Offset, HRI, false );
346
375
default :
376
+ if (HII->getAddrMode (*MI) == HexagonII::BaseImmOffset) {
377
+ // The immediates are mentioned in multiples of vector counts
378
+ unsigned AlignMask = HII->getMemAccessSize (*MI) - 1 ;
379
+ if ((AlignMask & Offset) == 0 )
380
+ return HII->isValidOffset (MI->getOpcode (), Offset, HRI, false );
381
+ }
347
382
return false ;
348
383
}
349
384
}
@@ -414,6 +449,264 @@ unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) {
414
449
}
415
450
}
416
451
452
+ bool HexagonOptAddrMode::usedInLoadStore (NodeAddr<StmtNode *> CurrentInstSN,
453
+ int64_t NewOffset) {
454
+ NodeList LoadStoreUseList;
455
+
456
+ getAllRealUses (CurrentInstSN, LoadStoreUseList);
457
+ bool FoundLoadStoreUse = false ;
458
+ for (auto I = LoadStoreUseList.begin (), E = LoadStoreUseList.end (); I != E;
459
+ ++I) {
460
+ NodeAddr<UseNode *> UN = *I;
461
+ NodeAddr<StmtNode *> SN = UN.Addr ->getOwner (*DFG);
462
+ MachineInstr *LoadStoreMI = SN.Addr ->getCode ();
463
+ const MCInstrDesc &MID = LoadStoreMI->getDesc ();
464
+ if ((MID.mayLoad () || MID.mayStore ()) &&
465
+ isValidOffset (LoadStoreMI, NewOffset)) {
466
+ FoundLoadStoreUse = true ;
467
+ break ;
468
+ }
469
+ }
470
+ return FoundLoadStoreUse;
471
+ }
472
+
473
+ bool HexagonOptAddrMode::findFirstReachedInst (
474
+ MachineInstr *AddMI,
475
+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> &AddiList,
476
+ NodeAddr<StmtNode *> &UseSN) {
477
+ // Find the very first Addi instruction in the current basic block among the
478
+ // AddiList This is the Addi that should be preserved so that we do not need
479
+ // to handle the complexity of moving instructions
480
+ //
481
+ // TODO: find Addi instructions across basic blocks
482
+ //
483
+ // TODO: Try to remove this and add a solution that optimizes the number of
484
+ // Addi instructions that can be modified.
485
+ // This change requires choosing the Addi with the median offset value, but
486
+ // would also require moving that instruction above the others. Since this
487
+ // pass runs after register allocation, there might be multiple cases that
488
+ // need to be handled if we move instructions around
489
+ MachineBasicBlock *CurrentMBB = AddMI->getParent ();
490
+ for (auto &InstIter : *CurrentMBB) {
491
+ // If the instruction is an Addi and is in the AddiList
492
+ if (InstIter.getOpcode () == Hexagon::A2_addi) {
493
+ auto Iter = std::find_if (
494
+ AddiList.begin (), AddiList.end (), [&InstIter](const auto &SUPair) {
495
+ return SUPair.first .Addr ->getCode () == &InstIter;
496
+ });
497
+ if (Iter != AddiList.end ()) {
498
+ UseSN = Iter->first ;
499
+ return true ;
500
+ }
501
+ }
502
+ }
503
+ return false ;
504
+ }
505
+
506
+ // This function tries to modify the immediate value in Hexagon::Addi
507
+ // instructions, so that the immediates could then be moved into a load/store
508
+ // instruction with offset and the add removed completely when we call
509
+ // processAddUses
510
+ //
511
+ // For Example, If we have the below sequence of instructions:
512
+ //
513
+ // r1 = add(r2,#1024)
514
+ // ...
515
+ // r3 = add(r2,#1152)
516
+ // ...
517
+ // r4 = add(r2,#1280)
518
+ //
519
+ // Where the register r2 has the same reaching definition, They get modified to
520
+ // the below sequence:
521
+ //
522
+ // r1 = add(r2,#1024)
523
+ // ...
524
+ // r3 = add(r1,#128)
525
+ // ...
526
+ // r4 = add(r1,#256)
527
+ //
528
+ // The below change helps the processAddUses method to later move the
529
+ // immediates #128 and #256 into a load/store instruction that can take an
530
+ // offset, like the Vd = mem(Rt+#s4)
531
+ bool HexagonOptAddrMode::processAddBases (NodeAddr<StmtNode *> AddSN,
532
+ MachineInstr *AddMI) {
533
+
534
+ bool Changed = false ;
535
+
536
+ LLVM_DEBUG (dbgs () << " \n\t\t [Processing Addi]: " << *AddMI << " \n " );
537
+
538
+ auto Processed =
539
+ [](const MachineInstr *MI,
540
+ const DenseSet<MachineInstr *> &ProcessedAddiInsts) -> bool {
541
+ // If we've already processed this Addi, just return
542
+ if (ProcessedAddiInsts.find (MI) != ProcessedAddiInsts.end ()) {
543
+ LLVM_DEBUG (dbgs () << " \t\t\t Addi already found in ProcessedAddiInsts: "
544
+ << *MI << " \n\t\t\t Skipping..." );
545
+ return true ;
546
+ }
547
+ return false ;
548
+ };
549
+
550
+ if (Processed (AddMI, ProcessedAddiInsts))
551
+ return Changed;
552
+ ProcessedAddiInsts.insert (AddMI);
553
+
554
+ // Get the base register that would be shared by other Addi Intructions
555
+ Register BaseReg = AddMI->getOperand (1 ).getReg ();
556
+
557
+ // Store a list of all Addi instructions that share the above common base
558
+ // register
559
+ std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> AddiList;
560
+
561
+ NodeId UAReachingDefID;
562
+ // Find the UseNode that contains the base register and it's reachingDef
563
+ for (NodeAddr<UseNode *> UA : AddSN.Addr ->members_if (DFG->IsUse , *DFG)) {
564
+ RegisterRef URR = UA.Addr ->getRegRef (*DFG);
565
+ if (BaseReg != URR.Reg )
566
+ continue ;
567
+
568
+ UAReachingDefID = UA.Addr ->getReachingDef ();
569
+ NodeAddr<DefNode *> UADef = DFG->addr <DefNode *>(UAReachingDefID);
570
+ if (!UAReachingDefID || UADef.Addr ->getFlags () & NodeAttrs::PhiRef) {
571
+ LLVM_DEBUG (dbgs () << " \t\t\t Could not find reachingDef. Skipping...\n " );
572
+ return false ;
573
+ }
574
+ }
575
+
576
+ NodeAddr<DefNode *> UAReachingDef = DFG->addr <DefNode *>(UAReachingDefID);
577
+ NodeAddr<StmtNode *> ReachingDefStmt = UAReachingDef.Addr ->getOwner (*DFG);
578
+
579
+ // If the reaching definition is a predicated instruction, this might not be
580
+ // the only definition of our base register, so return immediately.
581
+ MachineInstr *ReachingDefInstr = ReachingDefStmt.Addr ->getCode ();
582
+ if (HII->isPredicated (*ReachingDefInstr))
583
+ return false ;
584
+
585
+ NodeList AddiUseList;
586
+
587
+ // Find all Addi instructions that share the same base register and add them
588
+ // to the AddiList
589
+ getAllRealUses (ReachingDefStmt, AddiUseList);
590
+ for (auto I = AddiUseList.begin (), E = AddiUseList.end (); I != E; ++I) {
591
+ NodeAddr<UseNode *> UN = *I;
592
+ NodeAddr<StmtNode *> SN = UN.Addr ->getOwner (*DFG);
593
+ MachineInstr *MI = SN.Addr ->getCode ();
594
+
595
+ // Only add instructions if it's an Addi and it's not already processed.
596
+ if (MI->getOpcode () == Hexagon::A2_addi &&
597
+ !(MI != AddMI && Processed (MI, ProcessedAddiInsts))) {
598
+ AddiList.push_back ({SN, UN});
599
+
600
+ // This ensures that we process each instruction only once
601
+ ProcessedAddiInsts.insert (MI);
602
+ }
603
+ }
604
+
605
+ // If there's only one Addi instruction, nothing to do here
606
+ if (AddiList.size () <= 1 )
607
+ return Changed;
608
+
609
+ NodeAddr<StmtNode *> FirstReachedUseSN;
610
+ // Find the first reached use of Addi instruction from the list
611
+ if (!findFirstReachedInst (AddMI, AddiList, FirstReachedUseSN))
612
+ return Changed;
613
+
614
+ // If we reach this point we know that the StmtNode FirstReachedUseSN is for
615
+ // an Addi instruction. So, we're guaranteed to have just one DefNode, and
616
+ // hence we can access the front() directly without checks
617
+ NodeAddr<DefNode *> FirstReachedUseDN =
618
+ FirstReachedUseSN.Addr ->members_if (DFG->IsDef , *DFG).front ();
619
+
620
+ MachineInstr *FirstReachedMI = FirstReachedUseSN.Addr ->getCode ();
621
+ const MachineOperand FirstReachedMIImmOp = FirstReachedMI->getOperand (2 );
622
+ if (!FirstReachedMIImmOp.isImm ())
623
+ return false ;
624
+
625
+ for (auto &I : AddiList) {
626
+ NodeAddr<StmtNode *> CurrentInstSN = I.first ;
627
+ NodeAddr<UseNode *> CurrentInstUN = I.second ;
628
+
629
+ MachineInstr *CurrentMI = CurrentInstSN.Addr ->getCode ();
630
+ MachineOperand &CurrentMIImmOp = CurrentMI->getOperand (2 );
631
+
632
+ int64_t NewOffset;
633
+
634
+ // Even though we know it's an Addi instruction, the second operand could be
635
+ // a global value and not an immediate
636
+ if (!CurrentMIImmOp.isImm ())
637
+ continue ;
638
+
639
+ NewOffset = CurrentMIImmOp.getImm () - FirstReachedMIImmOp.getImm ();
640
+
641
+ // This is the first occuring Addi, so skip modifying this
642
+ if (CurrentMI == FirstReachedMI) {
643
+ continue ;
644
+ }
645
+
646
+ if (CurrentMI->getParent () != FirstReachedMI->getParent ())
647
+ continue ;
648
+
649
+ // Modify the Addi instruction only if it could be used to modify a
650
+ // future load/store instruction and get removed
651
+ //
652
+ // This check is needed because, if we modify the current Addi instruction
653
+ // we create RAW dependence between the FirstReached Addi and the current
654
+ // one, which could result in extra packets. So we only do this change if
655
+ // we know the current Addi would get removed later
656
+ if (!usedInLoadStore (CurrentInstSN, NewOffset)) {
657
+ return false ;
658
+ }
659
+
660
+ // Verify whether the First Addi's definition register is still live when
661
+ // we reach the current Addi
662
+ RegisterRef FirstReachedDefRR = FirstReachedUseDN.Addr ->getRegRef (*DFG);
663
+ NodeAddr<InstrNode *> CurrentAddiIN = CurrentInstUN.Addr ->getOwner (*DFG);
664
+ NodeAddr<RefNode *> NearestAA =
665
+ LV->getNearestAliasedRef (FirstReachedDefRR, CurrentAddiIN);
666
+ if ((DFG->IsDef (NearestAA) && NearestAA.Id != FirstReachedUseDN.Id ) ||
667
+ (!DFG->IsDef (NearestAA) &&
668
+ NearestAA.Addr ->getReachingDef () != FirstReachedUseDN.Id )) {
669
+ // Found another definition of FirstReachedDef
670
+ LLVM_DEBUG (dbgs () << " \t\t\t Could not modify below Addi since the first "
671
+ " defined Addi register was redefined\n " );
672
+ continue ;
673
+ }
674
+
675
+ MachineOperand CurrentMIBaseOp = CurrentMI->getOperand (1 );
676
+ if (CurrentMIBaseOp.getReg () != FirstReachedMI->getOperand (1 ).getReg ()) {
677
+ continue ;
678
+ }
679
+
680
+ // If we reached this point, then we can modify MI to use the result of
681
+ // FirstReachedMI
682
+ Changed |= updateAddBases (CurrentMI, FirstReachedMI, NewOffset);
683
+
684
+ // Update the reachingDef of the Current AddI use after change
685
+ CurrentInstUN.Addr ->linkToDef (CurrentInstUN.Id , FirstReachedUseDN);
686
+ }
687
+
688
+ return Changed;
689
+ }
690
+
691
+ bool HexagonOptAddrMode::updateAddBases (MachineInstr *CurrentMI,
692
+ MachineInstr *FirstReachedMI,
693
+ int64_t NewOffset) {
694
+ LLVM_DEBUG (dbgs () << " [About to modify the Addi]: " << *CurrentMI << " \n " );
695
+ const MachineOperand FirstReachedDef = FirstReachedMI->getOperand (0 );
696
+ Register FirstDefRegister = FirstReachedDef.getReg ();
697
+
698
+ MachineOperand &CurrentMIBaseOp = CurrentMI->getOperand (1 );
699
+ MachineOperand &CurrentMIImmOp = CurrentMI->getOperand (2 );
700
+
701
+ CurrentMIBaseOp.setReg (FirstDefRegister);
702
+ CurrentMIBaseOp.setIsUndef (FirstReachedDef.isUndef ());
703
+ CurrentMIBaseOp.setImplicit (FirstReachedDef.isImplicit ());
704
+ CurrentMIImmOp.setImm (NewOffset);
705
+ ProcessedAddiInsts.insert (CurrentMI);
706
+ MRI->clearKillFlags (FirstDefRegister);
707
+ return true ;
708
+ }
709
+
417
710
bool HexagonOptAddrMode::processAddUses (NodeAddr<StmtNode *> AddSN,
418
711
MachineInstr *AddMI,
419
712
const NodeList &UNodeList) {
@@ -737,7 +1030,6 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
737
1030
738
1031
for (unsigned i = OpStart; i < OpEnd; ++i)
739
1032
MIB.add (UseMI->getOperand (i));
740
-
741
1033
Deleted.insert (UseMI);
742
1034
}
743
1035
@@ -782,6 +1074,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
782
1074
<< " ]: " << *MI << " \n\t [InstrNode]: "
783
1075
<< Print<NodeAddr<InstrNode *>>(IA, *DFG) << ' \n ' );
784
1076
1077
+ if (MI->getOpcode () == Hexagon::A2_addi)
1078
+ Changed |= processAddBases (SA, MI);
785
1079
NodeList UNodeList;
786
1080
getAllRealUses (SA, UNodeList);
787
1081
@@ -869,6 +1163,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
869
1163
bool Changed = false ;
870
1164
auto &HST = MF.getSubtarget <HexagonSubtarget>();
871
1165
MRI = &MF.getRegInfo ();
1166
+ TRI = MF.getSubtarget ().getRegisterInfo ();
872
1167
HII = HST.getInstrInfo ();
873
1168
HRI = HST.getRegisterInfo ();
874
1169
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
@@ -885,6 +1180,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
885
1180
LV = &L;
886
1181
887
1182
Deleted.clear ();
1183
+ ProcessedAddiInsts.clear ();
888
1184
NodeAddr<FuncNode *> FA = DFG->getFunc ();
889
1185
LLVM_DEBUG (dbgs () << " ==== [RefMap#]=====:\n "
890
1186
<< Print<NodeAddr<FuncNode *>>(FA, *DFG) << " \n " );
0 commit comments