Skip to content

Commit c010b72

Browse files
[HEXAGON] AddrModeOpt support for HVX and optimize adds (llvm#106368)
This patch does five things: 1. Adds support for optimizing the address mode of HVX load/store instructions. 2. Reduces the value of Add instruction immediates by replacing them with the difference from other Addi instructions that share a common base. For example, if we have the below sequence of instructions: r1 = add(r2,#1024) ... r3 = add(r2,#1152) ... r4 = add(r2,#1280), where the register r2 has the same reaching definition, they get modified to the below sequence: r1 = add(r2,#1024) ... r3 = add(r1,#128) ... r4 = add(r1,#256). 3. Fixes a bug in the pass where the addi instructions were modified based on a predicated register definition, leading to incorrect output. E.g.: INST-1: if (p0) r2 = add(r13,#128); INST-2: r1 = add(r2,#1024); INST-3: r3 = add(r2,#1152); INST-4: r5 = add(r2,#1280). In the above case, since r2's definition is predicated, we do not want to modify the uses of r2 in INST-3/INST-4 with add(r1,#128/#256). 4. Fixes a corner case: it looks like we never check whether the offset register is actually live (not clobbered) at the optimization site. Add a check that it is live at the MBB entrance; the rest should have already been verified. 5. Fixes bad codegen: for whatever reason we do the transformation without checking whether the value in the register actually reaches the user. This is the second identical fix for this pass. Co-authored-by: Anirudh Sundar <[email protected]> Co-authored-by: Sergei Larin <[email protected]>
1 parent 536bdc9 commit c010b72

File tree

5 files changed

+618
-8
lines changed

5 files changed

+618
-8
lines changed

llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp

Lines changed: 297 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/Support/Debug.h"
3636
#include "llvm/Support/ErrorHandling.h"
3737
#include "llvm/Support/raw_ostream.h"
38+
#include <algorithm>
3839
#include <cassert>
3940
#include <cstdint>
4041

@@ -80,8 +81,10 @@ class HexagonOptAddrMode : public MachineFunctionPass {
8081
private:
8182
using MISetType = DenseSet<MachineInstr *>;
8283
using InstrEvalMap = DenseMap<MachineInstr *, bool>;
84+
DenseSet<MachineInstr *> ProcessedAddiInsts;
8385

8486
MachineRegisterInfo *MRI = nullptr;
87+
const TargetRegisterInfo *TRI = nullptr;
8588
const HexagonInstrInfo *HII = nullptr;
8689
const HexagonRegisterInfo *HRI = nullptr;
8790
MachineDominatorTree *MDT = nullptr;
@@ -93,6 +96,15 @@ class HexagonOptAddrMode : public MachineFunctionPass {
9396
bool processBlock(NodeAddr<BlockNode *> BA);
9497
bool xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI,
9598
NodeAddr<UseNode *> UseN, unsigned UseMOnum);
99+
bool processAddBases(NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI);
100+
bool usedInLoadStore(NodeAddr<StmtNode *> CurrentInstSN, int64_t NewOffset);
101+
bool findFirstReachedInst(
102+
MachineInstr *AddMI,
103+
std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>>
104+
&AddiList,
105+
NodeAddr<StmtNode *> &UseSN);
106+
bool updateAddBases(MachineInstr *CurrentMI, MachineInstr *FirstReachedMI,
107+
int64_t NewOffset);
96108
bool processAddUses(NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI,
97109
const NodeList &UNodeList);
98110
bool updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI);
@@ -207,8 +219,17 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
207219
return false;
208220

209221
for (auto &Mo : UseMI.operands())
222+
// Is it a frame index?
210223
if (Mo.isFI())
211224
return false;
225+
// Does the OffsetReg definition actually reach UseMI?
226+
if (!UseMI.getParent()->isLiveIn(OffsetReg) &&
227+
MI.getParent() != UseMI.getParent()) {
228+
LLVM_DEBUG(dbgs() << " The offset reg " << printReg(OffsetReg, TRI)
229+
<< " is NOT live in to MBB "
230+
<< UseMI.getParent()->getName() << "\n");
231+
return false;
232+
}
212233
}
213234
return true;
214235
}
@@ -327,6 +348,14 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
327348
if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
328349
MI->getParent() != UseMI->getParent())
329350
return false;
351+
// Does the LRExtReg definition actually reach UseMI?
352+
if (!UseMI->getParent()->isLiveIn(LRExtReg) &&
353+
MI->getParent() != UseMI->getParent()) {
354+
LLVM_DEBUG(dbgs() << " The LRExtReg reg " << printReg(LRExtReg, TRI)
355+
<< " is NOT live in to MBB "
356+
<< UseMI->getParent()->getName() << "\n");
357+
return false;
358+
}
330359
}
331360
return true;
332361
}
@@ -344,6 +373,12 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
344373
case Hexagon::V6_vgathermhwq_pseudo:
345374
return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false);
346375
default:
376+
if (HII->getAddrMode(*MI) == HexagonII::BaseImmOffset) {
377+
// The immediates are mentioned in multiples of vector counts
378+
unsigned AlignMask = HII->getMemAccessSize(*MI) - 1;
379+
if ((AlignMask & Offset) == 0)
380+
return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false);
381+
}
347382
return false;
348383
}
349384
}
@@ -414,6 +449,264 @@ unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) {
414449
}
415450
}
416451

452+
// Reports whether any use gathered by getAllRealUses() for CurrentInstSN
// belongs to an instruction that may load or may store and for which
// isValidOffset() accepts NewOffset.
//
// \param CurrentInstSN statement node whose uses are inspected.
// \param NewOffset     candidate offset to validate against each user.
// \returns true as soon as one qualifying load/store user is found, false if
//          none of the collected uses qualifies.
bool HexagonOptAddrMode::usedInLoadStore(NodeAddr<StmtNode *> CurrentInstSN,
                                         int64_t NewOffset) {
  NodeList RealUses;
  getAllRealUses(CurrentInstSN, RealUses);

  for (NodeAddr<UseNode *> UseNA : RealUses) {
    NodeAddr<StmtNode *> OwnerSN = UseNA.Addr->getOwner(*DFG);
    MachineInstr *UserMI = OwnerSN.Addr->getCode();
    const MCInstrDesc &Desc = UserMI->getDesc();

    // Only memory instructions are of interest; the offset validity check is
    // performed solely for those.
    const bool TouchesMemory = Desc.mayLoad() || Desc.mayStore();
    if (TouchesMemory && isValidOffset(UserMI, NewOffset))
      return true;
  }
  return false;
}
472+
473+
// Locates the earliest A2_addi in AddMI's basic block that also appears in
// AddiList. That instruction is the one left untouched by the caller, which
// avoids having to move instructions around.
//
// \param AddMI    instruction whose parent block is scanned, in block order.
// \param AddiList candidate (statement node, use node) pairs.
// \param UseSN    out-parameter; set to the matching statement node on
//                 success and left unmodified otherwise.
// \returns true if a matching instruction was found in the block.
//
// TODO: find Addi instructions across basic blocks.
//
// TODO: Try to remove this and add a solution that optimizes the number of
// Addi instructions that can be modified. That would require choosing the
// Addi with the median offset value and moving that instruction above the
// others. Since this pass runs after register allocation, there might be
// multiple cases that need to be handled if we move instructions around.
bool HexagonOptAddrMode::findFirstReachedInst(
    MachineInstr *AddMI,
    std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> &AddiList,
    NodeAddr<StmtNode *> &UseSN) {
  for (MachineInstr &Candidate : *AddMI->getParent()) {
    // Anything that is not an Addi cannot be in the list.
    if (Candidate.getOpcode() != Hexagon::A2_addi)
      continue;

    // First list entry whose statement node wraps this instruction wins.
    for (const auto &Entry : AddiList) {
      if (Entry.first.Addr->getCode() != &Candidate)
        continue;
      UseSN = Entry.first;
      return true;
    }
  }
  return false;
}
505+
506+
// This function tries to modify the immediate value in Hexagon::Addi
507+
// instructions, so that the immediates could then be moved into a load/store
508+
// instruction with offset and the add removed completely when we call
509+
// processAddUses
510+
//
511+
// For Example, If we have the below sequence of instructions:
512+
//
513+
// r1 = add(r2,#1024)
514+
// ...
515+
// r3 = add(r2,#1152)
516+
// ...
517+
// r4 = add(r2,#1280)
518+
//
519+
// Where the register r2 has the same reaching definition, They get modified to
520+
// the below sequence:
521+
//
522+
// r1 = add(r2,#1024)
523+
// ...
524+
// r3 = add(r1,#128)
525+
// ...
526+
// r4 = add(r1,#256)
527+
//
528+
// The below change helps the processAddUses method to later move the
529+
// immediates #128 and #256 into a load/store instruction that can take an
530+
// offset, like the Vd = mem(Rt+#s4)
531+
bool HexagonOptAddrMode::processAddBases(NodeAddr<StmtNode *> AddSN,
532+
MachineInstr *AddMI) {
533+
534+
bool Changed = false;
535+
536+
LLVM_DEBUG(dbgs() << "\n\t\t[Processing Addi]: " << *AddMI << "\n");
537+
538+
auto Processed =
539+
[](const MachineInstr *MI,
540+
const DenseSet<MachineInstr *> &ProcessedAddiInsts) -> bool {
541+
// If we've already processed this Addi, just return
542+
if (ProcessedAddiInsts.find(MI) != ProcessedAddiInsts.end()) {
543+
LLVM_DEBUG(dbgs() << "\t\t\tAddi already found in ProcessedAddiInsts: "
544+
<< *MI << "\n\t\t\tSkipping...");
545+
return true;
546+
}
547+
return false;
548+
};
549+
550+
if (Processed(AddMI, ProcessedAddiInsts))
551+
return Changed;
552+
ProcessedAddiInsts.insert(AddMI);
553+
554+
// Get the base register that would be shared by other Addi Intructions
555+
Register BaseReg = AddMI->getOperand(1).getReg();
556+
557+
// Store a list of all Addi instructions that share the above common base
558+
// register
559+
std::vector<std::pair<NodeAddr<StmtNode *>, NodeAddr<UseNode *>>> AddiList;
560+
561+
NodeId UAReachingDefID;
562+
// Find the UseNode that contains the base register and it's reachingDef
563+
for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) {
564+
RegisterRef URR = UA.Addr->getRegRef(*DFG);
565+
if (BaseReg != URR.Reg)
566+
continue;
567+
568+
UAReachingDefID = UA.Addr->getReachingDef();
569+
NodeAddr<DefNode *> UADef = DFG->addr<DefNode *>(UAReachingDefID);
570+
if (!UAReachingDefID || UADef.Addr->getFlags() & NodeAttrs::PhiRef) {
571+
LLVM_DEBUG(dbgs() << "\t\t\t Could not find reachingDef. Skipping...\n");
572+
return false;
573+
}
574+
}
575+
576+
NodeAddr<DefNode *> UAReachingDef = DFG->addr<DefNode *>(UAReachingDefID);
577+
NodeAddr<StmtNode *> ReachingDefStmt = UAReachingDef.Addr->getOwner(*DFG);
578+
579+
// If the reaching definition is a predicated instruction, this might not be
580+
// the only definition of our base register, so return immediately.
581+
MachineInstr *ReachingDefInstr = ReachingDefStmt.Addr->getCode();
582+
if (HII->isPredicated(*ReachingDefInstr))
583+
return false;
584+
585+
NodeList AddiUseList;
586+
587+
// Find all Addi instructions that share the same base register and add them
588+
// to the AddiList
589+
getAllRealUses(ReachingDefStmt, AddiUseList);
590+
for (auto I = AddiUseList.begin(), E = AddiUseList.end(); I != E; ++I) {
591+
NodeAddr<UseNode *> UN = *I;
592+
NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG);
593+
MachineInstr *MI = SN.Addr->getCode();
594+
595+
// Only add instructions if it's an Addi and it's not already processed.
596+
if (MI->getOpcode() == Hexagon::A2_addi &&
597+
!(MI != AddMI && Processed(MI, ProcessedAddiInsts))) {
598+
AddiList.push_back({SN, UN});
599+
600+
// This ensures that we process each instruction only once
601+
ProcessedAddiInsts.insert(MI);
602+
}
603+
}
604+
605+
// If there's only one Addi instruction, nothing to do here
606+
if (AddiList.size() <= 1)
607+
return Changed;
608+
609+
NodeAddr<StmtNode *> FirstReachedUseSN;
610+
// Find the first reached use of Addi instruction from the list
611+
if (!findFirstReachedInst(AddMI, AddiList, FirstReachedUseSN))
612+
return Changed;
613+
614+
// If we reach this point we know that the StmtNode FirstReachedUseSN is for
615+
// an Addi instruction. So, we're guaranteed to have just one DefNode, and
616+
// hence we can access the front() directly without checks
617+
NodeAddr<DefNode *> FirstReachedUseDN =
618+
FirstReachedUseSN.Addr->members_if(DFG->IsDef, *DFG).front();
619+
620+
MachineInstr *FirstReachedMI = FirstReachedUseSN.Addr->getCode();
621+
const MachineOperand FirstReachedMIImmOp = FirstReachedMI->getOperand(2);
622+
if (!FirstReachedMIImmOp.isImm())
623+
return false;
624+
625+
for (auto &I : AddiList) {
626+
NodeAddr<StmtNode *> CurrentInstSN = I.first;
627+
NodeAddr<UseNode *> CurrentInstUN = I.second;
628+
629+
MachineInstr *CurrentMI = CurrentInstSN.Addr->getCode();
630+
MachineOperand &CurrentMIImmOp = CurrentMI->getOperand(2);
631+
632+
int64_t NewOffset;
633+
634+
// Even though we know it's an Addi instruction, the second operand could be
635+
// a global value and not an immediate
636+
if (!CurrentMIImmOp.isImm())
637+
continue;
638+
639+
NewOffset = CurrentMIImmOp.getImm() - FirstReachedMIImmOp.getImm();
640+
641+
// This is the first occuring Addi, so skip modifying this
642+
if (CurrentMI == FirstReachedMI) {
643+
continue;
644+
}
645+
646+
if (CurrentMI->getParent() != FirstReachedMI->getParent())
647+
continue;
648+
649+
// Modify the Addi instruction only if it could be used to modify a
650+
// future load/store instruction and get removed
651+
//
652+
// This check is needed because, if we modify the current Addi instruction
653+
// we create RAW dependence between the FirstReached Addi and the current
654+
// one, which could result in extra packets. So we only do this change if
655+
// we know the current Addi would get removed later
656+
if (!usedInLoadStore(CurrentInstSN, NewOffset)) {
657+
return false;
658+
}
659+
660+
// Verify whether the First Addi's definition register is still live when
661+
// we reach the current Addi
662+
RegisterRef FirstReachedDefRR = FirstReachedUseDN.Addr->getRegRef(*DFG);
663+
NodeAddr<InstrNode *> CurrentAddiIN = CurrentInstUN.Addr->getOwner(*DFG);
664+
NodeAddr<RefNode *> NearestAA =
665+
LV->getNearestAliasedRef(FirstReachedDefRR, CurrentAddiIN);
666+
if ((DFG->IsDef(NearestAA) && NearestAA.Id != FirstReachedUseDN.Id) ||
667+
(!DFG->IsDef(NearestAA) &&
668+
NearestAA.Addr->getReachingDef() != FirstReachedUseDN.Id)) {
669+
// Found another definition of FirstReachedDef
670+
LLVM_DEBUG(dbgs() << "\t\t\tCould not modify below Addi since the first "
671+
"defined Addi register was redefined\n");
672+
continue;
673+
}
674+
675+
MachineOperand CurrentMIBaseOp = CurrentMI->getOperand(1);
676+
if (CurrentMIBaseOp.getReg() != FirstReachedMI->getOperand(1).getReg()) {
677+
continue;
678+
}
679+
680+
// If we reached this point, then we can modify MI to use the result of
681+
// FirstReachedMI
682+
Changed |= updateAddBases(CurrentMI, FirstReachedMI, NewOffset);
683+
684+
// Update the reachingDef of the Current AddI use after change
685+
CurrentInstUN.Addr->linkToDef(CurrentInstUN.Id, FirstReachedUseDN);
686+
}
687+
688+
return Changed;
689+
}
690+
691+
// Rewrites CurrentMI so that its base operand (operand 1) becomes the register
// defined by operand 0 of FirstReachedMI and its immediate (operand 2) becomes
// NewOffset. The undef and implicit flags of the new base operand are taken
// from FirstReachedMI's def operand. CurrentMI is recorded in
// ProcessedAddiInsts and kill flags on the new base register are cleared.
//
// \param CurrentMI      the Addi instruction to rewrite.
// \param FirstReachedMI the Addi whose result is used as the new base.
// \param NewOffset      immediate to store in CurrentMI.
// \returns always true.
bool HexagonOptAddrMode::updateAddBases(MachineInstr *CurrentMI,
                                        MachineInstr *FirstReachedMI,
                                        int64_t NewOffset) {
  LLVM_DEBUG(dbgs() << "[About to modify the Addi]: " << *CurrentMI << "\n");

  const MachineOperand &DefOp = FirstReachedMI->getOperand(0);
  const Register NewBase = DefOp.getReg();

  // Redirect the base operand to the first Addi's result.
  MachineOperand &BaseOp = CurrentMI->getOperand(1);
  BaseOp.setReg(NewBase);
  BaseOp.setIsUndef(DefOp.isUndef());
  BaseOp.setImplicit(DefOp.isImplicit());

  // Install the offset computed by the caller.
  CurrentMI->getOperand(2).setImm(NewOffset);

  ProcessedAddiInsts.insert(CurrentMI);
  // The register now has an additional use, so existing kill flags on it are
  // dropped.
  MRI->clearKillFlags(NewBase);
  return true;
}
709+
417710
bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
418711
MachineInstr *AddMI,
419712
const NodeList &UNodeList) {
@@ -737,7 +1030,6 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
7371030

7381031
for (unsigned i = OpStart; i < OpEnd; ++i)
7391032
MIB.add(UseMI->getOperand(i));
740-
7411033
Deleted.insert(UseMI);
7421034
}
7431035

@@ -782,6 +1074,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
7821074
<< "]: " << *MI << "\n\t[InstrNode]: "
7831075
<< Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n');
7841076

1077+
if (MI->getOpcode() == Hexagon::A2_addi)
1078+
Changed |= processAddBases(SA, MI);
7851079
NodeList UNodeList;
7861080
getAllRealUses(SA, UNodeList);
7871081

@@ -869,6 +1163,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
8691163
bool Changed = false;
8701164
auto &HST = MF.getSubtarget<HexagonSubtarget>();
8711165
MRI = &MF.getRegInfo();
1166+
TRI = MF.getSubtarget().getRegisterInfo();
8721167
HII = HST.getInstrInfo();
8731168
HRI = HST.getRegisterInfo();
8741169
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
@@ -885,6 +1180,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
8851180
LV = &L;
8861181

8871182
Deleted.clear();
1183+
ProcessedAddiInsts.clear();
8881184
NodeAddr<FuncNode *> FA = DFG->getFunc();
8891185
LLVM_DEBUG(dbgs() << "==== [RefMap#]=====:\n "
8901186
<< Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n");

0 commit comments

Comments
 (0)