-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Use an enum for demanded LMUL in RISCVInsertVSETVLI. NFC #92513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
In needVSETVLI, used by the forward insertion pass, we have some rules where we can relax the demanded fields for slides and splats when VL=1. However this only works if we don't increase LMUL to anything > M1; otherwise we would end up clobbering random registers. Rather than check the VSETVLIInfo we're transitioning to, store this information in DemandedFields and have isCompatible check it. That way an upcoming patch can share these VL=1 rules with RISCVCoalesceVSETVLI, which uses isCompatible and not needVSETVLI.
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changes: In needVSETVLI, used by the forward insertion pass, we have some rules where we can relax the demanded fields for slides and splats when VL=1. However this only works if we don't increase LMUL to anything > M1; otherwise we would end up clobbering random registers. Rather than check the VSETVLIInfo we're transitioning to, store this information in DemandedFields and have isCompatible check it. That way an upcoming patch can share these VL=1 rules with RISCVCoalesceVSETVLI, which uses isCompatible and not needVSETVLI. Full diff: https://github.com/llvm/llvm-project/pull/92513.diff 1 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 324ce5cb5ed7d..8fb5af09663e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -215,7 +215,11 @@ struct DemandedFields {
// than 64.
SEWNone = 0 // We don't need to preserve SEW at all.
} SEW = SEWNone;
- bool LMUL = false;
+ enum : uint8_t {
+ LMULEqual = 2, // The exact value of LMUL needs to be preserved.
+ LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
+ LMULNone = 0 // We don't need to preserve LMUL at all.
+ } LMUL = LMULNone;
bool SEWLMULRatio = false;
bool TailPolicy = false;
bool MaskPolicy = false;
@@ -233,7 +237,7 @@ struct DemandedFields {
// Mark all VTYPE subfields and properties as demanded
void demandVTYPE() {
SEW = SEWEqual;
- LMUL = true;
+ LMUL = LMULEqual;
SEWLMULRatio = true;
TailPolicy = true;
MaskPolicy = true;
@@ -250,7 +254,7 @@ struct DemandedFields {
VLAny |= B.VLAny;
VLZeroness |= B.VLZeroness;
SEW = std::max(SEW, B.SEW);
- LMUL |= B.LMUL;
+ LMUL = std::max(LMUL, B.LMUL);
SEWLMULRatio |= B.SEWLMULRatio;
TailPolicy |= B.TailPolicy;
MaskPolicy |= B.MaskPolicy;
@@ -284,7 +288,19 @@ struct DemandedFields {
break;
};
OS << ", ";
- OS << "LMUL=" << LMUL << ", ";
+ OS << "LMUL=";
+ switch (LMUL) {
+ case LMULEqual:
+ OS << "LMULEqual";
+ break;
+ case LMULLessThanOrEqualToM1:
+ OS << "LMULLessThanOrEqualToM1";
+ break;
+ case LMULNone:
+ OS << "LMULNone";
+ break;
+ };
+ OS << ", ";
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
OS << "MaskPolicy=" << MaskPolicy;
@@ -301,6 +317,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
}
#endif
/// Return true if \p LMUL is M1 or smaller, i.e. it is a fractional LMUL
/// (MF2/MF4/MF8) or exactly M1. Used to check that relaxing the demanded
/// LMUL cannot grow a vector operand beyond a single register group.
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  // decodeVLMUL yields the integer multiplier and whether it is fractional;
  // "<= M1" is therefore: fractional, or an integral multiplier of 1.
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}
+
/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
@@ -324,9 +345,18 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
break;
}
- if (Used.LMUL &&
- RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
- return false;
+ switch (Used.LMUL) {
+ case DemandedFields::LMULNone:
+ break;
+ case DemandedFields::LMULEqual:
+ if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
+ return false;
+ break;
+ case DemandedFields::LMULLessThanOrEqualToM1:
+ if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
+ return false;
+ break;
+ }
if (Used.SEWLMULRatio) {
auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
@@ -382,7 +412,7 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// in the opcode. This is asserted when constructing the VSETVLIInfo.
if (getEEWForLoadStore(MI)) {
Res.SEW = DemandedFields::SEWNone;
- Res.LMUL = false;
+ Res.LMUL = DemandedFields::LMULNone;
}
// Store instructions don't use the policy fields.
@@ -397,12 +427,12 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// * The policy bits can probably be ignored..
if (isMaskRegOp(MI)) {
Res.SEW = DemandedFields::SEWNone;
- Res.LMUL = false;
+ Res.LMUL = DemandedFields::LMULNone;
}
// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
if (isScalarInsertInstr(MI)) {
- Res.LMUL = false;
+ Res.LMUL = DemandedFields::LMULNone;
Res.SEWLMULRatio = false;
Res.VLAny = false;
// For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
@@ -423,7 +453,7 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// vmv.x.s, and vmv.f.s are unconditional and ignore everything except SEW.
if (isScalarExtractInstr(MI)) {
assert(!RISCVII::hasVLOp(TSFlags));
- Res.LMUL = false;
+ Res.LMUL = DemandedFields::LMULNone;
Res.SEWLMULRatio = false;
Res.TailPolicy = false;
Res.MaskPolicy = false;
@@ -1107,11 +1137,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
}
-static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
- auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
- return Fractional || LMul == 1;
-}
-
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
@@ -1133,10 +1158,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// * The LMUL1 restriction is for machines whose latency may depend on VL.
// * As above, this is only legal for tail "undefined" not "agnostic".
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
- isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI)) {
+ hasUndefinedMergeOp(MI)) {
Used.VLAny = false;
Used.VLZeroness = true;
- Used.LMUL = false;
+ Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Used.TailPolicy = false;
}
@@ -1146,9 +1171,8 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// Since a splat is non-constant time in LMUL, we do need to be careful to not
// increase the number of active vector registers (unlike for vmv.s.x.)
if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
- Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
- hasUndefinedMergeOp(MI)) {
- Used.LMUL = false;
+ Require.getAVLImm() == 1 && hasUndefinedMergeOp(MI)) {
+ Used.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
Used.SEWLMULRatio = false;
Used.VLAny = false;
if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
@@ -1189,7 +1213,7 @@ static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
Info.setVLMul(*NewVLMul);
- Demanded.LMUL = true;
+ Demanded.LMUL = DemandedFields::LMULEqual;
}
return Info;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
In needVSETVLI used by the forward insertion pass, we have some rules where we can relax the demanded fields for slides and splats when VL=1.
However this only works if we don't increase LMUL to anything > M1; otherwise we would end up clobbering random registers.
Rather than check the VSETVLIInfo we're transitioning to, store this information in DemandedFields and have isCompatible check it.
That way an upcoming patch can share these VL=1 rules with RISCVCoalesceVSETVLI, which uses isCompatible and not needVSETVLI.