Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 87 additions & 18 deletions src/coreclr/jit/optcse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2513,17 +2513,26 @@ void CSE_HeuristicRL::CaptureLocalWeights()
LclVarDsc* const varDsc = m_pCompiler->lvaGetDescByTrackedIndex(trackedIndex);

// Locals with no references aren't enregistered
//
if (varDsc->lvRefCnt() == 0)
{
continue;
}

// Some LclVars always have stack homes
//
if (varDsc->lvDoNotEnregister)
{
continue;
}

// Only consider for integral types
//
if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsMask(varDsc->TypeGet()))
{
continue;
}

JITDUMP("V%02u," FMT_WT "\n", m_pCompiler->lvaGetLclNum(varDsc), varDsc->lvRefCntWtd());
m_localWeights->push_back(varDsc->lvRefCntWtd() / BB_UNITY_WEIGHT);
}
Expand Down Expand Up @@ -2723,6 +2732,17 @@ void CSE_HeuristicRL::SoftmaxPolicy()
// 14. cse is marked GTF_MAKE_CSE (0/1)
// 15. cse num distinct locals
// 16. cse num local occurrences
// 17. cse has call (0/1)
// 18. log (cse use count weighted * costEx)
// 19. log (cse use count weighted * num local occurrences)
// 20. cse "distance" (max postorder num - min postorder num) / num BBs
// 21. cse is "containable" (0/1)
// 22. cse is cheap & containable (0/1)
// 23. is live across call in possible LSRA ordering (0/1)
//
// -----
//
// 24. log (pressure estimate weight)
//
void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
{
Expand All @@ -2737,20 +2757,13 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
return;
}

const unsigned char costEx = cse->csdTree->GetCostEx();
const unsigned char costEx = cse->csdTree->GetCostEx();
const double deMinimis = 1e-3;
const double deMinimusAdj = -log(deMinimis);

features[0] = costEx;

if (cse->csdUseWtCnt > 0)
{
features[1] = log(cse->csdUseWtCnt);
}

if (cse->csdDefWtCnt > 0)
{
features[2] = log(cse->csdDefWtCnt);
}

features[1] = deMinimusAdj + log(max(deMinimis, cse->csdUseWtCnt));
features[2] = deMinimusAdj + log(max(deMinimis, cse->csdDefWtCnt));
features[3] = cse->csdTree->GetCostSz();
features[4] = cse->csdUseCount;
features[5] = cse->csdDefCount;
Expand All @@ -2770,6 +2783,7 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
features[9] = booleanScale * isSharedConstant;

const bool isMinCost = (costEx == Compiler::MIN_CSE_COST);
const bool isLowCost = (costEx <= Compiler::MIN_CSE_COST + 1);

features[10] = booleanScale * isMinCost;

Expand All @@ -2780,18 +2794,71 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
features[13] = booleanScale * (isMinCost & isLiveAcrossCall);

// Is any CSE tree for this candidate marked GTF_MAKE_CSE (hoisting)
// Also gather data for "distance" metric.
//
bool isMakeCse = false;
const unsigned numBBs = m_pCompiler->fgBBcount;
bool isMakeCse = false;
unsigned minPostorderNum = numBBs;
unsigned maxPostorderNum = 0;
BasicBlock* minPostorderBlock = nullptr;
BasicBlock* maxPostorderBlock = nullptr;
for (treeStmtLst* treeList = cse->csdTreeList; treeList != nullptr && !isMakeCse; treeList = treeList->tslNext)
{
isMakeCse = ((treeList->tslTree->gtFlags & GTF_MAKE_CSE) != 0);
BasicBlock* const treeBlock = treeList->tslBlock;
unsigned postorderNum = treeBlock->bbPostorderNum;
if (postorderNum < minPostorderNum)
{
minPostorderNum = postorderNum;
minPostorderBlock = treeBlock;
}

if (postorderNum > maxPostorderNum)
{
maxPostorderNum = postorderNum;
maxPostorderBlock = treeBlock;
}

isMakeCse |= ((treeList->tslTree->gtFlags & GTF_MAKE_CSE) != 0);
}
const unsigned blockSpread = maxPostorderNum - minPostorderNum;

features[14] = booleanScale * isMakeCse;

// Locals data
//
features[15] = cse->numDistinctLocals;
features[16] = cse->numLocalOccurrences;

// More
//
features[17] = booleanScale * ((cse->csdTree->gtFlags & GTF_CALL) != 0);
features[18] = deMinimusAdj + log(max(deMinimis, cse->csdUseCount * cse->csdUseWtCnt));
features[19] = deMinimusAdj + log(max(deMinimis, cse->numLocalOccurrences * cse->csdUseWtCnt));
features[20] = booleanScale * ((double)(blockSpread) / numBBs);

const bool isContainable = cse->csdTree->OperIs(GT_ADD, GT_NOT, GT_MUL, GT_LSH);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't such nodes be marked with GTF_ADDRMODE_NO_CSE by this point? Assuming you mean they could be contained as part of addressing modes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This stage of CSE will never see anything marked GTF_DONT_CSE.

This is a more general guess as to whether the operation can be subsumed by its parent, in particular I saw an AND(NOT ...) turn into andn so CSE of the NOT is not always an improvement.

features[21] = booleanScale * isContainable;
features[22] = booleanScale * (isContainable && isLowCost);

// LSRA "is live across call"
//
bool isLiveAcrossCallLSRA = isLiveAcrossCall;

if (!isLiveAcrossCallLSRA)
{
unsigned count = 0;
for (BasicBlock *block = minPostorderBlock;
block != nullptr && block != maxPostorderBlock && count < blockSpread; block = block->Next(), count++)
{
if (block->HasFlag(BBF_HAS_CALL))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this flag not reliable in optimized tier? I understand that it's OK for it to be not precise, just wonder if it's too unreliable.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CSE was already relying on this flag for its live across call analysis. I haven't checked whether it is reliable (it is set in fgMorphCall, so it should be pretty good).

{
isLiveAcrossCallLSRA = true;
break;
}
}
}

features[23] = booleanScale * isLiveAcrossCallLSRA;
}

//------------------------------------------------------------------------
Expand All @@ -2804,7 +2871,7 @@ void CSE_HeuristicRL::GetFeatures(CSEdsc* cse, double* features)
//
// Stopping features
//
// 17. int register pressure weight estimate (log)
// 24. int register pressure weight estimate (log)
//
// All boolean features are scaled up by booleanScale so their
// numeric range is similar to the non-boolean features
Expand All @@ -2819,8 +2886,9 @@ void CSE_HeuristicRL::GetStoppingFeatures(double* features)
// "remove" weight per local use occurrences * weightUses
// "add" weight of the CSE temp times * (weigh defs*2) + weightUses
//
double minWeight = 0.01;
double spillAtWeight = minWeight;
const double deMinimis = 1e-3;
double spillAtWeight = deMinimis;
const double deMinimusAdj = -log(deMinimis);

// Assume each already performed cse is occupying a registger
//
Expand All @@ -2845,7 +2913,8 @@ void CSE_HeuristicRL::GetStoppingFeatures(double* features)
// Large frame...?
// todo: scan all vars, not just tracked?
//
features[17] = log(max(spillAtWeight, minWeight));

features[24] = deMinimusAdj + log(max(deMinimis, spillAtWeight));
}

//------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/optcse.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class CSE_HeuristicRL : public CSE_HeuristicCommon

enum
{
numParameters = 19,
numParameters = 25,
booleanScale = 5,
maxSteps = 65, // MAX_CSE_CNT + 1 (for stopping)
};
Expand Down