|
| 1 | +//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This file implements the continuity stats calculation pass. |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +#include "bolt/Passes/ContinuityStats.h" |
| 14 | +#include "bolt/Core/BinaryBasicBlock.h" |
| 15 | +#include "bolt/Core/BinaryFunction.h" |
| 16 | +#include "bolt/Utils/CommandLineOpts.h" |
| 17 | +#include "llvm/Support/CommandLine.h" |
| 18 | +#include <queue> |
| 19 | +#include <unordered_map> |
| 20 | +#include <unordered_set> |
| 21 | + |
| 22 | +#define DEBUG_TYPE "bolt-opts" |
| 23 | + |
| 24 | +using namespace llvm; |
| 25 | +using namespace bolt; |
| 26 | + |
| 27 | +namespace opts { |
| 28 | +extern cl::opt<unsigned> Verbosity; |
| 29 | +static cl::opt<unsigned> NumFunctionsForContinuityCheck( |
| 30 | + "num-functions-for-continuity-check", |
| 31 | + cl::desc("number of hottest functions to print aggregated " |
| 32 | + "CFG discontinuity stats of."), |
| 33 | + cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); |
| 34 | +} // namespace opts |
| 35 | + |
| 36 | +namespace { |
| 37 | +using FunctionListType = std::vector<const BinaryFunction *>; |
| 38 | +using function_iterator = FunctionListType::iterator; |
| 39 | + |
| 40 | +template <typename T> |
| 41 | +void printDistribution(raw_ostream &OS, std::vector<T> &values, |
| 42 | + bool Fraction = false) { |
| 43 | + if (values.empty()) |
| 44 | + return; |
| 45 | + // Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%, |
| 46 | + // 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as |
| 47 | + // fractions instead of integers. |
| 48 | + std::sort(values.begin(), values.end()); |
| 49 | + |
| 50 | + auto printLine = [&](std::string Text, double Percent) { |
| 51 | + int Rank = int(values.size() * (1.0 - Percent / 100)); |
| 52 | + if (Percent == 0) |
| 53 | + Rank = values.size() - 1; |
| 54 | + if (Fraction) |
| 55 | + OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " |
| 56 | + << format("%.2lf%%", values[Rank] * 100) << "\n"; |
| 57 | + else |
| 58 | + OS << " " << Text << std::string(9 - Text.length(), ' ') << ": " |
| 59 | + << values[Rank] << "\n"; |
| 60 | + }; |
| 61 | + |
| 62 | + printLine("MAX", 0); |
| 63 | + const int percentages[] = {1, 5, 10, 20, 50, 80}; |
| 64 | + for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) { |
| 65 | + printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]); |
| 66 | + } |
| 67 | + printLine("MIN", 100); |
| 68 | +} |
| 69 | + |
| 70 | +void printCFGContinuityStats(raw_ostream &OS, |
| 71 | + iterator_range<function_iterator> &Functions) { |
| 72 | + // Given a perfect profile, every positive-execution-count BB should be |
| 73 | + // connected to an entry of the function through a positive-execution-count |
| 74 | + // directed path in the control flow graph. |
| 75 | + std::vector<size_t> NumUnreachables; |
| 76 | + std::vector<size_t> SumECUnreachables; |
| 77 | + std::vector<double> FractionECUnreachables; |
| 78 | + |
| 79 | + for (auto it = Functions.begin(); it != Functions.end(); ++it) { |
| 80 | + const BinaryFunction *Function = *it; |
| 81 | + if (Function->size() <= 1) |
| 82 | + continue; |
| 83 | + |
| 84 | + // Compute the sum of all BB execution counts (ECs). |
| 85 | + size_t NumPosECBBs = 0; |
| 86 | + size_t SumAllBBEC = 0; |
| 87 | + for (const BinaryBasicBlock &BB : *Function) { |
| 88 | + const size_t BBEC = BB.getKnownExecutionCount(); |
| 89 | + NumPosECBBs += BBEC > 0 ? 1 : 0; |
| 90 | + SumAllBBEC += BBEC; |
| 91 | + } |
| 92 | + |
| 93 | + // Perform BFS on subgraph of CFG induced by positive weight edges. |
| 94 | + // Compute the number of BBs reachable from the entry(s) of the function and |
| 95 | + // the sum of their execution counts (ECs). |
| 96 | + std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB; |
| 97 | + std::unordered_set<unsigned> Visited; |
| 98 | + std::queue<unsigned> Queue; |
| 99 | + for (const BinaryBasicBlock &BB : *Function) { |
| 100 | + // Make sure BB.getIndex() is not already in IndexToBB. |
| 101 | + assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end()); |
| 102 | + IndexToBB[BB.getIndex()] = &BB; |
| 103 | + if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) { |
| 104 | + Queue.push(BB.getIndex()); |
| 105 | + Visited.insert(BB.getIndex()); |
| 106 | + } |
| 107 | + } |
| 108 | + while (!Queue.empty()) { |
| 109 | + const unsigned BBIndex = Queue.front(); |
| 110 | + const BinaryBasicBlock *BB = IndexToBB[BBIndex]; |
| 111 | + Queue.pop(); |
| 112 | + auto SuccBIIter = BB->branch_info_begin(); |
| 113 | + for (const BinaryBasicBlock *Succ : BB->successors()) { |
| 114 | + const uint64_t Count = SuccBIIter->Count; |
| 115 | + if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) { |
| 116 | + ++SuccBIIter; |
| 117 | + continue; |
| 118 | + } |
| 119 | + if (!Visited.insert(Succ->getIndex()).second) { |
| 120 | + ++SuccBIIter; |
| 121 | + continue; |
| 122 | + } |
| 123 | + Queue.push(Succ->getIndex()); |
| 124 | + ++SuccBIIter; |
| 125 | + } |
| 126 | + } |
| 127 | + |
| 128 | + const size_t NumReachableBBs = Visited.size(); |
| 129 | + |
| 130 | + // Loop through Visited, and sum the corresponding BBs' execution counts |
| 131 | + // (ECs). |
| 132 | + size_t SumReachableBBEC = 0; |
| 133 | + for (const unsigned BBIndex : Visited) { |
| 134 | + const BinaryBasicBlock *BB = IndexToBB[BBIndex]; |
| 135 | + SumReachableBBEC += BB->getKnownExecutionCount(); |
| 136 | + } |
| 137 | + |
| 138 | + const size_t NumPosECBBsUnreachableFromEntry = |
| 139 | + NumPosECBBs - NumReachableBBs; |
| 140 | + const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; |
| 141 | + const double FractionECUnreachable = |
| 142 | + (double)SumUnreachableBBEC / SumAllBBEC; |
| 143 | + |
| 144 | + if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { |
| 145 | + OS << "Non-trivial CFG discontinuity observed in function " |
| 146 | + << Function->getPrintName() << "\n"; |
| 147 | + LLVM_DEBUG(Function->dump()); |
| 148 | + } |
| 149 | + |
| 150 | + NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry); |
| 151 | + SumECUnreachables.push_back(SumUnreachableBBEC); |
| 152 | + FractionECUnreachables.push_back(FractionECUnreachable); |
| 153 | + } |
| 154 | + |
| 155 | + if (FractionECUnreachables.empty()) |
| 156 | + return; |
| 157 | + |
| 158 | + std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end()); |
| 159 | + const int Rank = int(FractionECUnreachables.size() * 0.95); |
| 160 | + OS << format("top 5%% function CFG discontinuity is %.2lf%%\n", |
| 161 | + FractionECUnreachables[Rank] * 100); |
| 162 | + |
| 163 | + if (opts::Verbosity >= 1) { |
| 164 | + OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n" |
| 165 | + << "distribution of NUM(unreachable POS BBs) among all focal " |
| 166 | + "functions\n"; |
| 167 | + printDistribution(OS, NumUnreachables); |
| 168 | + |
| 169 | + OS << "distribution of SUM_EC(unreachable POS BBs) among all focal " |
| 170 | + "functions\n"; |
| 171 | + printDistribution(OS, SumECUnreachables); |
| 172 | + |
| 173 | + OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all " |
| 174 | + "POS BBs))] among all focal functions\n"; |
| 175 | + printDistribution(OS, FractionECUnreachables, /*Fraction=*/true); |
| 176 | + } |
| 177 | +} |
| 178 | + |
| 179 | +void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, |
| 180 | + size_t NumTopFunctions) { |
| 181 | + // Sort the list of functions by execution counts (reverse). |
| 182 | + llvm::sort(ValidFunctions, |
| 183 | + [&](const BinaryFunction *A, const BinaryFunction *B) { |
| 184 | + return A->getKnownExecutionCount() > B->getKnownExecutionCount(); |
| 185 | + }); |
| 186 | + |
| 187 | + const size_t RealNumTopFunctions = |
| 188 | + std::min(NumTopFunctions, ValidFunctions.size()); |
| 189 | + |
| 190 | + iterator_range<function_iterator> Functions( |
| 191 | + ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions); |
| 192 | + |
| 193 | + BC.outs() << format("BOLT-INFO: among the hottest %zu functions ", |
| 194 | + RealNumTopFunctions); |
| 195 | + printCFGContinuityStats(BC.outs(), Functions); |
| 196 | + |
| 197 | + // Print more detailed bucketed stats if requested. |
| 198 | + if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { |
| 199 | + const size_t PerBucketSize = RealNumTopFunctions / 5; |
| 200 | + BC.outs() << format( |
| 201 | + "Detailed stats for 5 buckets, each with %zu functions:\n", |
| 202 | + PerBucketSize); |
| 203 | + |
| 204 | + // For each bucket, print the CFG continuity stats of the functions in the |
| 205 | + // bucket. |
| 206 | + for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) { |
| 207 | + const size_t StartIndex = BucketIndex * PerBucketSize; |
| 208 | + const size_t EndIndex = StartIndex + PerBucketSize; |
| 209 | + iterator_range<function_iterator> Functions( |
| 210 | + ValidFunctions.begin() + StartIndex, |
| 211 | + ValidFunctions.begin() + EndIndex); |
| 212 | + const size_t MaxFunctionExecutionCount = |
| 213 | + ValidFunctions[StartIndex]->getKnownExecutionCount(); |
| 214 | + const size_t MinFunctionExecutionCount = |
| 215 | + ValidFunctions[EndIndex - 1]->getKnownExecutionCount(); |
| 216 | + BC.outs() << format("----------------\n| Bucket %zu: " |
| 217 | + "|\n----------------\n", |
| 218 | + BucketIndex + 1) |
| 219 | + << format( |
| 220 | + "execution counts of the %zu functions in the bucket: " |
| 221 | + "%zu-%zu\n", |
| 222 | + EndIndex - StartIndex, MinFunctionExecutionCount, |
| 223 | + MaxFunctionExecutionCount); |
| 224 | + printCFGContinuityStats(BC.outs(), Functions); |
| 225 | + } |
| 226 | + } |
| 227 | +} |
| 228 | +} // namespace |
| 229 | + |
| 230 | +bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const { |
| 231 | + if (BF.empty() || !BF.hasValidProfile()) |
| 232 | + return false; |
| 233 | + |
| 234 | + return BinaryFunctionPass::shouldOptimize(BF); |
| 235 | +} |
| 236 | + |
| 237 | +Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) { |
| 238 | + // Create a list of functions with valid profiles. |
| 239 | + FunctionListType ValidFunctions; |
| 240 | + for (const auto &BFI : BC.getBinaryFunctions()) { |
| 241 | + const BinaryFunction *Function = &BFI.second; |
| 242 | + if (PrintContinuityStats::shouldOptimize(*Function)) |
| 243 | + ValidFunctions.push_back(Function); |
| 244 | + } |
| 245 | + if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0) |
| 246 | + return Error::success(); |
| 247 | + |
| 248 | + printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck); |
| 249 | + return Error::success(); |
| 250 | +} |
0 commit comments