|
18 | 18 | #include "llvm/ADT/SmallVector.h"
|
19 | 19 | #include "llvm/ADT/Statistic.h"
|
20 | 20 | #include "llvm/ADT/StringRef.h"
|
| 21 | +#include "llvm/Analysis/MemoryBuiltins.h" |
| 22 | +#include "llvm/Analysis/MemoryProfileInfo.h" |
21 | 23 | #include "llvm/Analysis/ValueTracking.h"
|
22 | 24 | #include "llvm/IR/Constant.h"
|
23 | 25 | #include "llvm/IR/DataLayout.h"
|
| 26 | +#include "llvm/IR/DiagnosticInfo.h" |
24 | 27 | #include "llvm/IR/Function.h"
|
25 | 28 | #include "llvm/IR/GlobalValue.h"
|
26 | 29 | #include "llvm/IR/IRBuilder.h"
|
|
30 | 33 | #include "llvm/IR/Type.h"
|
31 | 34 | #include "llvm/IR/Value.h"
|
32 | 35 | #include "llvm/ProfileData/InstrProf.h"
|
| 36 | +#include "llvm/ProfileData/InstrProfReader.h" |
| 37 | +#include "llvm/Support/BLAKE3.h" |
33 | 38 | #include "llvm/Support/CommandLine.h"
|
34 | 39 | #include "llvm/Support/Debug.h"
|
| 40 | +#include "llvm/Support/HashBuilder.h" |
35 | 41 | #include "llvm/TargetParser/Triple.h"
|
36 | 42 | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
37 | 43 | #include "llvm/Transforms/Utils/ModuleUtils.h"
|
| 44 | +#include <map> |
| 45 | +#include <set> |
38 | 46 |
|
39 | 47 | using namespace llvm;
|
| 48 | +using namespace llvm::memprof; |
40 | 49 |
|
41 | 50 | #define DEBUG_TYPE "memprof"
|
42 | 51 |
|
| 52 | +namespace llvm { |
| 53 | +extern cl::opt<bool> PGOWarnMissing; |
| 54 | +extern cl::opt<bool> NoPGOWarnMismatch; |
| 55 | +extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; |
| 56 | +} // namespace llvm |
| 57 | + |
43 | 58 | constexpr int LLVM_MEM_PROFILER_VERSION = 1;
|
44 | 59 |
|
45 | 60 | // Size of memory mapped to a single shadow location.
|
@@ -128,6 +143,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
|
128 | 143 | STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
|
129 | 144 | STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
|
130 | 145 | STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
|
| 146 | +STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); |
131 | 147 |
|
132 | 148 | namespace {
|
133 | 149 |
|
@@ -601,3 +617,251 @@ bool MemProfiler::instrumentFunction(Function &F) {
|
601 | 617 |
|
602 | 618 | return FunctionModified;
|
603 | 619 | }
|
| 620 | + |
| 621 | +static void addCallsiteMetadata(Instruction &I, |
| 622 | + std::vector<uint64_t> &InlinedCallStack, |
| 623 | + LLVMContext &Ctx) { |
| 624 | + I.setMetadata(LLVMContext::MD_callsite, |
| 625 | + buildCallstackMetadata(InlinedCallStack, Ctx)); |
| 626 | +} |
| 627 | + |
| 628 | +static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, |
| 629 | + uint32_t Column) { |
| 630 | + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little> |
| 631 | + HashBuilder; |
| 632 | + HashBuilder.add(Function, LineOffset, Column); |
| 633 | + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); |
| 634 | + uint64_t Id; |
| 635 | + std::memcpy(&Id, Hash.data(), sizeof(Hash)); |
| 636 | + return Id; |
| 637 | +} |
| 638 | + |
| 639 | +static uint64_t computeStackId(const memprof::Frame &Frame) { |
| 640 | + return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); |
| 641 | +} |
| 642 | + |
| 643 | +static void addCallStack(CallStackTrie &AllocTrie, |
| 644 | + const AllocationInfo *AllocInfo) { |
| 645 | + SmallVector<uint64_t> StackIds; |
| 646 | + for (const auto &StackFrame : AllocInfo->CallStack) |
| 647 | + StackIds.push_back(computeStackId(StackFrame)); |
| 648 | + auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), |
| 649 | + AllocInfo->Info.getAllocCount(), |
| 650 | + AllocInfo->Info.getTotalLifetime()); |
| 651 | + AllocTrie.addCallStack(AllocType, StackIds); |
| 652 | +} |
| 653 | + |
| 654 | +// Helper to compare the InlinedCallStack computed from an instruction's debug |
| 655 | +// info to a list of Frames from profile data (either the allocation data or a |
| 656 | +// callsite). For callsites, the StartIndex to use in the Frame array may be |
| 657 | +// non-zero. |
| 658 | +static bool |
| 659 | +stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, |
| 660 | + ArrayRef<uint64_t> InlinedCallStack, |
| 661 | + unsigned StartIndex = 0) { |
| 662 | + auto StackFrame = ProfileCallStack.begin() + StartIndex; |
| 663 | + auto InlCallStackIter = InlinedCallStack.begin(); |
| 664 | + for (; StackFrame != ProfileCallStack.end() && |
| 665 | + InlCallStackIter != InlinedCallStack.end(); |
| 666 | + ++StackFrame, ++InlCallStackIter) { |
| 667 | + uint64_t StackId = computeStackId(*StackFrame); |
| 668 | + if (StackId != *InlCallStackIter) |
| 669 | + return false; |
| 670 | + } |
| 671 | + // Return true if we found and matched all stack ids from the call |
| 672 | + // instruction. |
| 673 | + return InlCallStackIter == InlinedCallStack.end(); |
| 674 | +} |
| 675 | + |
| 676 | +void llvm::readMemprof(Module &M, Function &F, |
| 677 | + IndexedInstrProfReader *MemProfReader, |
| 678 | + const TargetLibraryInfo &TLI) { |
| 679 | + auto &Ctx = M.getContext(); |
| 680 | + |
| 681 | + auto FuncName = getPGOFuncName(F); |
| 682 | + auto FuncGUID = Function::getGUID(FuncName); |
| 683 | + Expected<memprof::MemProfRecord> MemProfResult = |
| 684 | + MemProfReader->getMemProfRecord(FuncGUID); |
| 685 | + if (Error E = MemProfResult.takeError()) { |
| 686 | + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { |
| 687 | + auto Err = IPE.get(); |
| 688 | + bool SkipWarning = false; |
| 689 | + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName |
| 690 | + << ": "); |
| 691 | + if (Err == instrprof_error::unknown_function) { |
| 692 | + NumOfMemProfMissing++; |
| 693 | + SkipWarning = !PGOWarnMissing; |
| 694 | + LLVM_DEBUG(dbgs() << "unknown function"); |
| 695 | + } else if (Err == instrprof_error::hash_mismatch) { |
| 696 | + SkipWarning = |
| 697 | + NoPGOWarnMismatch || |
| 698 | + (NoPGOWarnMismatchComdatWeak && |
| 699 | + (F.hasComdat() || |
| 700 | + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
| 701 | + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); |
| 702 | + } |
| 703 | + |
| 704 | + if (SkipWarning) |
| 705 | + return; |
| 706 | + |
| 707 | + std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + |
| 708 | + Twine(" Hash = ") + std::to_string(FuncGUID)) |
| 709 | + .str(); |
| 710 | + |
| 711 | + Ctx.diagnose( |
| 712 | + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
| 713 | + }); |
| 714 | + return; |
| 715 | + } |
| 716 | + |
| 717 | + // Build maps of the location hash to all profile data with that leaf location |
| 718 | + // (allocation info and the callsites). |
| 719 | + std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; |
| 720 | + // For the callsites we need to record the index of the associated frame in |
| 721 | + // the frame array (see comments below where the map entries are added). |
| 722 | + std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> |
| 723 | + LocHashToCallSites; |
| 724 | + const auto MemProfRec = std::move(MemProfResult.get()); |
| 725 | + for (auto &AI : MemProfRec.AllocSites) { |
| 726 | + // Associate the allocation info with the leaf frame. The later matching |
| 727 | + // code will match any inlined call sequences in the IR with a longer prefix |
| 728 | + // of call stack frames. |
| 729 | + uint64_t StackId = computeStackId(AI.CallStack[0]); |
| 730 | + LocHashToAllocInfo[StackId].insert(&AI); |
| 731 | + } |
| 732 | + for (auto &CS : MemProfRec.CallSites) { |
| 733 | + // Need to record all frames from leaf up to and including this function, |
| 734 | + // as any of these may or may not have been inlined at this point. |
| 735 | + unsigned Idx = 0; |
| 736 | + for (auto &StackFrame : CS) { |
| 737 | + uint64_t StackId = computeStackId(StackFrame); |
| 738 | + LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); |
| 739 | + // Once we find this function, we can stop recording. |
| 740 | + if (StackFrame.Function == FuncGUID) |
| 741 | + break; |
| 742 | + } |
| 743 | + assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); |
| 744 | + } |
| 745 | + |
| 746 | + auto GetOffset = [](const DILocation *DIL) { |
| 747 | + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & |
| 748 | + 0xffff; |
| 749 | + }; |
| 750 | + |
| 751 | + // Now walk the instructions, looking up the associated profile data using |
| 752 | + // dbug locations. |
| 753 | + for (auto &BB : F) { |
| 754 | + for (auto &I : BB) { |
| 755 | + if (I.isDebugOrPseudoInst()) |
| 756 | + continue; |
| 757 | + // We are only interested in calls (allocation or interior call stack |
| 758 | + // context calls). |
| 759 | + auto *CI = dyn_cast<CallBase>(&I); |
| 760 | + if (!CI) |
| 761 | + continue; |
| 762 | + auto *CalledFunction = CI->getCalledFunction(); |
| 763 | + if (CalledFunction && CalledFunction->isIntrinsic()) |
| 764 | + continue; |
| 765 | + // List of call stack ids computed from the location hashes on debug |
| 766 | + // locations (leaf to inlined at root). |
| 767 | + std::vector<uint64_t> InlinedCallStack; |
| 768 | + // Was the leaf location found in one of the profile maps? |
| 769 | + bool LeafFound = false; |
| 770 | + // If leaf was found in a map, iterators pointing to its location in both |
| 771 | + // of the maps. It might exist in neither, one, or both (the latter case |
| 772 | + // can happen because we don't currently have discriminators to |
| 773 | + // distinguish the case when a single line/col maps to both an allocation |
| 774 | + // and another callsite). |
| 775 | + std::map<uint64_t, std::set<const AllocationInfo *>>::iterator |
| 776 | + AllocInfoIter; |
| 777 | + std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, |
| 778 | + unsigned>>>::iterator CallSitesIter; |
| 779 | + for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; |
| 780 | + DIL = DIL->getInlinedAt()) { |
| 781 | + // Use C++ linkage name if possible. Need to compile with |
| 782 | + // -fdebug-info-for-profiling to get linkage name. |
| 783 | + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); |
| 784 | + if (Name.empty()) |
| 785 | + Name = DIL->getScope()->getSubprogram()->getName(); |
| 786 | + auto CalleeGUID = Function::getGUID(Name); |
| 787 | + auto StackId = |
| 788 | + computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); |
| 789 | + // LeafFound will only be false on the first iteration, since we either |
| 790 | + // set it true or break out of the loop below. |
| 791 | + if (!LeafFound) { |
| 792 | + AllocInfoIter = LocHashToAllocInfo.find(StackId); |
| 793 | + CallSitesIter = LocHashToCallSites.find(StackId); |
| 794 | + // Check if the leaf is in one of the maps. If not, no need to look |
| 795 | + // further at this call. |
| 796 | + if (AllocInfoIter == LocHashToAllocInfo.end() && |
| 797 | + CallSitesIter == LocHashToCallSites.end()) |
| 798 | + break; |
| 799 | + LeafFound = true; |
| 800 | + } |
| 801 | + InlinedCallStack.push_back(StackId); |
| 802 | + } |
| 803 | + // If leaf not in either of the maps, skip inst. |
| 804 | + if (!LeafFound) |
| 805 | + continue; |
| 806 | + |
| 807 | + // First add !memprof metadata from allocation info, if we found the |
| 808 | + // instruction's leaf location in that map, and if the rest of the |
| 809 | + // instruction's locations match the prefix Frame locations on an |
| 810 | + // allocation context with the same leaf. |
| 811 | + if (AllocInfoIter != LocHashToAllocInfo.end()) { |
| 812 | + // Only consider allocations via new, to reduce unnecessary metadata, |
| 813 | + // since those are the only allocations that will be targeted initially. |
| 814 | + if (!isNewLikeFn(CI, &TLI)) |
| 815 | + continue; |
| 816 | + // We may match this instruction's location list to multiple MIB |
| 817 | + // contexts. Add them to a Trie specialized for trimming the contexts to |
| 818 | + // the minimal needed to disambiguate contexts with unique behavior. |
| 819 | + CallStackTrie AllocTrie; |
| 820 | + for (auto *AllocInfo : AllocInfoIter->second) { |
| 821 | + // Check the full inlined call stack against this one. |
| 822 | + // If we found and thus matched all frames on the call, include |
| 823 | + // this MIB. |
| 824 | + if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, |
| 825 | + InlinedCallStack)) |
| 826 | + addCallStack(AllocTrie, AllocInfo); |
| 827 | + } |
| 828 | + // We might not have matched any to the full inlined call stack. |
| 829 | + // But if we did, create and attach metadata, or a function attribute if |
| 830 | + // all contexts have identical profiled behavior. |
| 831 | + if (!AllocTrie.empty()) { |
| 832 | + // MemprofMDAttached will be false if a function attribute was |
| 833 | + // attached. |
| 834 | + bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); |
| 835 | + assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); |
| 836 | + if (MemprofMDAttached) { |
| 837 | + // Add callsite metadata for the instruction's location list so that |
| 838 | + // it simpler later on to identify which part of the MIB contexts |
| 839 | + // are from this particular instruction (including during inlining, |
| 840 | + // when the callsite metdata will be updated appropriately). |
| 841 | + // FIXME: can this be changed to strip out the matching stack |
| 842 | + // context ids from the MIB contexts and not add any callsite |
| 843 | + // metadata here to save space? |
| 844 | + addCallsiteMetadata(I, InlinedCallStack, Ctx); |
| 845 | + } |
| 846 | + } |
| 847 | + continue; |
| 848 | + } |
| 849 | + |
| 850 | + // Otherwise, add callsite metadata. If we reach here then we found the |
| 851 | + // instruction's leaf location in the callsites map and not the allocation |
| 852 | + // map. |
| 853 | + assert(CallSitesIter != LocHashToCallSites.end()); |
| 854 | + for (auto CallStackIdx : CallSitesIter->second) { |
| 855 | + // If we found and thus matched all frames on the call, create and |
| 856 | + // attach call stack metadata. |
| 857 | + if (stackFrameIncludesInlinedCallStack( |
| 858 | + *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { |
| 859 | + addCallsiteMetadata(I, InlinedCallStack, Ctx); |
| 860 | + // Only need to find one with a matching call stack and add a single |
| 861 | + // callsite metadata. |
| 862 | + break; |
| 863 | + } |
| 864 | + } |
| 865 | + } |
| 866 | + } |
| 867 | +} |
0 commit comments