|
18 | 18 | #include "llvm/ADT/SmallVector.h"
|
19 | 19 | #include "llvm/ADT/Statistic.h"
|
20 | 20 | #include "llvm/ADT/StringRef.h"
|
21 |
| -#include "llvm/Analysis/MemoryBuiltins.h" |
22 |
| -#include "llvm/Analysis/MemoryProfileInfo.h" |
23 | 21 | #include "llvm/Analysis/ValueTracking.h"
|
24 | 22 | #include "llvm/IR/Constant.h"
|
25 | 23 | #include "llvm/IR/DataLayout.h"
|
26 |
| -#include "llvm/IR/DiagnosticInfo.h" |
27 | 24 | #include "llvm/IR/Function.h"
|
28 | 25 | #include "llvm/IR/GlobalValue.h"
|
29 | 26 | #include "llvm/IR/IRBuilder.h"
|
|
33 | 30 | #include "llvm/IR/Type.h"
|
34 | 31 | #include "llvm/IR/Value.h"
|
35 | 32 | #include "llvm/ProfileData/InstrProf.h"
|
36 |
| -#include "llvm/ProfileData/InstrProfReader.h" |
37 |
| -#include "llvm/Support/BLAKE3.h" |
38 | 33 | #include "llvm/Support/CommandLine.h"
|
39 | 34 | #include "llvm/Support/Debug.h"
|
40 |
| -#include "llvm/Support/HashBuilder.h" |
41 | 35 | #include "llvm/TargetParser/Triple.h"
|
42 | 36 | #include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
43 | 37 | #include "llvm/Transforms/Utils/ModuleUtils.h"
|
44 |
| -#include <map> |
45 |
| -#include <set> |
46 | 38 |
|
47 | 39 | using namespace llvm;
|
48 |
| -using namespace llvm::memprof; |
49 | 40 |
|
50 | 41 | #define DEBUG_TYPE "memprof"
|
51 | 42 |
|
52 |
| -namespace llvm { |
53 |
| -extern cl::opt<bool> PGOWarnMissing; |
54 |
| -extern cl::opt<bool> NoPGOWarnMismatch; |
55 |
| -extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; |
56 |
| -} // namespace llvm |
57 |
| - |
58 | 43 | constexpr int LLVM_MEM_PROFILER_VERSION = 1;
|
59 | 44 |
|
60 | 45 | // Size of memory mapped to a single shadow location.
|
@@ -143,7 +128,6 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
|
143 | 128 | STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
|
144 | 129 | STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
|
145 | 130 | STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
|
146 |
| -STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); |
147 | 131 |
|
148 | 132 | namespace {
|
149 | 133 |
|
@@ -617,251 +601,3 @@ bool MemProfiler::instrumentFunction(Function &F) {
|
617 | 601 |
|
618 | 602 | return FunctionModified;
|
619 | 603 | }
|
620 |
| - |
621 |
| -static void addCallsiteMetadata(Instruction &I, |
622 |
| - std::vector<uint64_t> &InlinedCallStack, |
623 |
| - LLVMContext &Ctx) { |
624 |
| - I.setMetadata(LLVMContext::MD_callsite, |
625 |
| - buildCallstackMetadata(InlinedCallStack, Ctx)); |
626 |
| -} |
627 |
| - |
628 |
| -static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, |
629 |
| - uint32_t Column) { |
630 |
| - llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little> |
631 |
| - HashBuilder; |
632 |
| - HashBuilder.add(Function, LineOffset, Column); |
633 |
| - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); |
634 |
| - uint64_t Id; |
635 |
| - std::memcpy(&Id, Hash.data(), sizeof(Hash)); |
636 |
| - return Id; |
637 |
| -} |
638 |
| - |
639 |
| -static uint64_t computeStackId(const memprof::Frame &Frame) { |
640 |
| - return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); |
641 |
| -} |
642 |
| - |
643 |
| -static void addCallStack(CallStackTrie &AllocTrie, |
644 |
| - const AllocationInfo *AllocInfo) { |
645 |
| - SmallVector<uint64_t> StackIds; |
646 |
| - for (const auto &StackFrame : AllocInfo->CallStack) |
647 |
| - StackIds.push_back(computeStackId(StackFrame)); |
648 |
| - auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), |
649 |
| - AllocInfo->Info.getAllocCount(), |
650 |
| - AllocInfo->Info.getTotalLifetime()); |
651 |
| - AllocTrie.addCallStack(AllocType, StackIds); |
652 |
| -} |
653 |
| - |
654 |
| -// Helper to compare the InlinedCallStack computed from an instruction's debug |
655 |
| -// info to a list of Frames from profile data (either the allocation data or a |
656 |
| -// callsite). For callsites, the StartIndex to use in the Frame array may be |
657 |
| -// non-zero. |
658 |
| -static bool |
659 |
| -stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, |
660 |
| - ArrayRef<uint64_t> InlinedCallStack, |
661 |
| - unsigned StartIndex = 0) { |
662 |
| - auto StackFrame = ProfileCallStack.begin() + StartIndex; |
663 |
| - auto InlCallStackIter = InlinedCallStack.begin(); |
664 |
| - for (; StackFrame != ProfileCallStack.end() && |
665 |
| - InlCallStackIter != InlinedCallStack.end(); |
666 |
| - ++StackFrame, ++InlCallStackIter) { |
667 |
| - uint64_t StackId = computeStackId(*StackFrame); |
668 |
| - if (StackId != *InlCallStackIter) |
669 |
| - return false; |
670 |
| - } |
671 |
| - // Return true if we found and matched all stack ids from the call |
672 |
| - // instruction. |
673 |
| - return InlCallStackIter == InlinedCallStack.end(); |
674 |
| -} |
675 |
| - |
676 |
| -void llvm::readMemprof(Module &M, Function &F, |
677 |
| - IndexedInstrProfReader *MemProfReader, |
678 |
| - const TargetLibraryInfo &TLI) { |
679 |
| - auto &Ctx = M.getContext(); |
680 |
| - |
681 |
| - auto FuncName = getPGOFuncName(F); |
682 |
| - auto FuncGUID = Function::getGUID(FuncName); |
683 |
| - Expected<memprof::MemProfRecord> MemProfResult = |
684 |
| - MemProfReader->getMemProfRecord(FuncGUID); |
685 |
| - if (Error E = MemProfResult.takeError()) { |
686 |
| - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { |
687 |
| - auto Err = IPE.get(); |
688 |
| - bool SkipWarning = false; |
689 |
| - LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName |
690 |
| - << ": "); |
691 |
| - if (Err == instrprof_error::unknown_function) { |
692 |
| - NumOfMemProfMissing++; |
693 |
| - SkipWarning = !PGOWarnMissing; |
694 |
| - LLVM_DEBUG(dbgs() << "unknown function"); |
695 |
| - } else if (Err == instrprof_error::hash_mismatch) { |
696 |
| - SkipWarning = |
697 |
| - NoPGOWarnMismatch || |
698 |
| - (NoPGOWarnMismatchComdatWeak && |
699 |
| - (F.hasComdat() || |
700 |
| - F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
701 |
| - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); |
702 |
| - } |
703 |
| - |
704 |
| - if (SkipWarning) |
705 |
| - return; |
706 |
| - |
707 |
| - std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + |
708 |
| - Twine(" Hash = ") + std::to_string(FuncGUID)) |
709 |
| - .str(); |
710 |
| - |
711 |
| - Ctx.diagnose( |
712 |
| - DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
713 |
| - }); |
714 |
| - return; |
715 |
| - } |
716 |
| - |
717 |
| - // Build maps of the location hash to all profile data with that leaf location |
718 |
| - // (allocation info and the callsites). |
719 |
| - std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; |
720 |
| - // For the callsites we need to record the index of the associated frame in |
721 |
| - // the frame array (see comments below where the map entries are added). |
722 |
| - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> |
723 |
| - LocHashToCallSites; |
724 |
| - const auto MemProfRec = std::move(MemProfResult.get()); |
725 |
| - for (auto &AI : MemProfRec.AllocSites) { |
726 |
| - // Associate the allocation info with the leaf frame. The later matching |
727 |
| - // code will match any inlined call sequences in the IR with a longer prefix |
728 |
| - // of call stack frames. |
729 |
| - uint64_t StackId = computeStackId(AI.CallStack[0]); |
730 |
| - LocHashToAllocInfo[StackId].insert(&AI); |
731 |
| - } |
732 |
| - for (auto &CS : MemProfRec.CallSites) { |
733 |
| - // Need to record all frames from leaf up to and including this function, |
734 |
| - // as any of these may or may not have been inlined at this point. |
735 |
| - unsigned Idx = 0; |
736 |
| - for (auto &StackFrame : CS) { |
737 |
| - uint64_t StackId = computeStackId(StackFrame); |
738 |
| - LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); |
739 |
| - // Once we find this function, we can stop recording. |
740 |
| - if (StackFrame.Function == FuncGUID) |
741 |
| - break; |
742 |
| - } |
743 |
| - assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); |
744 |
| - } |
745 |
| - |
746 |
| - auto GetOffset = [](const DILocation *DIL) { |
747 |
| - return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & |
748 |
| - 0xffff; |
749 |
| - }; |
750 |
| - |
751 |
| - // Now walk the instructions, looking up the associated profile data using |
752 |
| - // debug locations. |
753 |
| - for (auto &BB : F) { |
754 |
| - for (auto &I : BB) { |
755 |
| - if (I.isDebugOrPseudoInst()) |
756 |
| - continue; |
757 |
| - // We are only interested in calls (allocation or interior call stack |
758 |
| - // context calls). |
759 |
| - auto *CI = dyn_cast<CallBase>(&I); |
760 |
| - if (!CI) |
761 |
| - continue; |
762 |
| - auto *CalledFunction = CI->getCalledFunction(); |
763 |
| - if (CalledFunction && CalledFunction->isIntrinsic()) |
764 |
| - continue; |
765 |
| - // List of call stack ids computed from the location hashes on debug |
766 |
| - // locations (leaf to inlined at root). |
767 |
| - std::vector<uint64_t> InlinedCallStack; |
768 |
| - // Was the leaf location found in one of the profile maps? |
769 |
| - bool LeafFound = false; |
770 |
| - // If leaf was found in a map, iterators pointing to its location in both |
771 |
| - // of the maps. It might exist in neither, one, or both (the latter case |
772 |
| - // can happen because we don't currently have discriminators to |
773 |
| - // distinguish the case when a single line/col maps to both an allocation |
774 |
| - // and another callsite). |
775 |
| - std::map<uint64_t, std::set<const AllocationInfo *>>::iterator |
776 |
| - AllocInfoIter; |
777 |
| - std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, |
778 |
| - unsigned>>>::iterator CallSitesIter; |
779 |
| - for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; |
780 |
| - DIL = DIL->getInlinedAt()) { |
781 |
| - // Use C++ linkage name if possible. Need to compile with |
782 |
| - // -fdebug-info-for-profiling to get linkage name. |
783 |
| - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); |
784 |
| - if (Name.empty()) |
785 |
| - Name = DIL->getScope()->getSubprogram()->getName(); |
786 |
| - auto CalleeGUID = Function::getGUID(Name); |
787 |
| - auto StackId = |
788 |
| - computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); |
789 |
| - // LeafFound will only be false on the first iteration, since we either |
790 |
| - // set it true or break out of the loop below. |
791 |
| - if (!LeafFound) { |
792 |
| - AllocInfoIter = LocHashToAllocInfo.find(StackId); |
793 |
| - CallSitesIter = LocHashToCallSites.find(StackId); |
794 |
| - // Check if the leaf is in one of the maps. If not, no need to look |
795 |
| - // further at this call. |
796 |
| - if (AllocInfoIter == LocHashToAllocInfo.end() && |
797 |
| - CallSitesIter == LocHashToCallSites.end()) |
798 |
| - break; |
799 |
| - LeafFound = true; |
800 |
| - } |
801 |
| - InlinedCallStack.push_back(StackId); |
802 |
| - } |
803 |
| - // If leaf not in either of the maps, skip inst. |
804 |
| - if (!LeafFound) |
805 |
| - continue; |
806 |
| - |
807 |
| - // First add !memprof metadata from allocation info, if we found the |
808 |
| - // instruction's leaf location in that map, and if the rest of the |
809 |
| - // instruction's locations match the prefix Frame locations on an |
810 |
| - // allocation context with the same leaf. |
811 |
| - if (AllocInfoIter != LocHashToAllocInfo.end()) { |
812 |
| - // Only consider allocations via new, to reduce unnecessary metadata, |
813 |
| - // since those are the only allocations that will be targeted initially. |
814 |
| - if (!isNewLikeFn(CI, &TLI)) |
815 |
| - continue; |
816 |
| - // We may match this instruction's location list to multiple MIB |
817 |
| - // contexts. Add them to a Trie specialized for trimming the contexts to |
818 |
| - // the minimal needed to disambiguate contexts with unique behavior. |
819 |
| - CallStackTrie AllocTrie; |
820 |
| - for (auto *AllocInfo : AllocInfoIter->second) { |
821 |
| - // Check the full inlined call stack against this one. |
822 |
| - // If we found and thus matched all frames on the call, include |
823 |
| - // this MIB. |
824 |
| - if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, |
825 |
| - InlinedCallStack)) |
826 |
| - addCallStack(AllocTrie, AllocInfo); |
827 |
| - } |
828 |
| - // We might not have matched any to the full inlined call stack. |
829 |
| - // But if we did, create and attach metadata, or a function attribute if |
830 |
| - // all contexts have identical profiled behavior. |
831 |
| - if (!AllocTrie.empty()) { |
832 |
| - // MemprofMDAttached will be false if a function attribute was |
833 |
| - // attached. |
834 |
| - bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); |
835 |
| - assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); |
836 |
| - if (MemprofMDAttached) { |
837 |
| - // Add callsite metadata for the instruction's location list so that |
838 |
| - // it is simpler later on to identify which part of the MIB contexts |
839 |
| - // are from this particular instruction (including during inlining, |
840 |
| - // when the callsite metadata will be updated appropriately). |
841 |
| - // FIXME: can this be changed to strip out the matching stack |
842 |
| - // context ids from the MIB contexts and not add any callsite |
843 |
| - // metadata here to save space? |
844 |
| - addCallsiteMetadata(I, InlinedCallStack, Ctx); |
845 |
| - } |
846 |
| - } |
847 |
| - continue; |
848 |
| - } |
849 |
| - |
850 |
| - // Otherwise, add callsite metadata. If we reach here then we found the |
851 |
| - // instruction's leaf location in the callsites map and not the allocation |
852 |
| - // map. |
853 |
| - assert(CallSitesIter != LocHashToCallSites.end()); |
854 |
| - for (auto CallStackIdx : CallSitesIter->second) { |
855 |
| - // If we found and thus matched all frames on the call, create and |
856 |
| - // attach call stack metadata. |
857 |
| - if (stackFrameIncludesInlinedCallStack( |
858 |
| - *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { |
859 |
| - addCallsiteMetadata(I, InlinedCallStack, Ctx); |
860 |
| - // Only need to find one with a matching call stack and add a single |
861 |
| - // callsite metadata. |
862 |
| - break; |
863 |
| - } |
864 |
| - } |
865 |
| - } |
866 |
| - } |
867 |
| -} |
0 commit comments