diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h new file mode 100644 index 0000000000000..84133a433170f --- /dev/null +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -0,0 +1,204 @@ +//===- CodeGenData.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for codegen data that has stable summary which +// can be used to optimize the code in the subsequent codegen. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CGDATA_CODEGENDATA_H +#define LLVM_CGDATA_CODEGENDATA_H + +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/CGData/OutlinedHashTree.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/IR/Module.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/TargetParser/Triple.h" +#include + +namespace llvm { + +enum CGDataSectKind { +#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, +#include "llvm/CGData/CodeGenData.inc" +}; + +std::string getCodeGenDataSectionName(CGDataSectKind CGSK, + Triple::ObjectFormatType OF, + bool AddSegmentInfo = true); + +enum class CGDataKind { + Unknown = 0x0, + // A function outlining info. 
+ FunctionOutlinedHashTree = 0x1, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) +}; + +const std::error_category &cgdata_category(); + +enum class cgdata_error { + success = 0, + eof, + bad_magic, + bad_header, + empty_cgdata, + malformed, + unsupported_version, +}; + +inline std::error_code make_error_code(cgdata_error E) { + return std::error_code(static_cast(E), cgdata_category()); +} + +class CGDataError : public ErrorInfo { +public: + CGDataError(cgdata_error Err, const Twine &ErrStr = Twine()) + : Err(Err), Msg(ErrStr.str()) { + assert(Err != cgdata_error::success && "Not an error"); + } + + std::string message() const override; + + void log(raw_ostream &OS) const override { OS << message(); } + + std::error_code convertToErrorCode() const override { + return make_error_code(Err); + } + + cgdata_error get() const { return Err; } + const std::string &getMessage() const { return Msg; } + + /// Consume an Error and return the raw enum value contained within it, and + /// the optional error message. The Error must either be a success value, or + /// contain a single CGDataError. + static std::pair take(Error E) { + auto Err = cgdata_error::success; + std::string Msg; + handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) { + assert(Err == cgdata_error::success && "Multiple errors encountered"); + Err = IPE.get(); + Msg = IPE.getMessage(); + }); + return {Err, Msg}; + } + + static char ID; + +private: + cgdata_error Err; + std::string Msg; +}; + +enum CGDataMode { + None, + Read, + Write, +}; + +class CodeGenData { + /// Global outlined hash tree that has outlined hash sequences across modules. + std::unique_ptr PublishedHashTree; + + /// This flag is set when -fcodegen-data-generate is passed. + /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds. + bool EmitCGData; + + /// This is a singleton instance which is thread-safe. 
Unlike profile data + /// which is largely function-based, codegen data describes the whole module. + /// Therefore, this can be initialized once, and can be used across modules + /// instead of constructing the same one for each codegen backend. + static std::unique_ptr Instance; + static std::once_flag OnceFlag; + + CodeGenData() = default; + +public: + ~CodeGenData() = default; + + static CodeGenData &getInstance(); + + /// Returns true if we have a valid outlined hash tree. + bool hasOutlinedHashTree() { + return PublishedHashTree && !PublishedHashTree->empty(); + } + + /// Returns the outlined hash tree. This can be globally used in a read-only + /// manner. + const OutlinedHashTree *getOutlinedHashTree() { + return PublishedHashTree.get(); + } + + /// Returns true if we should write codegen data. + bool emitCGData() { return EmitCGData; } + + /// Publish the (globally) merged or read outlined hash tree. + void publishOutlinedHashTree(std::unique_ptr HashTree) { + PublishedHashTree = std::move(HashTree); + // Ensure we disable emitCGData as we do not want to read and write both. + EmitCGData = false; + } +}; + +namespace cgdata { + +inline bool hasOutlinedHashTree() { + return CodeGenData::getInstance().hasOutlinedHashTree(); +} + +inline const OutlinedHashTree *getOutlinedHashTree() { + return CodeGenData::getInstance().getOutlinedHashTree(); +} + +inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } + +inline void +publishOutlinedHashTree(std::unique_ptr HashTree) { + CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); +} + +void warn(Error E, StringRef Whence = ""); +void warn(Twine Message, std::string Whence = "", std::string Hint = ""); + +} // end namespace cgdata + +namespace IndexedCGData { + +// A signature for data validation, representing "\xffcgdata\x81" in +// little-endian order +const uint64_t Magic = 0x81617461646763ff; + +enum CGDataVersion { + // Version 1 is the first version. 
This version supports the outlined + // hash tree. + Version1 = 1, + CurrentVersion = CG_DATA_INDEX_VERSION +}; +const uint64_t Version = CGDataVersion::CurrentVersion; + +struct Header { + uint64_t Magic; + uint32_t Version; + uint32_t DataKind; + uint64_t OutlinedHashTreeOffset; + + // New fields should only be added at the end to ensure that the size + // computation is correct. The methods below need to be updated to ensure that + // the new field is read correctly. + + // Reads a header struct from the buffer. + static Expected
readFromBuffer(const unsigned char *Curr); +}; + +} // end namespace IndexedCGData + +} // end namespace llvm + +#endif // LLVM_CGDATA_CODEGENDATA_H diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc new file mode 100644 index 0000000000000..08ec14ea051a0 --- /dev/null +++ b/llvm/include/llvm/CGData/CodeGenData.inc @@ -0,0 +1,46 @@ +/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across compiler, host tools (reader/writer) + * to support codegen data. + * +\*===----------------------------------------------------------------------===*/ + +/* Helper macros. */ +#define CG_DATA_SIMPLE_QUOTE(x) #x +#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x) + +#ifdef CG_DATA_SECT_ENTRY +#define CG_DATA_DEFINED +CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), + CG_DATA_OUTLINE_COFF, "__DATA,") + +#undef CG_DATA_SECT_ENTRY +#endif + +/* section name strings common to all targets other + than WIN32 */ +#define CG_DATA_OUTLINE_COMMON __llvm_outline +/* Since cg data sections are not allocated, we don't need to + * access them at runtime. + */ +#define CG_DATA_OUTLINE_COFF ".loutline" + +#ifdef _WIN32 +/* Runtime section names and name strings. */ +#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF + +#else +/* Runtime section names and name strings. */ +#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) + +#endif + +/* Indexed codegen data format version (start from 1). 
*/ +#define CG_DATA_INDEX_VERSION 1 diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h new file mode 100644 index 0000000000000..1ee4bfbe48023 --- /dev/null +++ b/llvm/include/llvm/CGData/CodeGenDataReader.h @@ -0,0 +1,154 @@ +//===- CodeGenDataReader.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading codegen data. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CGDATA_CODEGENDATAREADER_H +#define LLVM_CGDATA_CODEGENDATAREADER_H + +#include "llvm/CGData/CodeGenData.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/VirtualFileSystem.h" + +namespace llvm { + +class CodeGenDataReader { + cgdata_error LastError = cgdata_error::success; + std::string LastErrorMsg; + +public: + CodeGenDataReader() = default; + virtual ~CodeGenDataReader() = default; + + /// Read the header. Required before reading first record. + virtual Error read() = 0; + /// Return the codegen data version. + virtual uint32_t getVersion() const = 0; + /// Return the codegen data kind. + virtual CGDataKind getDataKind() const = 0; + /// Return true if the data has an outlined hash tree. + virtual bool hasOutlinedHashTree() const = 0; + /// Return the outlined hash tree that is released from the reader. + std::unique_ptr releaseOutlinedHashTree() { + return std::move(HashTreeRecord.HashTree); + } + + /// Factory method to create an appropriately typed reader for the given + /// codegen data file path and file system. 
+ static Expected> + create(const Twine &Path, vfs::FileSystem &FS); + + /// Factory method to create an appropriately typed reader for the given + /// memory buffer. + static Expected> + create(std::unique_ptr Buffer); + + /// Extract the cgdata embedded in sections from the given object file and + /// merge them into the GlobalOutlineRecord. This is a static helper that + /// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds. + static Error mergeFromObjectFile(const object::ObjectFile *Obj, + OutlinedHashTreeRecord &GlobalOutlineRecord); + +protected: + /// The outlined hash tree that has been read. When it's released by + /// releaseOutlinedHashTree(), it's no longer valid. + OutlinedHashTreeRecord HashTreeRecord; + + /// Set the current error and return same. + Error error(cgdata_error Err, const std::string &ErrMsg = "") { + LastError = Err; + LastErrorMsg = ErrMsg; + if (Err == cgdata_error::success) + return Error::success(); + return make_error(Err, ErrMsg); + } + + Error error(Error &&E) { + handleAllErrors(std::move(E), [&](const CGDataError &IPE) { + LastError = IPE.get(); + LastErrorMsg = IPE.getMessage(); + }); + return make_error(LastError, LastErrorMsg); + } + + /// Clear the current error and return a successful one. + Error success() { return error(cgdata_error::success); } +}; + +class IndexedCodeGenDataReader : public CodeGenDataReader { + /// The codegen data file contents. + std::unique_ptr DataBuffer; + /// The header + IndexedCGData::Header Header; + +public: + IndexedCodeGenDataReader(std::unique_ptr DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + IndexedCodeGenDataReader(const IndexedCodeGenDataReader &) = delete; + IndexedCodeGenDataReader & + operator=(const IndexedCodeGenDataReader &) = delete; + + /// Return true if the given buffer is in binary codegen data format. + static bool hasFormat(const MemoryBuffer &Buffer); + /// Read the contents including the header. 
+ Error read() override; + /// Return the codegen data version. + uint32_t getVersion() const override { return Header.Version; } + /// Return the codegen data kind. + CGDataKind getDataKind() const override { + return static_cast(Header.DataKind); + } + /// Return true if the header indicates the data has an outlined hash tree. + /// This does not mean that the data is still available. + bool hasOutlinedHashTree() const override { + return Header.DataKind & + static_cast(CGDataKind::FunctionOutlinedHashTree); + } +}; + +/// This format is a simple text format that's suitable for test data. +/// The header is a custom format starting with `:` per line to indicate which +/// codegen data is recorded. `#` is used to indicate a comment. +/// The subsequent data is a YAML format per each codegen data in order. +/// Currently, it only has a function outlined hash tree. +class TextCodeGenDataReader : public CodeGenDataReader { + /// The codegen data file contents. + std::unique_ptr DataBuffer; + /// Iterator over the lines of the codegen data. + line_iterator Line; + /// Describe the kind of the codegen data. + CGDataKind DataKind = CGDataKind::Unknown; + +public: + TextCodeGenDataReader(std::unique_ptr DataBuffer_) + : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} + TextCodeGenDataReader(const TextCodeGenDataReader &) = delete; + TextCodeGenDataReader &operator=(const TextCodeGenDataReader &) = delete; + + /// Return true if the given buffer is in text codegen data format. + static bool hasFormat(const MemoryBuffer &Buffer); + /// Read the contents including the header. + Error read() override; + /// Text format does not have version, so return 0. + uint32_t getVersion() const override { return 0; } + /// Return the codegen data kind. + CGDataKind getDataKind() const override { return DataKind; } + /// Return true if the header indicates the data has an outlined hash tree. + /// This does not mean that the data is still available. 
+ bool hasOutlinedHashTree() const override { + return static_cast(DataKind) & + static_cast(CGDataKind::FunctionOutlinedHashTree); + } +}; + +} // end namespace llvm + +#endif // LLVM_CGDATA_CODEGENDATAREADER_H diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h new file mode 100644 index 0000000000000..5cb8377b1d07e --- /dev/null +++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h @@ -0,0 +1,100 @@ +//===- CodeGenDataWriter.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing codegen data. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CGDATA_CODEGENDATAWRITER_H +#define LLVM_CGDATA_CODEGENDATAWRITER_H + +#include "llvm/CGData/CodeGenData.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/Error.h" + +namespace llvm { + +/// A struct to define how the data stream should be patched. +struct CGDataPatchItem { + uint64_t Pos; // Where to patch. + uint64_t *D; // Pointer to an array of source data. + int N; // Number of elements in \c D array. +}; + +/// A wrapper class to abstract writer stream with support of bytes +/// back patching. 
+class CGDataOStream { +public: + CGDataOStream(raw_fd_ostream &FD) + : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} + CGDataOStream(raw_string_ostream &STR) + : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} + + uint64_t tell() { return OS.tell(); } + void write(uint64_t V) { LE.write(V); } + void write32(uint32_t V) { LE.write(V); } + void write8(uint8_t V) { LE.write(V); } + + // \c patch can only be called when all data is written and flushed. + // For raw_string_ostream, the patch is done on the target string + // directly and it won't be reflected in the stream's internal buffer. + void patch(ArrayRef P); + + // If \c OS is an instance of \c raw_fd_ostream, this field will be + // true. Otherwise, \c OS will be a raw_string_ostream. + bool IsFDOStream; + raw_ostream &OS; + support::endian::Writer LE; +}; + +class CodeGenDataWriter { + /// The outlined hash tree to be written. + OutlinedHashTreeRecord HashTreeRecord; + + /// A bit mask describing the kind of the codegen data. + CGDataKind DataKind = CGDataKind::Unknown; + +public: + CodeGenDataWriter() = default; + ~CodeGenDataWriter() = default; + + /// Add the outlined hash tree record. The input Record is released. + void addRecord(OutlinedHashTreeRecord &Record); + + /// Write the codegen data to \c OS + Error write(raw_fd_ostream &OS); + + /// Write the codegen data in text format to \c OS + Error writeText(raw_fd_ostream &OS); + + /// Return the attributes of the current CGData. + CGDataKind getCGDataKind() const { return DataKind; } + + /// Return true if the header indicates the data has an outlined hash tree. + bool hasOutlinedHashTree() const { + return static_cast(DataKind) & + static_cast(CGDataKind::FunctionOutlinedHashTree); + } + +private: + /// The offset of the outlined hash tree in the file. 
+ uint64_t OutlinedHashTreeOffset; + + /// Write the codegen data header to \c COS + Error writeHeader(CGDataOStream &COS); + + /// Write the codegen data header in text to \c OS + Error writeHeaderText(raw_fd_ostream &OS); + + Error writeImpl(CGDataOStream &COS); +}; + +} // end namespace llvm + +#endif // LLVM_CGDATA_CODEGENDATAWRITER_H diff --git a/llvm/include/llvm/CodeGenData/OutlinedHashTree.h b/llvm/include/llvm/CGData/OutlinedHashTree.h similarity index 97% rename from llvm/include/llvm/CodeGenData/OutlinedHashTree.h rename to llvm/include/llvm/CGData/OutlinedHashTree.h index 2c8a9288f8a8c..9ab36df863eef 100644 --- a/llvm/include/llvm/CodeGenData/OutlinedHashTree.h +++ b/llvm/include/llvm/CGData/OutlinedHashTree.h @@ -12,8 +12,8 @@ // //===---------------------------------------------------------------------===// -#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREE_H -#define LLVM_CODEGENDATA_OUTLINEDHASHTREE_H +#ifndef LLVM_CGDATA_OUTLINEDHASHTREE_H +#define LLVM_CGDATA_OUTLINEDHASHTREE_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StableHashing.h" diff --git a/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h b/llvm/include/llvm/CGData/OutlinedHashTreeRecord.h similarity index 92% rename from llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h rename to llvm/include/llvm/CGData/OutlinedHashTreeRecord.h index de397c9ca5e70..dd599ff6a7a62 100644 --- a/llvm/include/llvm/CodeGenData/OutlinedHashTreeRecord.h +++ b/llvm/include/llvm/CGData/OutlinedHashTreeRecord.h @@ -13,10 +13,10 @@ // //===---------------------------------------------------------------------===// -#ifndef LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H -#define LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H +#ifndef LLVM_CGDATA_OUTLINEDHASHTREERECORD_H +#define LLVM_CGDATA_OUTLINEDHASHTREERECORD_H -#include "llvm/CodeGenData/OutlinedHashTree.h" +#include "llvm/CGData/OutlinedHashTree.h" namespace llvm { @@ -72,4 +72,4 @@ struct OutlinedHashTreeRecord { } // end namespace llvm -#endif // 
LLVM_CODEGENDATA_OUTLINEDHASHTREERECORD_H +#endif // LLVM_CGDATA_OUTLINEDHASHTREERECORD_H diff --git a/llvm/lib/CodeGenData/CMakeLists.txt b/llvm/lib/CGData/CMakeLists.txt similarity index 52% rename from llvm/lib/CodeGenData/CMakeLists.txt rename to llvm/lib/CGData/CMakeLists.txt index f9d107f52a715..ff1aab920e7a8 100644 --- a/llvm/lib/CodeGenData/CMakeLists.txt +++ b/llvm/lib/CGData/CMakeLists.txt @@ -1,9 +1,12 @@ -add_llvm_component_library(LLVMCodeGenData +add_llvm_component_library(LLVMCGData + CodeGenData.cpp + CodeGenDataReader.cpp + CodeGenDataWriter.cpp OutlinedHashTree.cpp OutlinedHashTreeRecord.cpp ADDITIONAL_HEADER_DIRS - ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGenData + ${LLVM_MAIN_INCLUDE_DIR}/llvm/CGData DEPENDS intrinsics_gen diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp new file mode 100644 index 0000000000000..9dd4b1674e094 --- /dev/null +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -0,0 +1,196 @@ +//===-- CodeGenData.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for codegen data that has stable summary which +// can be used to optimize the code in the subsequent codegen. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/CGData/CodeGenDataReader.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/WithColor.h" + +#define DEBUG_TYPE "cg-data" + +using namespace llvm; +using namespace cgdata; + +static std::string getCGDataErrString(cgdata_error Err, + const std::string &ErrMsg = "") { + std::string Msg; + raw_string_ostream OS(Msg); + + switch (Err) { + case cgdata_error::success: + OS << "success"; + break; + case cgdata_error::eof: + OS << "end of File"; + break; + case cgdata_error::bad_magic: + OS << "invalid codegen data (bad magic)"; + break; + case cgdata_error::bad_header: + OS << "invalid codegen data (file header is corrupt)"; + break; + case cgdata_error::empty_cgdata: + OS << "empty codegen data"; + break; + case cgdata_error::malformed: + OS << "malformed codegen data"; + break; + case cgdata_error::unsupported_version: + OS << "unsupported codegen data version"; + break; + } + + // If optional error message is not empty, append it to the message. + if (!ErrMsg.empty()) + OS << ": " << ErrMsg; + + return OS.str(); +} + +namespace { + +// FIXME: This class is only here to support the transition to llvm::Error. It +// will be removed once this transition is complete. Clients should prefer to +// deal with the Error value directly, rather than converting to error_code. 
+class CGDataErrorCategoryType : public std::error_category { + const char *name() const noexcept override { return "llvm.cgdata"; } + + std::string message(int IE) const override { + return getCGDataErrString(static_cast(IE)); + } +}; + +} // end anonymous namespace + +const std::error_category &llvm::cgdata_category() { + static CGDataErrorCategoryType ErrorCategory; + return ErrorCategory; +} + +std::string CGDataError::message() const { + return getCGDataErrString(Err, Msg); +} + +char CGDataError::ID = 0; + +namespace { + +const char *CodeGenDataSectNameCommon[] = { +#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ + SectNameCommon, +#include "llvm/CGData/CodeGenData.inc" +}; + +const char *CodeGenDataSectNameCoff[] = { +#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ + SectNameCoff, +#include "llvm/CGData/CodeGenData.inc" +}; + +const char *CodeGenDataSectNamePrefix[] = { +#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, +#include "llvm/CGData/CodeGenData.inc" +}; + +} // namespace + +namespace llvm { + +std::string getCodeGenDataSectionName(CGDataSectKind CGSK, + Triple::ObjectFormatType OF, + bool AddSegmentInfo) { + std::string SectName; + + if (OF == Triple::MachO && AddSegmentInfo) + SectName = CodeGenDataSectNamePrefix[CGSK]; + + if (OF == Triple::COFF) + SectName += CodeGenDataSectNameCoff[CGSK]; + else + SectName += CodeGenDataSectNameCommon[CGSK]; + + return SectName; +} + +std::unique_ptr CodeGenData::Instance = nullptr; +std::once_flag CodeGenData::OnceFlag; + +CodeGenData &CodeGenData::getInstance() { + std::call_once(CodeGenData::OnceFlag, []() { + Instance = std::unique_ptr(new CodeGenData()); + + // TODO: Initialize writer or reader mode for the client optimization. + }); + return *(Instance.get()); +} + +namespace IndexedCGData { + +Expected
Header::readFromBuffer(const unsigned char *Curr) { + using namespace support; + + static_assert(std::is_standard_layout_v, + "The header should be standard layout type since we use offset " + "of fields to read."); + Header H; + H.Magic = endian::readNext(Curr); + if (H.Magic != IndexedCGData::Magic) + return make_error(cgdata_error::bad_magic); + H.Version = endian::readNext(Curr); + if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) + return make_error(cgdata_error::unsupported_version); + H.DataKind = endian::readNext(Curr); + + switch (H.Version) { + // When a new field is added to the header add a case statement here to + // compute the size as offset of the new field + size of the new field. This + // relies on the field being added to the end of the list. + static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1, + "Please update the size computation below if a new field has " + "been added to the header, if not add a case statement to " + "fall through to the latest version."); + case 1ull: + H.OutlinedHashTreeOffset = + endian::readNext(Curr); + } + + return H; +} + +} // end namespace IndexedCGData + +namespace cgdata { + +void warn(Twine Message, std::string Whence, std::string Hint) { + WithColor::warning(); + if (!Whence.empty()) + errs() << Whence << ": "; + errs() << Message << "\n"; + if (!Hint.empty()) + WithColor::note() << Hint << "\n"; +} + +void warn(Error E, StringRef Whence) { + if (E.isA()) { + handleAllErrors(std::move(E), [&](const CGDataError &IPE) { + warn(IPE.message(), Whence.str(), ""); + }); + } +} + +} // end namespace cgdata + +} // end namespace llvm diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp new file mode 100644 index 0000000000000..f7f3a8f42af7e --- /dev/null +++ b/llvm/lib/CGData/CodeGenDataReader.cpp @@ -0,0 +1,175 @@ +//===- CodeGenDataReader.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading codegen data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CGData/CodeGenDataReader.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" + +#define DEBUG_TYPE "cg-data-reader" + +using namespace llvm; + +namespace llvm { + +static Expected> +setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { + auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() + : FS.getBufferForFile(Filename); + if (std::error_code EC = BufferOrErr.getError()) + return errorCodeToError(EC); + return std::move(BufferOrErr.get()); +} + +Error CodeGenDataReader::mergeFromObjectFile( + const object::ObjectFile *Obj, + OutlinedHashTreeRecord &GlobalOutlineRecord) { + Triple TT = Obj->makeTriple(); + auto CGOutLineName = + getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); + + for (auto &Section : Obj->sections()) { + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + Expected ContentsOrErr = Section.getContents(); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + auto *Data = reinterpret_cast(ContentsOrErr->data()); + auto *EndData = Data + ContentsOrErr->size(); + + if (*NameOrErr == CGOutLineName) { + // In case dealing with an executable that has concatenated cgdata, + // we want to merge them into a single cgdata. + // Although it's not a typical workflow, we support this scenario. 
+ while (Data != EndData) { + OutlinedHashTreeRecord LocalOutlineRecord; + LocalOutlineRecord.deserialize(Data); + GlobalOutlineRecord.merge(LocalOutlineRecord); + } + } + // TODO: Add support for other cgdata sections. + } + + return Error::success(); +} + +Error IndexedCodeGenDataReader::read() { + using namespace support; + + // The smallest header with the version 1 is 24 bytes + const unsigned MinHeaderSize = 24; + if (DataBuffer->getBufferSize() < MinHeaderSize) + return error(cgdata_error::bad_header); + + auto *Start = + reinterpret_cast(DataBuffer->getBufferStart()); + auto *End = + reinterpret_cast(DataBuffer->getBufferEnd()); + if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) + return E; + + if (hasOutlinedHashTree()) { + const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; + if (Ptr >= End) + return error(cgdata_error::eof); + HashTreeRecord.deserialize(Ptr); + } + + return success(); +} + +Expected> +CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { + // Set up the buffer to read. + auto BufferOrError = setupMemoryBuffer(Path, FS); + if (Error E = BufferOrError.takeError()) + return std::move(E); + return CodeGenDataReader::create(std::move(BufferOrError.get())); +} + +Expected> +CodeGenDataReader::create(std::unique_ptr Buffer) { + if (Buffer->getBufferSize() == 0) + return make_error(cgdata_error::empty_cgdata); + + std::unique_ptr Reader; + // Create the reader. + if (IndexedCodeGenDataReader::hasFormat(*Buffer)) + Reader = std::make_unique(std::move(Buffer)); + else if (TextCodeGenDataReader::hasFormat(*Buffer)) + Reader = std::make_unique(std::move(Buffer)); + else + return make_error(cgdata_error::malformed); + + // Initialize the reader and return the result. 
+ if (Error E = Reader->read()) + return std::move(E); + + return std::move(Reader); +} + +bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { + using namespace support; + if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) + return false; + + uint64_t Magic = endian::read( + DataBuffer.getBufferStart()); + // Verify that it's magical. + return Magic == IndexedCGData::Magic; +} + +bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { + // Verify that this really looks like plain ASCII text by checking a + // 'reasonable' number of characters (up to the magic size). + StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); + return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); +} +Error TextCodeGenDataReader::read() { + using namespace support; + + // Parse the custom header line by line. + for (; !Line.is_at_eof(); ++Line) { + // Skip empty or whitespace-only lines + if (Line->trim().empty()) + continue; + + if (!Line->starts_with(":")) + break; + StringRef Str = Line->drop_front().rtrim(); + if (Str.equals_insensitive("outlined_hash_tree")) + DataKind |= CGDataKind::FunctionOutlinedHashTree; + else + return error(cgdata_error::bad_header); + } + + // We treat an empty header (that is a comment # only) as a valid header. + if (Line.is_at_eof()) { + if (DataKind == CGDataKind::Unknown) + return Error::success(); + return error(cgdata_error::bad_header); + } + + // The YAML docs follow after the header. 
+ const char *Pos = Line->data(); + size_t Size = reinterpret_cast(DataBuffer->getBufferEnd()) - + reinterpret_cast(Pos); + yaml::Input YOS(StringRef(Pos, Size)); + if (hasOutlinedHashTree()) + HashTreeRecord.deserializeYAML(YOS); + + // TODO: Add more yaml cgdata in order + + return Error::success(); +} +} // end namespace llvm diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp new file mode 100644 index 0000000000000..5f638be0fefe7 --- /dev/null +++ b/llvm/lib/CGData/CodeGenDataWriter.cpp @@ -0,0 +1,125 @@ +//===- CodeGenDataWriter.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing codegen data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CGData/CodeGenDataWriter.h" + +#define DEBUG_TYPE "cg-data-writer" + +using namespace llvm; + +void CGDataOStream::patch(ArrayRef P) { + using namespace support; + + if (IsFDOStream) { + raw_fd_ostream &FDOStream = static_cast(OS); + const uint64_t LastPos = FDOStream.tell(); + for (const auto &K : P) { + FDOStream.seek(K.Pos); + for (int I = 0; I < K.N; I++) + write(K.D[I]); + } + // Reset the stream to the last position after patching so that users + // don't accidentally overwrite data. This makes it consistent with + // the string stream below which replaces the data directly. 
+ FDOStream.seek(LastPos); + } else { + raw_string_ostream &SOStream = static_cast(OS); + std::string &Data = SOStream.str(); // with flush + for (const auto &K : P) { + for (int I = 0; I < K.N; I++) { + uint64_t Bytes = + endian::byte_swap(K.D[I]); + Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t), + reinterpret_cast(&Bytes), sizeof(uint64_t)); + } + } + } +} + +void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) { + assert(Record.HashTree && "empty hash tree in the record"); + HashTreeRecord.HashTree = std::move(Record.HashTree); + + DataKind |= CGDataKind::FunctionOutlinedHashTree; +} + +Error CodeGenDataWriter::write(raw_fd_ostream &OS) { + CGDataOStream COS(OS); + return writeImpl(COS); +} + +Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { + using namespace support; + IndexedCGData::Header Header; + Header.Magic = IndexedCGData::Magic; + Header.Version = IndexedCGData::Version; + + // Set the CGDataKind depending on the kind. + Header.DataKind = 0; + if (static_cast(DataKind & CGDataKind::FunctionOutlinedHashTree)) + Header.DataKind |= + static_cast(CGDataKind::FunctionOutlinedHashTree); + + Header.OutlinedHashTreeOffset = 0; + + // Only write up to the CGDataKind. We need to remember the offset of the + // remaining fields to allow back-patching later. + COS.write(Header.Magic); + COS.write32(Header.Version); + COS.write32(Header.DataKind); + + // Save the location of Header.OutlinedHashTreeOffset field in \c COS. + OutlinedHashTreeOffset = COS.tell(); + + // Reserve the space for OutlinedHashTreeOffset field. + COS.write(0); + + return Error::success(); +} + +Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) { + if (Error E = writeHeader(COS)) + return E; + + uint64_t OutlinedHashTreeFieldStart = COS.tell(); + if (hasOutlinedHashTree()) + HashTreeRecord.serialize(COS.OS); + + // Back patch the offsets. 
+ CGDataPatchItem PatchItems[] = { + {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}}; + COS.patch(PatchItems); + + return Error::success(); +} + +Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) { + if (hasOutlinedHashTree()) + OS << "# Outlined stable hash tree\n:outlined_hash_tree\n"; + + // TODO: Add more data types in this header + + return Error::success(); +} + +Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) { + if (Error E = writeHeaderText(OS)) + return E; + + yaml::Output YOS(OS); + if (hasOutlinedHashTree()) + HashTreeRecord.serializeYAML(YOS); + + // TODO: Write more yaml cgdata in order + + return Error::success(); +} diff --git a/llvm/lib/CodeGenData/OutlinedHashTree.cpp b/llvm/lib/CGData/OutlinedHashTree.cpp similarity index 98% rename from llvm/lib/CodeGenData/OutlinedHashTree.cpp rename to llvm/lib/CGData/OutlinedHashTree.cpp index d64098098de62..7bf8168e5afa1 100644 --- a/llvm/lib/CodeGenData/OutlinedHashTree.cpp +++ b/llvm/lib/CGData/OutlinedHashTree.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGenData/OutlinedHashTree.h" +#include "llvm/CGData/OutlinedHashTree.h" #define DEBUG_TYPE "outlined-hash-tree" diff --git a/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp similarity index 99% rename from llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp rename to llvm/lib/CGData/OutlinedHashTreeRecord.cpp index d3c6790408388..d1d57fe3fc9f4 100644 --- a/llvm/lib/CodeGenData/OutlinedHashTreeRecord.cpp +++ b/llvm/lib/CGData/OutlinedHashTreeRecord.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" #include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" diff --git a/llvm/lib/CMakeLists.txt 
b/llvm/lib/CMakeLists.txt index 638c3bd6f90f5..503c77cb13bd0 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -9,8 +9,8 @@ add_subdirectory(FileCheck) add_subdirectory(InterfaceStub) add_subdirectory(IRPrinter) add_subdirectory(IRReader) +add_subdirectory(CGData) add_subdirectory(CodeGen) -add_subdirectory(CodeGenData) add_subdirectory(CodeGenTypes) add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 8abc153336251..0f449fa2d45be 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -74,6 +74,7 @@ set(LLVM_TEST_DEPENDS llvm-c-test llvm-cat llvm-cfi-verify + llvm-cgdata llvm-config llvm-cov llvm-ctxprof-util diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index e5e3dc7e1b4bd..bee7aa3903a1f 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -180,6 +180,7 @@ def get_asan_rtlib(): "llvm-addr2line", "llvm-bcanalyzer", "llvm-bitcode-strip", + "llvm-cgdata", "llvm-config", "llvm-cov", "llvm-ctxprof-util", diff --git a/llvm/test/tools/llvm-cgdata/convert.test b/llvm/test/tools/llvm-cgdata/convert.test new file mode 100644 index 0000000000000..632a7366d56a0 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/convert.test @@ -0,0 +1,32 @@ +# Test dump between the binary and text formats. 
+ +RUN: split-file %s %t + +RUN: llvm-cgdata --convert --format binary %t/dump.cgtext -o %t/dump.cgdata +RUN: llvm-cgdata --convert --format text %t/dump.cgdata -o %t/dump-round.cgtext +RUN: llvm-cgdata -c -f binary %t/dump-round.cgtext -o %t/dump-round.cgdata +RUN: llvm-cgdata -c -f text %t/dump-round.cgtext -o %t/dump-round-round.cgtext +RUN: diff %t/dump.cgdata %t/dump-round.cgdata +RUN: diff %t/dump-round.cgtext %t/dump-round-round.cgtext + +;--- dump.cgtext +# Outlined stable hash tree +:outlined_hash_tree +--- +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2, 3 ] +2: + Hash: 0x3 + Terminals: 5 + SuccessorIds: [ ] +3: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test new file mode 100644 index 0000000000000..70d5ea4b80063 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/empty.test @@ -0,0 +1,35 @@ +# Test no input file +RUN: not llvm-cgdata --convert --output - 2>&1 | FileCheck %s --check-prefix=NOFILE --ignore-case +NOFILE: error: No input file is specified. + +# Test for empty cgdata file, which is invalid. +RUN: touch %t_emptyfile.cgtext +RUN: not llvm-cgdata --convert %t_emptyfile.cgtext --format text 2>&1 | FileCheck %s --check-prefix=EMPTY +EMPTY: {{.}}emptyfile.cgtext: empty codegen data + +# Test for empty header in the text format. It can be converted to a valid binary file. +RUN: printf '#' > %t_emptyheader.cgtext +RUN: llvm-cgdata --convert %t_emptyheader.cgtext --format binary -o %t_emptyheader.cgdata + +# Without any cgdata other than the header, no data shows by default. 
+RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0 + +# The version number appears when asked, as it's in the header +RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION +VERSION: Version: 1 + +# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header. +RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0 + +# Synthesize a header only cgdata. +# struct Header { +# uint64_t Magic; +# uint32_t Version; +# uint32_t DataKind; +# uint64_t OutlinedHashTreeOffset; +# } +RUN: printf '\xffcgdata\x81' > %t_header.cgdata +RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata +RUN: diff %t_header.cgdata %t_emptyheader.cgdata diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test new file mode 100644 index 0000000000000..c992174505c1a --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/error.test @@ -0,0 +1,38 @@ +# Test various error cases + +# Synthesize a header only cgdata. +# struct Header { +# uint64_t Magic; +# uint32_t Version; +# uint32_t DataKind; +# uint64_t OutlinedHashTreeOffset; +# } +RUN: touch %t_empty.cgdata +RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY +EMPTY: {{.}}cgdata: empty codegen data + +# Not a magic. +RUN: printf '\xff' > %t_malformed.cgdata +RUN: not llvm-cgdata --show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED +MALFORMED: {{.}}cgdata: malformed codegen data + +# The minimum header size is 24. +RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata +RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT +CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt) + +# The current version is 1 while the header says 2.
+RUN: printf '\xffcgdata\x81' > %t_version.cgdata +RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata +RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION +BAD_VERSION: {{.}}cgdata: unsupported codegen data version + +# Header says an outlined hash tree, but the file ends after the header. +RUN: printf '\xffcgdata\x81' > %t_eof.cgdata +RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata +RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF +EOF: {{.}}cgdata: end of File diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-archive.test new file mode 100644 index 0000000000000..d70ac7c3c938d --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-archive.test @@ -0,0 +1,90 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge an archive that has two object files having cgdata (__llvm_outline) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. 
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Make an archive from two object files +RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o + +# Merge the archive into the codegen data file. +RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata +RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 4 +CHECK-NEXT: Terminal Node Count: 2 +CHECK-NEXT: Depth: 2 + +RUN: llvm-cgdata --convert %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE +TREE: # Outlined stable hash tree +TREE-NEXT: :outlined_hash_tree +TREE-NEXT: --- +TREE-NEXT: 0: +TREE-NEXT: Hash: 0x0 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 1 ] +TREE-NEXT: 1: +TREE-NEXT: Hash: 0x1 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 2, 3 ] +TREE-NEXT: 2: +TREE-NEXT: Hash: 0x3 +TREE-NEXT: Terminals: 5 +TREE-NEXT: SuccessorIds: [ ] +TREE-NEXT: 3: +TREE-NEXT: Hash: 0x2 +TREE-NEXT: Terminals: 4 +TREE-NEXT: SuccessorIds: [ ] +TREE-NEXT: ... + +;--- raw-1.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +;--- merge-1.ll +@.data = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" + + +;--- raw-2.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x3 + Terminals: 5 + SuccessorIds: [ ] +... 
+ +;--- merge-2.ll +@.data = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-concat.test new file mode 100644 index 0000000000000..cc39c673cf9a5 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-concat.test @@ -0,0 +1,83 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_outline) + +RUN: split-file %s %t + +# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata. +# Concatenate them in merge-concat.ll +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll + +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o +RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata +RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 4 +CHECK-NEXT: Terminal Node Count: 2 +CHECK-NEXT: Depth: 2 + +RUN: llvm-cgdata --convert %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE +TREE: # Outlined stable hash tree +TREE-NEXT: :outlined_hash_tree +TREE-NEXT: --- +TREE-NEXT: 0: +TREE-NEXT: Hash: 0x0 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 1 ] +TREE-NEXT: 1: +TREE-NEXT: Hash: 0x1 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 2, 3 ] +TREE-NEXT: 2: +TREE-NEXT: Hash: 0x3 +TREE-NEXT: Terminals: 5 +TREE-NEXT: 
SuccessorIds: [ ] +TREE-NEXT: 3: +TREE-NEXT: Hash: 0x2 +TREE-NEXT: Terminals: 4 +TREE-NEXT: SuccessorIds: [ ] +TREE-NEXT: ... + +;--- raw-1.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +;--- raw-2.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x3 + Terminals: 5 + SuccessorIds: [ ] +... + +;--- merge-concat.ll + +; In a linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format. +@.data1 = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" +@.data2 = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" diff --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-double.test new file mode 100644 index 0000000000000..950a88c66f7bb --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-double.test @@ -0,0 +1,87 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge two object files having cgdata (__llvm_outline) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
+RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed -ie "s//$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Merge two object files into the codegen data file. +RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata + +RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 4 +CHECK-NEXT: Terminal Node Count: 2 +CHECK-NEXT: Depth: 2 + +RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=TREE +TREE: # Outlined stable hash tree +TREE-NEXT: :outlined_hash_tree +TREE-NEXT: --- +TREE-NEXT: 0: +TREE-NEXT: Hash: 0x0 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 1 ] +TREE-NEXT: 1: +TREE-NEXT: Hash: 0x1 +TREE-NEXT: Terminals: 0 +TREE-NEXT: SuccessorIds: [ 2, 3 ] +TREE-NEXT: 2: +TREE-NEXT: Hash: 0x3 +TREE-NEXT: Terminals: 5 +TREE-NEXT: SuccessorIds: [ ] +TREE-NEXT: 3: +TREE-NEXT: Hash: 0x2 +TREE-NEXT: Terminals: 4 +TREE-NEXT: SuccessorIds: [ ] +TREE-NEXT: ... + +;--- raw-1.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +;--- merge-1.ll +@.data = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" + +;--- raw-2.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x3 + Terminals: 5 + SuccessorIds: [ ] +... 
+ +;--- merge-2.ll +@.data = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-single.test new file mode 100644 index 0000000000000..783c7b979f541 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-single.test @@ -0,0 +1,49 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Test merging a single object file into a cgdata + +RUN: split-file %s %t + +# Merge an object file that has no cgdata (__llvm_outline). It still produces a header only cgdata. +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-empty.ll -o %t/merge-empty.o +RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata +# No summary appears with the header only cgdata. +RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0 + +# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata +RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt + +RUN: sed -ie "s//$(cat %t/raw-single-bytes.txt)/g" %t/merge-single.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o + +# Merge an object file having cgdata (__llvm_outline) +RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata +RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 3 +CHECK-NEXT: Terminal Node Count: 1 +CHECK-NEXT: Depth: 2 + +;--- merge-empty.ll +@.data = private unnamed_addr constant [1 x i8] c"\01" + +;--- raw-single.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +...
+ +;--- merge-single.ll +@.data = private unnamed_addr constant [72 x i8] c"", section "__DATA,__llvm_outline" diff --git a/llvm/test/tools/llvm-cgdata/show.test b/llvm/test/tools/llvm-cgdata/show.test new file mode 100644 index 0000000000000..b47ad4978ef0b --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/show.test @@ -0,0 +1,30 @@ +# Test show + +RUN: split-file %s %t +RUN: llvm-cgdata --show %t/show.cgtext | FileCheck %s + +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 3 +CHECK-NEXT: Terminal Node Count: 1 +CHECK-NEXT: Depth: 2 + +# Convert the text file to the binary file +RUN: llvm-cgdata --convert --format binary %t/show.cgtext -o %t/show.cgdata +RUN: llvm-cgdata --show %t/show.cgdata | FileCheck %s + +;--- show.cgtext +:outlined_hash_tree +--- +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 3 + SuccessorIds: [ ] +... diff --git a/llvm/tools/llvm-cgdata/CMakeLists.txt b/llvm/tools/llvm-cgdata/CMakeLists.txt new file mode 100644 index 0000000000000..556bc388306a3 --- /dev/null +++ b/llvm/tools/llvm-cgdata/CMakeLists.txt @@ -0,0 +1,21 @@ +set(LLVM_LINK_COMPONENTS + CGData + CodeGen + Core + Object + Option + Support + ) + +set(LLVM_TARGET_DEFINITIONS Opts.td) +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(CGDataOptsTableGen) + +add_llvm_tool(llvm-cgdata + llvm-cgdata.cpp + + DEPENDS + intrinsics_gen + CGDataOptsTableGen + GENERATE_DRIVER + ) diff --git a/llvm/tools/llvm-cgdata/Opts.td b/llvm/tools/llvm-cgdata/Opts.td new file mode 100644 index 0000000000000..b2cfc6a85bbd3 --- /dev/null +++ b/llvm/tools/llvm-cgdata/Opts.td @@ -0,0 +1,32 @@ +include "llvm/Option/OptParser.td" + +class F : Flag<["-"], letter>, HelpText; +class FF : Flag<["--"], name>, HelpText; + +// General options +def generic_group : OptionGroup<"Genric Options">, HelpText<"Generic Options">; +def help : FF<"help", "Display this help">, Group; +def : F<"h", "Alias for 
--help">, Alias, Group; +def version : FF<"version", "Display the LLVM version">, Group; +def : F<"v", "Alias for --version">, Alias, Group; + +// Action options +def action_group : OptionGroup<"Action">, HelpText<"Action (required)">; +def show : FF<"show", "Show summary of the (indexed) codegen data file.">, + Group; +def : F<"s", "Alias for --show">, Alias, Group; +def convert : FF<"convert", "Convert the (indexed) codegen data file in either text or binary format.">, + Group; +def : F<"c", "Alias for --convert">, Alias, Group; +def merge : FF<"merge", "Take binary files having raw codegen data in custom sections, and merge them into an indexed codegen data file.">, + Group; +def : F<"m", "Alias for --merge">, Alias, Group; + +// Additional options +def cgdata_version : FF<"cgdata-version", "Display the cgdata version">; +def output : Option<["--"], "output", KIND_SEPARATE>, + HelpText<"Specify the name for the output file to be created">, MetaVarName<"">; +def : JoinedOrSeparate<["-"], "o">, Alias, MetaVarName<"">, HelpText<"Alias for --output">; +def format : Option<["--"], "format", KIND_SEPARATE>, + HelpText<"Specify the output format (text or binary)">, MetaVarName<"">; +def : JoinedOrSeparate<["-"], "f">, Alias, HelpText<"Alias for --format">; diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp new file mode 100644 index 0000000000000..3104242070f34 --- /dev/null +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -0,0 +1,354 @@ +//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// llvm-cgdata parses raw codegen data embedded in compiled binary files, and +// merges them into a single .cgdata file. It can also inspect and manipulate +// a .cgdata file. This .cgdata can contain various codegen data like outlining +// information, and it can be used to optimize the code in the subsequent build. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/StringRef.h" +#include "llvm/CGData/CodeGenDataReader.h" +#include "llvm/CGData/CodeGenDataWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/LLVMDriver.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; + +enum CGDataFormat { + Invalid, + Text, + Binary, +}; + +enum CGDataAction { + Convert, + Merge, + Show, +}; + +// Command-line option boilerplate. +namespace { +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) \ + static constexpr StringLiteral NAME##_init[] = VALUE; \ + static constexpr ArrayRef NAME(NAME##_init, \ + std::size(NAME##_init) - 1); +#include "Opts.inc" +#undef PREFIX + +using namespace llvm::opt; +static constexpr opt::OptTable::Info InfoTable[] = { +#define OPTION(...)
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "Opts.inc" +#undef OPTION +}; + +class CGDataOptTable : public opt::GenericOptTable { +public: + CGDataOptTable() : GenericOptTable(InfoTable) {} +}; +} // end anonymous namespace + +// Options +static StringRef ToolName; +static StringRef OutputFilename = "-"; +static StringRef Filename; +static bool ShowCGDataVersion; +static CGDataAction Action; +static std::optional OutputFormat; +static std::vector InputFilenames; + +// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html + +static void exitWithError(Twine Message, std::string Whence = "", + std::string Hint = "") { + WithColor::error(); + if (!Whence.empty()) + errs() << Whence << ": "; + errs() << Message << "\n"; + if (!Hint.empty()) + WithColor::note() << Hint << "\n"; + ::exit(1); +} + +static void exitWithError(Error E, StringRef Whence = "") { + if (E.isA()) { + handleAllErrors(std::move(E), [&](const CGDataError &IPE) { + exitWithError(IPE.message(), std::string(Whence)); + }); + return; + } + + exitWithError(toString(std::move(E)), std::string(Whence)); +} + +static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { + exitWithError(EC.message(), std::string(Whence)); +} + +static int convert_main(int argc, const char *argv[]) { + std::error_code EC; + raw_fd_ostream OS(OutputFilename, EC, + OutputFormat == CGDataFormat::Text + ? 
sys::fs::OF_TextWithCRLF + : sys::fs::OF_None); + if (EC) + exitWithErrorCode(EC, OutputFilename); + + auto FS = vfs::getRealFileSystem(); + auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS); + if (Error E = ReaderOrErr.takeError()) + exitWithError(std::move(E), Filename); + + CodeGenDataWriter Writer; + auto Reader = ReaderOrErr->get(); + if (Reader->hasOutlinedHashTree()) { + OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree()); + Writer.addRecord(Record); + } + + if (OutputFormat == CGDataFormat::Text) { + if (Error E = Writer.writeText(OS)) + exitWithError(std::move(E)); + } else { + if (Error E = Writer.write(OS)) + exitWithError(std::move(E)); + } + + return 0; +} + +static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, + OutlinedHashTreeRecord &GlobalOutlineRecord); + +static bool handleArchive(StringRef Filename, Archive &Arch, + OutlinedHashTreeRecord &GlobalOutlineRecord) { + bool Result = true; + Error Err = Error::success(); + for (const auto &Child : Arch.children(Err)) { + auto BuffOrErr = Child.getMemoryBufferRef(); + if (Error E = BuffOrErr.takeError()) + exitWithError(std::move(E), Filename); + auto NameOrErr = Child.getName(); + if (Error E = NameOrErr.takeError()) + exitWithError(std::move(E), Filename); + std::string Name = (Filename + "(" + NameOrErr.get() + ")").str(); + Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord); + } + if (Err) + exitWithError(std::move(Err), Filename); + return Result; +} + +static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, + OutlinedHashTreeRecord &GlobalOutlineRecord) { + Expected> BinOrErr = + object::createBinary(Buffer); + if (Error E = BinOrErr.takeError()) + exitWithError(std::move(E), Filename); + + bool Result = true; + if (auto *Obj = dyn_cast(BinOrErr->get())) { + if (Error E = + CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord)) + exitWithError(std::move(E), Filename); + } else if (auto *Arch = 
dyn_cast(BinOrErr->get())) { + Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord); + } else { + // TODO: Support for the MachO universal binary format. + errs() << "Error: unsupported binary file: " << Filename << "\n"; + Result = false; + } + + return Result; +} + +static bool handleFile(StringRef Filename, + OutlinedHashTreeRecord &GlobalOutlineRecord) { + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BuffOrErr.getError()) + exitWithErrorCode(EC, Filename); + return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord); +} + +static int merge_main(int argc, const char *argv[]) { + bool Result = true; + OutlinedHashTreeRecord GlobalOutlineRecord; + for (auto &Filename : InputFilenames) + Result &= handleFile(Filename, GlobalOutlineRecord); + + if (!Result) + exitWithError("failed to merge codegen data files."); + + CodeGenDataWriter Writer; + if (!GlobalOutlineRecord.empty()) + Writer.addRecord(GlobalOutlineRecord); + + std::error_code EC; + raw_fd_ostream OS(OutputFilename, EC, + OutputFormat == CGDataFormat::Text + ? 
sys::fs::OF_TextWithCRLF + : sys::fs::OF_None); + if (EC) + exitWithErrorCode(EC, OutputFilename); + + if (OutputFormat == CGDataFormat::Text) { + if (Error E = Writer.writeText(OS)) + exitWithError(std::move(E)); + } else { + if (Error E = Writer.write(OS)) + exitWithError(std::move(E)); + } + + return 0; +}

/// Implements the `show` action: prints a short summary of the codegen data
/// read from `Filename` to the stream opened on `OutputFilename`.
///
/// The version line is emitted only when `ShowCGDataVersion` is set, and the
/// outlined-hash-tree statistics only when the reader reports having a tree.
/// A failure to open the output is passed to exitWithErrorCode(); a failure to
/// create the reader is passed to exitWithError().
///
/// \param argc Unused.
/// \param argv Unused.
/// \returns 0.
static int show_main(int argc, const char *argv[]) {
  std::error_code EC;
  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
  if (EC)
    exitWithErrorCode(EC, OutputFilename);

  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
  if (Error E = ReaderOrErr.takeError())
    exitWithError(std::move(E), Filename);

  auto Reader = ReaderOrErr->get();
  if (ShowCGDataVersion)
    OS << "Version: " << Reader->getVersion() << "\n";

  if (Reader->hasOutlinedHashTree()) {
    auto Tree = Reader->releaseOutlinedHashTree();
    // NOTE(review): whitespace runs appear collapsed in this copy, so the
    // leading indentation inside the literals below may differ from the
    // original -- confirm against upstream.
    OS << "Outlined hash tree:\n";
    OS << " Total Node Count: " << Tree->size() << "\n";
    OS << " Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
       << "\n";
    OS << " Depth: " << Tree->depth() << "\n";
  }

  return 0;
}

/// Parses the command line and stores the result in the file-level option
/// variables: ToolName, ShowCGDataVersion, OutputFormat, InputFilenames,
/// Filename (the first input), OutputFilename and Action.
///
/// `--help` and `--version` print their text and call std::exit(0). An option
/// parsing error is printed to llvm::errs() followed by std::exit(1). Every
/// other validation failure (bad format, no input, input equal to output,
/// zero or multiple actions, wrong input count for show/convert) is passed to
/// exitWithError().
///
/// \param argc Argument count as received by the tool entry point.
/// \param argv Argument vector; argv[0] becomes ToolName.
static void parseArgs(int argc, char **argv) {
  CGDataOptTable Tbl;
  ToolName = argv[0];
  llvm::BumpPtrAllocator A;
  llvm::StringSaver Saver{A};
  llvm::opt::InputArgList Args =
      Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
        llvm::errs() << Msg << '\n';
        std::exit(1);
      });

  if (Args.hasArg(OPT_help)) {
    // NOTE(review): the usage text reads "(|<.cgdata file>)" with nothing
    // before the '|'; an angle-bracketed placeholder may have been lost in
    // this copy -- confirm against upstream.
    Tbl.printHelp(
        llvm::outs(),
        "llvm-cgdata [options] (|<.cgdata file>)",
        ToolName.str().c_str());
    std::exit(0);
  }
  if (Args.hasArg(OPT_version)) {
    cl::PrintVersionMessage();
    std::exit(0);
  }

  ShowCGDataVersion = Args.hasArg(OPT_cgdata_version);

  // Named FormatArg (not A) so it does not shadow the allocator above.
  if (opt::Arg *FormatArg = Args.getLastArg(OPT_format)) {
    StringRef OF = FormatArg->getValue();
    OutputFormat = StringSwitch<CGDataFormat>(OF)
                       .Case("text", CGDataFormat::Text)
                       .Case("binary", CGDataFormat::Binary)
                       .Default(CGDataFormat::Invalid);
    if (OutputFormat == CGDataFormat::Invalid)
      exitWithError("unsupported format '" + OF + "'");
  }

  InputFilenames = Args.getAllArgValues(OPT_INPUT);
  if (InputFilenames.empty())
    exitWithError("No input file is specified.");
  Filename = InputFilenames[0];

  if (Args.hasArg(OPT_output)) {
    OutputFilename = Args.getLastArgValue(OPT_output);
    // Named Input so it does not shadow the file-level Filename set above.
    for (const auto &Input : InputFilenames)
      if (Input == OutputFilename)
        exitWithError(
            "Input file name cannot be the same as the output file name!\n");
  }

  // Exactly one option from the action group must be present.
  opt::Arg *ActionArg = nullptr;
  for (opt::Arg *Arg : Args.filtered(OPT_action_group)) {
    if (ActionArg)
      exitWithError("Only one action is allowed.");
    ActionArg = Arg;
  }
  if (!ActionArg)
    exitWithError("One action is required.");

  switch (ActionArg->getOption().getID()) {
  case OPT_show:
    if (InputFilenames.size() != 1)
      exitWithError("only one input file is allowed.");
    Action = CGDataAction::Show;
    break;
  case OPT_convert:
    // The default output format is text for convert.
    if (!OutputFormat)
      OutputFormat = CGDataFormat::Text;
    if (InputFilenames.size() != 1)
      exitWithError("only one input file is allowed.");
    Action = CGDataAction::Convert;
    break;
  case OPT_merge:
    // The default output format is binary for merge.
    if (!OutputFormat)
      OutputFormat = CGDataFormat::Binary;
    Action = CGDataAction::Merge;
    break;
  default:
    llvm_unreachable("unrecognized action");
  }
}

/// Tool entry point: parses the command line, then dispatches to the handler
/// for the selected action (convert, merge or show).
///
/// \param argc Argument count.
/// \param argvNonConst Argument vector; handed to parseArgs() as-is and to the
///        action handlers as `const char **`.
/// \returns The selected handler's return value.
int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
  const char **argv = const_cast<const char **>(argvNonConst);
  parseArgs(argc, argvNonConst);

  switch (Action) {
  case CGDataAction::Convert:
    return convert_main(argc, argv);
  case CGDataAction::Merge:
    return merge_main(argc, argv);
  case CGDataAction::Show:
    return show_main(argc, argv);
  default:
    llvm_unreachable("unrecognized action");
  }

  return 1;
}
diff --git a/llvm/unittests/CodeGenData/CMakeLists.txt b/llvm/unittests/CGData/CMakeLists.txt similarity index 94% rename from llvm/unittests/CodeGenData/CMakeLists.txt rename to llvm/unittests/CGData/CMakeLists.txt index 3d821b87e29d8..9cedab56d3f6b 100644 --- a/llvm/unittests/CodeGenData/CMakeLists.txt +++ b/llvm/unittests/CGData/CMakeLists.txt @@ -1,7 +1,7 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} + CGData CodeGen - CodeGenData Core Support ) diff --git a/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp b/llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp similarity index 98% rename from llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp rename to llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp index aa7ad4a33754f..a614a48dd7a43 100644 --- a/llvm/unittests/CodeGenData/OutlinedHashTreeRecordTest.cpp +++ b/llvm/unittests/CGData/OutlinedHashTreeRecordTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/OutlinedHashTreeRecord.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp b/llvm/unittests/CGData/OutlinedHashTreeTest.cpp similarity index 98% rename from llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp rename to
llvm/unittests/CGData/OutlinedHashTreeTest.cpp index 637ab3cd08c1c..2d1ec8b05ab2a 100644 --- a/llvm/unittests/CodeGenData/OutlinedHashTreeTest.cpp +++ b/llvm/unittests/CGData/OutlinedHashTreeTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGenData/OutlinedHashTree.h" +#include "llvm/CGData/OutlinedHashTree.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index 49ed6c8fb6c42..911ede701982f 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -20,8 +20,8 @@ add_subdirectory(AsmParser) add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Bitstream) +add_subdirectory(CGData) add_subdirectory(CodeGen) -add_subdirectory(CodeGenData) add_subdirectory(DebugInfo) add_subdirectory(Debuginfod) add_subdirectory(Demangle)