diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 000000000..7588c25fd --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,41 @@ +name: Build and Deploy Doxygen Docs +on: + push: + branches: [ development ] + # pull_request: # For testing only. Remove before merge! + # branches: [ development ] +permissions: + contents: write +jobs: + build-and-deploy: + runs-on: ubuntu-24.04 + strategy: + fail-fast: true + continue-on-error: false + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Install Phasar Dependencies + shell: bash + run: | + ./utils/InstallAptDependencies.sh --noninteractive tzdata doxygen graphviz + + - name: Build Doxygen Docs + shell: bash + env: + CXX: clang++-15 + CC: clang-15 + run: | + cmake -S . -B build -DPHASAR_BUILD_DOC=ON + cmake --build ./build --target doc_doxygen + + - name: Deploy Doxygen Docs on GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: build/docs/html + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index d15020fae..17396e546 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Currently, PhASAR is maintained by PhASAR requires at least C++-17. However, building in C++20 mode is supported. You may enable this setting the cmake variable `CMAKE_CXX_STANDARD` to `20`. -Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 ealier. +Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 earlier. 
## Currently Supported Version of LLVM diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index b45f14eeb..2014d7364 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -32,19 +32,19 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "PhASAR" +PROJECT_NAME = PhASAR # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = @PHASAR_VERSION@ +PROJECT_NUMBER = @PHASAR_VERSION@@development # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = "PhASAR a LLVM-based Static Analysis Framework" +PROJECT_BRIEF = "A LLVM-based Static Analysis Framework" # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels @@ -144,7 +144,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = +STRIP_FROM_PATH = @PHASAR_SRC_DIR@ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -153,8 +153,7 @@ STRIP_FROM_PATH = # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = - +STRIP_FROM_INC_PATH = @PHASAR_SRC_DIR@/include # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. 
@@ -398,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. @@ -743,7 +742,8 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ @CMAKE_CURRENT_SOURCE_DIR@/lib/ @CMAKE_CURRENT_SOURCE_DIR@/docs +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ \ + @CMAKE_CURRENT_SOURCE_DIR@/docs # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -794,7 +794,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = */external/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -805,7 +805,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = *::detail::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -905,7 +905,7 @@ INLINE_SOURCES = NO # Fortran comments will always remain visible. # The default value is: YES. -STRIP_CODE_COMMENTS = YES +STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. @@ -917,7 +917,7 @@ REFERENCED_BY_RELATION = YES # all documented entities called/used by that function will be listed. # The default value is: NO. 
-REFERENCES_RELATION = NO +REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and @@ -1407,7 +1407,7 @@ FORMULA_TRANSPARENT = YES # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: @@ -1521,7 +1521,7 @@ SEARCHDATA_FILE = searchdata.xml # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. -EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = phasar # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1540,7 +1540,7 @@ EXTRA_SEARCH_MAPPINGS = # If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. # The default value is: YES. -GENERATE_LATEX = YES +GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of @@ -1917,7 +1917,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. -INCLUDE_PATH = +INCLUDE_PATH = @PHASAR_BINARY_DIR@/include # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the @@ -2040,7 +2040,7 @@ DIA_PATH = # and usage relations if the target is undocumented or is not a class. # The default value is: YES. -HIDE_UNDOC_RELATIONS = YES +HIDE_UNDOC_RELATIONS = NO # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: @@ -2100,7 +2100,7 @@ CLASS_GRAPH = YES # The default value is: YES. 
# This tag requires that the tag HAVE_DOT is set to YES. -COLLABORATION_GRAPH = YES +COLLABORATION_GRAPH = NO # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for # groups, showing the direct groups dependencies. @@ -2136,7 +2136,7 @@ UML_LIMIT_NUM_FIELDS = 10 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -TEMPLATE_RELATIONS = NO +TEMPLATE_RELATIONS = YES # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to # YES then doxygen will generate a graph for each documented file showing the @@ -2145,7 +2145,7 @@ TEMPLATE_RELATIONS = NO # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -INCLUDE_GRAPH = YES +INCLUDE_GRAPH = NO # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are # set to YES then doxygen will generate a graph for each documented file showing @@ -2154,7 +2154,7 @@ INCLUDE_GRAPH = YES # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -INCLUDED_BY_GRAPH = YES +INCLUDED_BY_GRAPH = NO # If the CALL_GRAPH tag is set to YES then doxygen will generate a call # dependency graph for every global function or class method. @@ -2203,7 +2203,7 @@ DIRECTORY_GRAPH = YES # The default value is: png. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_IMAGE_FORMAT = png +DOT_IMAGE_FORMAT = svg # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. @@ -2276,7 +2276,7 @@ MAX_DOT_GRAPH_DEPTH = 0 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_TRANSPARENT = NO +DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). 
This diff --git a/docs/README.dox b/docs/README.dox index e00ef5977..6ae3b786c 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -2,14 +2,27 @@ @mainpage PhASAR: A LLVM-based Static Analysis Framework -@author Philipp Schubert (E-Mail: philipp.schubert@upb.de) and others +PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. -\b Copyright \n - Copyright 2017 Philipp Schubert. All rights reserved. +This page contains the generated code documentation of PhASAR. +You can find the original source on GitHub: <https://github.com/secure-software-engineering/phasar>. The README should already give you a good first overview. -\b License \n - See LICENSE.txt +For further information, please check out PhASAR's [Wiki](https://github.com/secure-software-engineering/phasar/wiki). + +@subsubsection SSEG Secure Software Engineering Group + +PhASAR is primarily developed and maintained by the [Secure Software Engineering Group](https://www.hni.uni-paderborn.de/sse) at Heinz Nixdorf Institute (University of Paderborn) and [Fraunhofer IEM](https://www.iem.fraunhofer.de/). -TODO: add detailed description. +PhASAR was initially developed by Philipp Dominik Schubert (@pdschubert)(). + +\b Currently, PhASAR is maintained by +- Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) +- Sriteja Kummita (@sritejakv) +- Lucas Briese (@jusito) +- Martin Mory (@MMory)(martin.mory@upb.de) +- *others* + +\b License \n + PhASAR is made available under the permissive MIT License.
See LICENSE.txt */ diff --git a/include/phasar/AnalysisStrategy/AnalysisSetup.h b/include/phasar/AnalysisStrategy/AnalysisSetup.h index 975ed1c29..7b40140d4 100644 --- a/include/phasar/AnalysisStrategy/AnalysisSetup.h +++ b/include/phasar/AnalysisStrategy/AnalysisSetup.h @@ -16,7 +16,7 @@ namespace psr { -// Indicates that an analysis does not need a special configuration (file). +/// Indicates that an analysis does not need a special configuration (file). struct HasNoConfigurationType {}; struct AnalysisSetup { diff --git a/include/phasar/ControlFlow/CallGraphData.h b/include/phasar/ControlFlow/CallGraphData.h index ed50d66e4..58393936a 100644 --- a/include/phasar/ControlFlow/CallGraphData.h +++ b/include/phasar/ControlFlow/CallGraphData.h @@ -18,8 +18,12 @@ #include namespace psr { + +/// A data structure used for storing, serializing and deserializing call-graph +/// information. struct CallGraphData { - // Mangled FunName --> [CS-IDs] + + /// Mangled FunName --> [CS-IDs] std::unordered_map> FToFunctionVertexTy{}; CallGraphData() noexcept = default; diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 4ef63126d..ba6f9dc1f 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -37,7 +37,7 @@ template class ICFGBase { } /// returns the function definition or declaration with the given name. If - /// ther eis no such function, returns a default constructed f_t (nullptr for + /// there is no such function, returns a default constructed f_t (nullptr for /// pointers). 
[[nodiscard]] f_t getFunction(llvm::StringRef Fun) const { return self().getFunctionImpl(Fun); @@ -124,9 +124,10 @@ template class ICFGBase { /// from the given analysis-Domain template // NOLINTNEXTLINE(readability-identifier-naming) -PSR_CONCEPT is_icfg_v = is_crtp_base_of_v - &&std::is_same_v - &&std::is_same_v; +PSR_CONCEPT is_icfg_v = + is_crtp_base_of_v && + std::is_same_v && + std::is_same_v; } // namespace psr diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h index 4a74d9e3f..ca6c0ad70 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h @@ -65,7 +65,7 @@ concept IsEdgeFunction = requires(const T &EF, const EdgeFunction std::same_as>; {T::join(CEF, TEEF)} -> std::same_as>; }; - // clang-format on +// clang-format on #endif @@ -78,10 +78,10 @@ enum class EdgeFunctionAllocationPolicy { class EdgeFunctionBase { public: template - static constexpr bool - IsSOOCandidate = sizeof(ConcreteEF) <= sizeof(void *) && // NOLINT - alignof(ConcreteEF) <= alignof(void *) && - std::is_trivially_copyable_v; + static constexpr bool IsSOOCandidate = + sizeof(ConcreteEF) <= sizeof(void *) && // NOLINT + alignof(ConcreteEF) <= alignof(void *) && + std::is_trivially_copyable_v; using AllocationPolicy = EdgeFunctionAllocationPolicy; @@ -89,7 +89,9 @@ class EdgeFunctionBase { struct RefCountedBase { mutable std::atomic_size_t Rc = 0; }; - template struct RefCounted : RefCountedBase { T Value; }; + template struct RefCounted : RefCountedBase { + T Value; + }; template struct CachedRefCounted : RefCounted { EdgeFunctionSingletonCache *Cache{}; @@ -118,8 +120,8 @@ class EdgeFunctionBase { : AllocationPolicy::CustomHeapAllocated; }; -/// Non-null reference to an edge function that is guarenteed to be managed by -/// an EdgeFunction object. +/// \brief Non-null reference to an edge function that is guaranteed to be +/// managed by an EdgeFunction object.
template class [[clang::trivial_abi]] EdgeFunctionRef final : EdgeFunctionBase { template friend class EdgeFunction; @@ -164,8 +166,8 @@ class [[clang::trivial_abi]] EdgeFunctionRef final : EdgeFunctionBase { IsCached{}; }; -/// Ref-counted and type-erased edge function with small-object optimization. -/// Supports caching. +/// \brief Ref-counted and type-erased edge function with small-object +/// optimization. Supports caching. template // -- combined copy and move assignment // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) @@ -265,8 +267,8 @@ class [[clang::trivial_abi]] EdgeFunction final : EdgeFunctionBase { explicit EdgeFunction( std::in_place_type_t /*unused*/, ArgTys &&...Args) noexcept(IsSOOCandidate> && - std::is_nothrow_constructible_v) + std::is_nothrow_constructible_v) : EdgeFunction( [](auto &&...Args) { if constexpr (IsSOOCandidate>) { diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h index 3b9034133..e6b4cd2e1 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h @@ -42,222 +42,225 @@ template class EdgeFunctions { virtual ~EdgeFunctions() = default; - // - // Also refer to FlowFunctions::getNormalFlowFunction() - // - // Describes a value computation problem along a normal (non-call, non-return) - // intra-procedural exploded supergraph edge. A normal edge function - // implementation is queried for each edge that has been generated by appling - // the flow function returned by FlowFunctions::getNormalFlowFunction(). The - // supergraph edge whose computation is requested is defined by the supergraph - // nodes CurrNode and SuccNode. - // - // Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological - // lambda fact. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. 
In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getNormalFlowfunction()): - // - // f(0) -> {0} // pass the lambda (or zero fact) as identity - // f(o) -> {o, x} // generate a new fact x from o - // f(.) -> {.} // pass all other facts that hold before instruction_1 - // // as identity - // - // The above flow-function implementation corresponds to the following edges - // in the exploded supergraph. - // - // 0 o ... - // | |\ ... - // Curr := x = instruction_1 o p | | \ ... - // | | | ... - // v v v ... - // 0 o x ... - // - // Succ := y = instruction_2 q r - // - // For each edge generated by the respective flow function a normal edge - // function is queried that describes a value computation. This results in the - // following queries: - // - // getNormalEdgeFunction(0, Curr, 0 Succ); - // getNormalEdgeFunction(o, Curr, o Succ); - // getNormalEdgeFunction(o, Curr, x Succ); - // + /// + /// Also refer to FlowFunctions::getNormalFlowFunction() + /// + /// Describes a value computation problem along a normal (non-call, + /// non-return) intra-procedural exploded supergraph edge. A normal edge + /// function implementation is queried for each edge that has been generated + /// by applying the flow function returned by + /// FlowFunctions::getNormalFlowFunction(). The supergraph edge whose + /// computation is requested is defined by the supergraph nodes CurrNode and + /// SuccNode. + /// + /// Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological + /// lambda fact. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf.
+ /// FlowFunctions::getNormalFlowfunction()): + /// \code + /// f(0) -> {0} // pass the lambda (or zero fact) as identity + /// f(o) -> {o, x} // generate a new fact x from o + /// f(.) -> {.} // pass all other facts that hold before + /// // instruction_1 as identity + /// \endcode + /// + /// The above flow-function implementation corresponds to the following edges + /// in the exploded supergraph. + /// \code + /// 0 o ... + /// | |\ ... + /// Curr := x = instruction_1 o p | | \ ... + /// | | | ... + /// v v v ... + /// 0 o x ... + /// + /// Succ := y = instruction_2 q r + /// \endcode + /// For each edge generated by the respective flow function a normal edge + /// function is queried that describes a value computation. This results in + /// the following queries: + /// + /// \code + /// getNormalEdgeFunction(0, Curr, 0 Succ); + /// getNormalEdgeFunction(o, Curr, o Succ); + /// getNormalEdgeFunction(o, Curr, x Succ); + /// \endcode virtual EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) = 0; - // - // Also refer to FlowFunctions::getCallFlowFunction() - // - // Describes a value computation problem along a call flow. A call edge - // function is queried for each edge that has been generated by applying the - // flow function that has been returned by FlowFunctions::getCallFlowFunction. - // The supergraph edge whose computation is requested is defined by the - // supergraph nodes SrcNode and DestNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getCallFlowFunction()): - // - // f(0) -> {0} // pass as identity into the callee target - // f(o) -> {q} // map actual o into formal q - // f(p) -> {r} // map actual p into formal r - // f(.) 
-> {} // kill all other facts that are not visible to the - // // callee target - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o p ... - // \ \ \ ... - // CallInst := x = CalleeFun(o, p, ...) \ \ +----------------+ - // \ +---------------- | - // +-------------+ + | - // ... | | | - // ... | | | - // 0 o p ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, r, ...) | | | - // v v v - // 0 q r ... - // - // start point - // - // For each edge generated by the respective flow function a call edge - // function is queried that describes a value computation. This results in the - // following queries: - // - // getCallEdgeFunction(CallInst, 0, CalleeFun, 0); - // getCallEdgeFunction(CallInst, o, CalleeFun, q); - // getCallEdgeFunction(CallInst, p, CalleeFun, r); - // + /// + /// Also refer to FlowFunctions::getCallFlowFunction() + /// + /// Describes a value computation problem along a call flow. A call edge + /// function is queried for each edge that has been generated by applying the + /// flow function that has been returned by + /// FlowFunctions::getCallFlowFunction. The supergraph edge whose computation + /// is requested is defined by the supergraph nodes SrcNode and DestNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getCallFlowFunction()): + /// \code + /// f(0) -> {0} // pass as identity into the callee target + /// f(o) -> {q} // map actual o into formal q + /// f(p) -> {r} // map actual p into formal r + /// f(.) -> {} // kill all other facts that are not visible to the + /// // callee target + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o p ... + /// \ \ \ ... 
+ /// CallInst := x = CalleeFun(o, p, ...) \ \ +----------------+ + /// \ +---------------- | + /// +-------------+ + | + /// ... | | | + /// ... | | | + /// 0 o p ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, r, ...) | | | + /// v v v + /// 0 q r ... + /// + /// start point + /// \endcode + /// For each edge generated by the respective flow function a call edge + /// function is queried that describes a value computation. This results in + /// the following queries: + /// \code + /// getCallEdgeFunction(CallInst, 0, CalleeFun, 0); + /// getCallEdgeFunction(CallInst, o, CalleeFun, q); + /// getCallEdgeFunction(CallInst, p, CalleeFun, r); + /// \endcode virtual EdgeFunction getCallEdgeFunction(n_t CallInst, d_t SrcNode, f_t CalleeFun, d_t DestNode) = 0; - // - // Also refer to FlowFunction::getRetFlowFunction() - // - // Describes a value computation problem along a return flow. A return edge - // function implementation is queried for each edge that has been generated by - // applying the flow function that has been returned by - // FlowFunctions::getRetFlowFunction(). The supergraph edge whose computation - // is requested is defined by the supergraph nodes ExitNode and RetNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getRetFlowFunction()): - // - // f(0) -> {0} // pass as identity into the callee target - // f(r) -> {x} // map return value to lhs variable at CallSite - // f(q) -> {o} // map pointer-typed formal q to actual o - // f(.) -> {} // kill all other facts that are not visible to the - // // caller - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // - // CallSite = RetSite := x = CalleeFun(o, ...) 
- // +------------------+ - // +--|---------------+ | - // +--|--|------------+ | | - // v v v ... | | | - // 0 o x ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, ...) | | | - // | | | - // 0 q r - // - // ExitInst := return r - // - // For each edge generated by the respective flow function a return edge - // function is queried that describes a value computation. This results in the - // following queries: - // - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, 0, RetSite, 0); - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, q, RetSite, o); - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, r, RetSite, x); - // + /// + /// Also refer to FlowFunction::getRetFlowFunction() + /// + /// Describes a value computation problem along a return flow. A return edge + /// function implementation is queried for each edge that has been generated + /// by applying the flow function that has been returned by + /// FlowFunctions::getRetFlowFunction(). The supergraph edge whose computation + /// is requested is defined by the supergraph nodes ExitNode and RetNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getRetFlowFunction()): + /// \code + /// f(0) -> {0} // pass as identity into the callee target + /// f(r) -> {x} // map return value to lhs variable at CallSite + /// f(q) -> {o} // map pointer-typed formal q to actual o + /// f(.) -> {} // kill all other facts that are not visible to the + /// // caller + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o ... + /// + /// CallSite = RetSite := x = CalleeFun(o, ...) + /// +------------------+ + /// +--|---------------+ | + /// +--|--|------------+ | | + /// v v v ... | | | + /// 0 o x ... 
| | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, ...) | | | + /// | | | + /// 0 q r + /// + /// ExitInst := return r + /// \endcode + /// For each edge generated by the respective flow function a return edge + /// function is queried that describes a value computation. This results in + /// the following queries: + /// \code + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, 0, RetSite, 0); + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, q, RetSite, o); + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, r, RetSite, x); + /// \endcode virtual EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, d_t ExitNode, n_t RetSite, d_t RetNode) = 0; - // - // Also refer to FlowFunctions::getCallToRetFlowFunction() - // - // Describes a value computation problem along data-flows alongsite a - // CallSite. A return edge function implementation is queried for each edge - // that has been generated by applying the flow function that has been - // returned by FlowFunctions::getCallToRetFlowFunction(). The supergraph edge - // whose computation is requested is defined by the supergraph nodes CallNode - // and RetSiteNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getCallToRetFlowFunction()): - // - // f(0) -> {0} // pass lambda as identity alongsite the CallSite - // f(o) -> {o} // assuming that o is passed by value, it is passed - // // alongsite the CallSite - // f(p) -> {} // assuming that p is a pointer-typed value, we need - // // to kill p, as it will be handled by the call- and - // // return-flow functions - // f(.) -> {.} // pass everything that is not involved in the call as - // // identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. 
- // - // 0 o ... - // | | - // | +-------+ - // +--------+ | - // | | - // CallSite = RetSite := x = CalleeFun(o, p, ...) | | - // | | - // +--------+ | - // | +-------+ - // v v - // 0 o x ... - // - // For each edge generated by the respective flow function a call-to-return - // edge function is queried that describes a value computation. This results - // in the following queries: - // - // getCallToRetEdgeFunction(CallSite, 0, RetSite, 0, {CalleeFun}); - // getCallToRetEdgeFunction(CallSite, o, RetSite, o, {CalleeFun}); - // + /// + /// Also refer to FlowFunctions::getCallToRetFlowFunction() + /// + /// Describes a value computation problem along data-flows alongside a + /// CallSite. A return edge function implementation is queried for each edge + /// that has been generated by applying the flow function that has been + /// returned by FlowFunctions::getCallToRetFlowFunction(). The supergraph edge + /// whose computation is requested is defined by the supergraph nodes CallNode + /// and RetSiteNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getCallToRetFlowFunction()): + /// \code + /// f(0) -> {0} // pass lambda as identity alongside the CallSite + /// f(o) -> {o} // assuming that o is passed by value, it is passed + /// // alongside the CallSite + /// f(p) -> {} // assuming that p is a pointer-typed value, we need + /// // to kill p, as it will be handled by the call- and + /// // return-flow functions + /// f(.) -> {.} // pass everything that is not involved in the call + /// // as identity + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o ... + /// | | + /// | +-------+ + /// +--------+ | + /// | | + /// CallSite = RetSite := x = CalleeFun(o, p, ...)
| | + /// | | + /// +--------+ | + /// | +-------+ + /// v v + /// 0 o x ... + /// \endcode + /// For each edge generated by the respective flow function a call-to-return + /// edge function is queried that describes a value computation. This results + /// in the following queries: + /// \code + /// getCallToRetEdgeFunction(CallSite, 0, RetSite, 0, {CalleeFun}); + /// getCallToRetEdgeFunction(CallSite, o, RetSite, o, {CalleeFun}); + /// \endcode virtual EdgeFunction getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, llvm::ArrayRef Callees) = 0; - // - // Also refer to FlowFunction::getSummaryFlowFunction() - // - // Describes a value computation problem along a summary data flow. A summary - // edge function implementation is queried for each edge that has been - // generated by FlowFunctions::getSummaryFlowFunction(). The supergraph edge - // whose computation is requested is defined by the supergraph nodes CurrNode - // and SuccNode. - // - // The default implementation returns a nullptr to indicate that the mechanism - // should not be used. - // + /// + /// Also refer to FlowFunctions::getSummaryFlowFunction() + /// + /// Describes a value computation problem along a summary data flow. A summary + /// edge function implementation is queried for each edge that has been + /// generated by FlowFunctions::getSummaryFlowFunction(). The supergraph edge + /// whose computation is requested is defined by the supergraph nodes CurrNode + /// and SuccNode. + /// + /// The default implementation returns a nullptr to indicate that the + /// mechanism should not be used.
+ /// virtual EdgeFunction getSummaryEdgeFunction(n_t /*Curr*/, d_t /*CurrNode*/, n_t /*Succ*/, diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 2f3fa53f5..6e54fe393 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -36,9 +36,9 @@ namespace psr { // FlowFunction Class //===----------------------------------------------------------------------===// -// -// This class models a flow function for distributive data-flow problems. -// +/// +/// This class models a flow function for distributive data-flow problems. +/// template > class FlowFunction { static_assert(std::is_same::value, "Container values needs to be the same as D"); @@ -52,17 +52,17 @@ template > class FlowFunction { virtual ~FlowFunction() = default; - // - // This function is called for each data-flow fact Source that holds before - // the instruction under analysis. The return value is a (potentially empty) - // set of data-flow facts that are generated from Source and hold after the - // instruction under analysis. In other words: the function describes what - // exploded supergraph edges have to be "drawn". - // - // Please also refer to the various flow function factories of the - // FlowFunctions interface: FlowFunctions::get*FlowFunction() for more - // details. - // + /// + /// This function is called for each data-flow fact Source that holds before + /// the instruction under analysis. The return value is a (potentially empty) + /// set of data-flow facts that are generated from Source and hold after the + /// instruction under analysis. In other words: the function describes what + /// exploded supergraph edges have to be "drawn". + /// + /// Please also refer to the various flow function factories of the + /// FlowFunctions interface: FlowFunctions::get*FlowFunction() for more + /// details. 
+ /// virtual container_type computeTargets(D Source) = 0; }; @@ -153,13 +153,13 @@ template class FlowFunctionTemplates { /// dataflow-facts x, f(x) = {x}. /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x1 x1 x3 ... /// | | | ... /// id-instruction | | | ... /// v v v ... /// x1 x2 x3 ... - /// + /// \endcode static auto identityFlow() { struct IdFF final : public FlowFunction { container_type computeTargets(d_t Source) override { @@ -178,14 +178,14 @@ template class FlowFunctionTemplates { /// dataflow-facts x, f(x) = F(x). /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x /// | /// inst F /// / / | \ \ ... /// v v v v v /// x1 x2 x x3 x4 - /// + /// \endcode template static auto lambdaFlow(Fn &&F) { struct LambdaFlow final : public FlowFunction { LambdaFlow(Fn &&F) : Flow(std::forward(F)) {} @@ -208,21 +208,22 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateFlow(v, w), then for all incoming /// dataflow facts x: + /// \code /// f(w) = {v, w}, /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ | ... /// inst | | \ | ... /// v v v v ... /// x w v u - /// + /// \endcode /// \note If the FactToGenerate already holds at the beginning of the /// statement, this flow function does not kill it. For IFDS analysis it makes /// no difference, but in the case of IDE, the corresponding edge functions - /// are being joined together potentially lowing precition. If that is an + /// are being joined together potentially lowering precision. If that is an /// issue, use transferFlow instead. 
static auto generateFlow(d_t FactToGenerate, d_t From) { struct GenFrom final : public FlowFunction { @@ -250,9 +251,10 @@ template class FlowFunctionTemplates { /// /// So, given a flow function f = generateFlowIf(v, p), for all incoming /// dataflow facts x: + /// \code /// f(x) = {v, x} if p(x) == true /// f(x) = {x} else. - /// + /// \endcode template >> static auto generateFlowIf(d_t FactToGenerate, Fn Predicate) { @@ -281,17 +283,18 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateManyFlows({v1, v2, ..., vN}, w), for all /// incoming dataflow facts x: + /// \code /// f(w) = {v1, v2, ..., vN, w} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ \ ... \ | ... /// inst | | \ \ ... \ | ... /// v v v v ... \ v ... /// x w v1 v2 ... vN u - /// + /// \endcode template , typename = std::enable_if_t>> static auto generateManyFlows(Range &&FactsToGenerate, d_t From) { @@ -324,17 +327,18 @@ template class FlowFunctionTemplates { /// (FactToKill). /// /// Given a flow function f = killFlow(v), for all incoming dataflow facts x: + /// \code /// f(v) = {} /// f(x) = {x} - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// u v w ... /// | | | /// inst | | /// v v /// u v w ... - /// + /// \endcode static auto killFlow(d_t FactToKill) { struct KillFlow final : public FlowFunction { KillFlow(d_t KillValue) : KillValue(std::move(KillValue)) {} @@ -355,9 +359,10 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = killFlowIf(p), for all incoming dataflow facts /// x: + /// \code /// f(x) = {} if p(x) == true /// f(x) = {x} else. 
- /// + /// \endcode template >> static auto killFlowIf(Fn Predicate) { @@ -382,20 +387,21 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = killManyFlows({v1, v2, ..., vN}), for all /// incoming dataflow facts x: + /// \code /// f(v1) = {} /// f(v2) = {} /// ... /// f(vN) = {} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// u v1 v2 ... vN w ... /// | | | | | /// inst | | /// v v /// u v1 v2 ... vN w ... - /// + /// \endcode template , typename = std::enable_if_t>> static auto killManyFlows(Range &&FactsToKill) { @@ -419,8 +425,9 @@ template class FlowFunctionTemplates { /// A flow function that stops propagating *all* incoming dataflow facts. /// /// Given a flow function f = killAllFlows(), for all incoming dataflow facts + /// \code /// x, f(x) = {}. - /// + /// \endcode static auto killAllFlows() { struct KillAllFF final : public FlowFunction { Container computeTargets(d_t /*Source*/) override { return Container(); } @@ -440,20 +447,21 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateFlowAndKillAllOthers(v, w), for all /// incoming dataflow facts x: + /// \code /// f(w) = {v, w} /// f(x) = {}. - /// + /// \endcode /// Equivalent to: killFlowIf(λz.z!=w) o generateFlow(v, w) (where o denotes /// function composition) /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ | /// inst | \ ... /// v v /// x w v u - /// + /// \endcode static auto generateFlowAndKillAllOthers(d_t FactToGenerate, d_t From) { struct GenFlowAndKillAllOthers final : public FlowFunction { @@ -481,17 +489,18 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateManyFlowsAndKillAllOthers({v1, v2, ..., /// vN}, w), for all incoming dataflow facts x: + /// \code /// f(w) = {v1, v2, ..., vN, w} /// f(x) = {}. 
- /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ \ ... \ | ... /// inst | \ \ ... \ ... /// v v v ... \ ... /// x w v1 v2 ... vN u - /// + /// \endcode template , typename = std::enable_if_t>> static auto generateManyFlowsAndKillAllOthers(Range &&FactsToGenerate, @@ -533,19 +542,20 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = transferFlow(v, w), for all incoming dataflow /// facts x: + /// \code /// f(v) = {} /// f(w) = {v, w} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w v u ... /// | |\ | | ... /// | | \ | ... /// inst | | \ | ... /// v v v v ... /// x w v u - /// + /// \endcode static auto transferFlow(d_t FactToGenerate, d_t From) { struct TransferFlow final : public FlowFunction { TransferFlow(d_t GenValue, d_t FromValue) @@ -575,8 +585,9 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = unionFlows(g, h), for all incoming dataflow /// facts x: + /// \code /// f(x) = g(x) u h(x). (where u denotes set-union) - /// + /// \endcode template && @@ -635,242 +646,239 @@ class FlowFunctions virtual ~FlowFunctions() = default; - // - // Describes the effects of the current instruction, i.e. data-flows, along - // normal (non-call, non-return) instructions. Analysis writers are free to - // inspect the successor instructions, too, as a lookahead. - // - // Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological - // lambda fact. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds before the current statement under analysis. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. 
Each combination of input fact d_i (given as an input to - // f) and respective output facts (f(d_i)) represents an edge that must be - // "drawn" to construct the exploded supergraph for the analysis problem to be - // solved. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before instruction_1. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass the lambda (or zero fact) as identity - // f(o) -> {o, x} // generate a new fact x from o - // f(.) -> {.} // pass all other facts that hold before instruction_1 - // // as identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // | |\ ... - // x = instruction_1 o p | | \ ... - // | | | ... - // v v v ... - // 0 o x ... - // - // y = instruction_2 q r - // + /// + /// Describes the effects of the current instruction, i.e. data-flows, along + /// normal (non-call, non-return) instructions. Analysis writers are free to + /// inspect the successor instructions, too, as a lookahead. + /// + /// Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological + /// lambda fact. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds before the current statement under analysis. f's return type is + /// a set of (target) facts that have to be generated from the source fact d_i + /// by the data-flow solver. Each combination of input fact d_i (given as an + /// input to f) and respective output facts (f(d_i)) represents an edge that + /// must be "drawn" to construct the exploded supergraph for the analysis + /// problem to be solved. + /// + /// The concrete implementation of f is depending on the analysis problem. 
In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before instruction_1. + /// We assume that f is implemented to produce the following outputs. + /// \code + /// f(0) -> {0} // pass the lambda (or zero fact) as identity + /// f(o) -> {o, x} // generate a new fact x from o + /// f(.) -> {.} // pass all other facts that hold before + /// // instruction_1 as identity + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o ... + /// | |\ ... + /// x = instruction_1 o p | | \ ... + /// | | | ... + /// v v v ... + /// 0 o x ... + /// + /// y = instruction_2 q r + /// \endcode virtual FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) = 0; - // - // Handles call flows: describes the effects of a function call at callInst - // to the callee target destFun. If a call instruction has multiple callee - // targets, for instance, because it is an indirect function call that cannot - // be analyzed precisely in a static manner, the call flow function will be - // queried for each callee target. - // - // This flow function usually handles parameter passing and maps actual to - // formal parameters. If an analysis writer does not wish to analyze a given - // callee target they can return a flow function implementation that kills all - // data-flow facts (e.g. KillAll) such that call is not followed. A commonly - // used trick to model the effects of functions that are not present (e.g. - // library functions such as malloc(), free(), etc.) is to kill all facts at - // the call to the respective target and plugin the semantics in the - // call-to-return flow function. In the call-to-return flow function, an - // analysis writer can check if the function of interest is one of the - // possible targets and then, return a flow function implementation that - // describes the special semantics of that function call. 
- // - // Let start_point be the starting point of the callee target CalleeFun. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the CallInst. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each target fact that is generated will hold before - // start_point. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before CallInst. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass as identity into the callee target - // f(o) -> {q} // map actual o into formal q - // f(p) -> {r} // map actual p into formal r - // f(.) -> {} // kill all other facts that are not visible to the - // // callee target - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o p ... - // \ \ \ ... - // x = CalleeFun(o, p, ...) \ \ +----------------+ - // \ +---------------- | - // +-------------+ + | - // ... | | | - // ... | | | - // 0 o p ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, r, ...) | | | - // v v v - // 0 q r ... - // - // start point - // + /// + /// Handles call flows: describes the effects of a function call at callInst + /// to the callee target destFun. If a call instruction has multiple callee + /// targets, for instance, because it is an indirect function call that cannot + /// be analyzed precisely in a static manner, the call flow function will be + /// queried for each callee target. + /// + /// This flow function usually handles parameter passing and maps actual to + /// formal parameters. 
If an analysis writer does not wish to analyze a given + /// callee target they can return a flow function implementation that kills + /// all data-flow facts (e.g. KillAll) such that call is not followed. A + /// commonly used trick to model the effects of functions that are not present + /// (e.g. library functions such as malloc(), free(), etc.) is to kill all + /// facts at the call to the respective target and plugin the semantics in the + /// call-to-return flow function. In the call-to-return flow function, an + /// analysis writer can check if the function of interest is one of the + /// possible targets and then, return a flow function implementation that + /// describes the special semantics of that function call. + /// + /// Let start_point be the starting point of the callee target CalleeFun. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the CallInst. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold before start_point. + /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before CallInst. We + /// assume that f is implemented to produce the following outputs. + /// \code + /// f(0) -> {0} // pass as identity into the callee target + /// f(o) -> {q} // map actual o into formal q + /// f(p) -> {r} // map actual p into formal r + /// f(.) -> {} // kill all other facts that are not visible to the + /// // callee target + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o p ... + /// \ \ \ ... + /// x = CalleeFun(o, p, ...) 
\ \ +----------------+ + /// \ +---------------- | + /// +-------------+ + | + /// ... | | | + /// ... | | | + /// 0 o p ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, r, ...) | | | + /// v v v + /// 0 q r ... + /// + /// start point + /// \endcode virtual FlowFunctionPtrType getCallFlowFunction(n_t CallInst, f_t CalleeFun) = 0; - // - // Handles return flows: describes the data-flows from an ExitInst to the - // corresponding RetSite. - // - // This flow function usually handles the returned value of the callee target - // as well as the parameter mapping back to the caller of CalleeFun for - // pointer parameters as modifications made by CalleeFun are visible to the - // caller. Data-flow facts that are not returned or escape via function - // pointer parameters (or global variables) are usually killed. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the ExitInst. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each target fact that is generated will hold after - // CallSite. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before ExitInst. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass as identity into the callee target - // f(r) -> {x} // map return value to lhs variable at CallSite - // f(q) -> {o} // map pointer-typed formal q to actual o - // f(.) -> {} // kill all other facts that are not visible to the - // // caller - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // - // x = CalleeFun(o, ...) 
- // +------------------+ - // +--|---------------+ | - // +--|--|------------+ | | - // v v v ... | | | - // 0 o x ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, ...) | | | - // | | | - // 0 q r ... - // - // return r - // + /// + /// Handles return flows: describes the data-flows from an ExitInst to the + /// corresponding RetSite. + /// + /// This flow function usually handles the returned value of the callee target + /// as well as the parameter mapping back to the caller of CalleeFun for + /// pointer parameters as modifications made by CalleeFun are visible to the + /// caller. Data-flow facts that are not returned or escape via function + /// pointer parameters (or global variables) are usually killed. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the ExitInst. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold after CallSite. + /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before ExitInst. We + /// assume that f is implemented to produce the following outputs. + /// \code + /// f(0) -> {0} // pass as identity into the callee target + /// f(r) -> {x} // map return value to lhs variable at CallSite + /// f(q) -> {o} // map pointer-typed formal q to actual o + /// f(.) -> {} // kill all other facts that are not visible to the + /// // caller + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o ... + /// + /// x = CalleeFun(o, ...) + /// +------------------+ + /// +--|---------------+ | + /// +--|--|------------+ | | + /// v v v ... | | | + /// 0 o x ... 
| | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, ...) | | | + /// | | | + /// 0 q r ... + /// + /// return r + /// \endcode virtual FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, n_t RetSite) = 0; - // Performs any side-effects of a return-flow-function - // - // In case of unbalanced returns (if the option `followReturnsPastSeeds` is - // activated in the IfdsIdeSolverConfig), we will eventually reach a function - // that is not called from other functions. Still, we may want to apply a - // return-flow-function -- just for its side-effects, such as registering a - // taint + /// Performs any side-effects of a return-flow-function + /// + /// In case of unbalanced returns (if the option `followReturnsPastSeeds` is + /// activated in the IfdsIdeSolverConfig), we will eventually reach a function + /// that is not called from other functions. Still, we may want to apply a + /// return-flow-function -- just for its side-effects, such as registering a + /// taint virtual void applyUnbalancedRetFlowFunctionSideEffects(f_t CalleeFun, n_t ExitInst, d_t Source) { // By default, do nothing } - // - // Describes the data-flows alongsite a CallSite. - // - // This flow function usually passes all data-flow facts that are not involved - // in the function call alongsite the CallSite. Data-flow facts that are not - // actual parameters or passed by value, modifications to those within a - // callee are not visible in the caller context, are mostly passed as - // identity. The call-to-return flow function may also be used to describe - // special semantics (cf. getCallFlowFunction()). - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the CallSite. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. 
Each target fact that is generated will hold after - // CallSite. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before CallSite. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass lambda as identity alongsite the CallSite - // f(o) -> {o} // assuming that o is passed by value, it is passed - // // alongsite the CallSite - // f(p) -> {} // assuming that p is a pointer-typed value, we need - // // to kill p, as it will be handled by the call- and - // // return-flow functions - // f(.) -> {.} // pass everything that is not involved in the call as - // // identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // | | - // | +-------+ - // +--------+ | - // | | - // x = CalleeFun(o, p, ...) | | - // | | - // +--------+ | - // | +-------+ - // v v - // 0 o x ... - // + /// + /// Describes the data-flows alongside a CallSite. + /// + /// This flow function usually passes all data-flow facts that are not + /// involved in the function call alongside the CallSite. Data-flow facts that + /// are not actual parameters or passed by value, modifications to those + /// within a callee are not visible in the caller context, are mostly passed + /// as identity. The call-to-return flow function may also be used to describe + /// special semantics (cf. getCallFlowFunction()). + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the CallSite. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold after CallSite.
+ /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before CallSite. We + /// assume that f is implemented to produce the following outputs. + /// \code + /// f(0) -> {0} // pass lambda as identity alongside the CallSite + /// f(o) -> {o} // assuming that o is passed by value, it is passed + /// // alongside the CallSite + /// f(p) -> {} // assuming that p is a pointer-typed value, we need + /// // to kill p, as it will be handled by the call- and + /// // return-flow functions + /// f(.) -> {.} // pass everything that is not involved in the call + /// // as identity + /// \endcode + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// \code + /// 0 o ... + /// | | + /// | +-------+ + /// +--------+ | + /// | | + /// x = CalleeFun(o, p, ...) | | + /// | | + /// +--------+ | + /// | +-------+ + /// v v + /// 0 o x ... + /// \endcode virtual FlowFunctionPtrType getCallToRetFlowFunction(n_t CallSite, n_t RetSite, llvm::ArrayRef Callees) = 0; - // - // May be used to encode special sementics of a given callee target (whose - // call should not be directly followed by the data-flow solver) similar to - // the getCallFlowFunction() --> getCallToRetFlowFunction() trick (cf. - // getCallFlowFunction()). - // - // The default implementation returns a nullptr to indicate that the mechanism - // should not be used. - // + /// + /// May be used to encode special semantics of a given callee target (whose + /// call should not be directly followed by the data-flow solver) similar to + /// the getCallFlowFunction() --> getCallToRetFlowFunction() trick (cf. + /// getCallFlowFunction()). + /// + /// The default implementation returns a nullptr to indicate that the + /// mechanism should not be used.
+ /// virtual FlowFunctionPtrType getSummaryFlowFunction(n_t /*Curr*/, f_t /*CalleeFun*/) { return nullptr; } }; -} // namespace psr +} // namespace psr #endif diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index 57120a439..914749a5d 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -53,6 +53,12 @@ class AllTopFnProvider< } }; +/// \brief The analysis problem interface for IDE problems (solvable by the +/// IDESolver). Subclass this and override all pure-virtual functions to create +/// your own IDE analysis. +/// +/// For more information on how to write an IDE analysis, see [Writing an IDE +/// Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IDE-analysis) template > class IDETabulationProblem : public FlowFunctions, @@ -73,6 +79,16 @@ class IDETabulationProblem : public FlowFunctions, using ConfigurationTy = HasNoConfigurationType; + /// Takes an IR data base (IRDB) and collects information from it to create a + /// tabulation problem. + /// @param[in] IRDB The project IR data base, that holds the code under + /// analysis + /// @param[in] EntryPoints The (mangled) names of all entry functions of the + /// project, given as a vector of strings. An example would simply be + /// `{"main"}`. To set every function as entry point, pass `"__ALL__"` + /// @param[in] ZeroValue Provides the special tautological zero value (aka. + /// Λ). If not provided here, you must set it via \link initializeZeroValue() + /// \endlink. 
explicit IDETabulationProblem( const ProjectIRDBBase *IRDB, std::vector EntryPoints, std::optional diff --git a/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h b/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h index 71dcba326..8f2046fcb 100644 --- a/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h +++ b/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h @@ -39,22 +39,40 @@ enum class SolverConfigOptions : uint32_t { All = ~0U }; +/// \brief Configuration options for the solving process of IFDS/IDE problems struct IFDSIDESolverConfig { IFDSIDESolverConfig() noexcept = default; IFDSIDESolverConfig(SolverConfigOptions Options) noexcept; + /// Returns whether the solver should handle unbalanced returns (default: + /// false) [[nodiscard]] bool followReturnsPastSeeds() const; + /// Returns whether the solver should automatically insert an identityFlow + /// propagation for the special zero value (default: true) [[nodiscard]] bool autoAddZero() const; + /// Returns whether the IDE solver should perform IDE's phase 2 (default: + /// true). You may want to turn this off for IFDS analyses. 
[[nodiscard]] bool computeValues() const; + /// Returns whether the solver should record all ESG edges (default: false) + /// \note This option may severely hurt the solver's performance [[nodiscard]] bool recordEdges() const; + /// Returns whether the solver should emit the ESG as DOT graph on the + /// command-line (default: false) [[nodiscard]] bool emitESG() const; + /// Currently unused [[nodiscard]] bool computePersistedSummaries() const; + /// \see followReturnsPastSeeds void setFollowReturnsPastSeeds(bool Set = true); + /// \see autoAddZero void setAutoAddZero(bool Set = true); + /// \see computeValues void setComputeValues(bool Set = true); + /// \see recordEdges void setRecordEdges(bool Set = true); + /// \see emitESG void setEmitESG(bool Set = true); + /// \see computePersistedSummaries void setComputePersistedSummaries(bool Set = true); void setConfig(SolverConfigOptions Opt); diff --git a/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h index cbafa68c3..bf5263a2a 100644 --- a/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h @@ -19,6 +19,12 @@ namespace psr { +/// \brief The analysis problem interface for IFDS problems (solvable by the +/// IFDSSolver). Subclass this and override all pure-virtual functions to create +/// your own IFDS analysis. +/// +/// For more information on how to write an IFDS analysis, see [Writing an IFDS +/// Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IFDS-analysis) template > class IFDSTabulationProblem @@ -38,6 +44,16 @@ class IFDSTabulationProblem using typename Base::t_t; using typename Base::v_t; + /// Takes an IR data base (IRDB) and collects information from it to create a + /// tabulation problem.
+ /// @param[in] IRDB The project IR data base, that holds the code under + /// analysis + /// @param[in] EntryPoints The (mangled) names of all entry functions of the + /// project, given as a vector of strings. An example would simply be + /// `{"main"}`. To set every function as entry point, pass `"__ALL__"` + /// @param[in] ZeroValue Provides the special tautological zero value (aka. + /// Λ). Unlike for IDETabulationProblem, it must always be provided + /// here. explicit IFDSTabulationProblem(const ProjectIRDBBase *IRDB, std::vector EntryPoints, d_t ZeroValue) diff --git a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h index db7ea654e..1a0d18526 100644 --- a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h +++ b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h @@ -22,6 +22,9 @@ namespace psr { +/// \brief Represents the starting points of the analysis. +/// +/// The initial facts that should hold at the entry points. template class InitialSeeds { public: using GeneralizedSeeds = std::map>; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h index 7d357a0a0..d0f9472d9 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h @@ -18,6 +18,10 @@ namespace psr { template class Compressor; +/// \brief A utility class that assigns a sequential Id to every inserted +/// object. +/// +/// This specialization handles types that can be efficiently passed by value template class Compressor>> { public: @@ -61,6 +65,10 @@ class Compressor>> { llvm::SmallVector FromInt; }; +/// \brief A utility class that assigns a sequential Id to every inserted +/// object. +/// +/// This specialization handles types that cannot be efficiently passed by value template class Compressor>> { public: @@ -69,6 +77,9 @@ class Compressor>> { ToInt.reserve(Capacity); } + /// Returns the index of the given element in the compressor's storage.
If the + /// element isn't present yet, it will be added first and its index will + /// then be returned. uint32_t getOrInsert(const T &Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; @@ -79,6 +90,9 @@ class Compressor>> { return Ret; } + /// Returns the index of the given element in the compressor's storage. If the + /// element isn't present yet, it will be added first and its index will + /// then be returned. uint32_t getOrInsert(T &&Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; @@ -89,6 +103,8 @@ class Compressor>> { return Ret; } + /// Returns the index of the given element in the compressor's storage. If the + /// element isn't present, std::nullopt will be returned std::optional getOrNull(const T &Elem) const { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h index a0f42d821..d38ecb7f8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h @@ -137,6 +137,10 @@ template struct FlowFunctionCacheBase { } // namespace detail +/// This class caches flow and edge functions to avoid their reconstruction. +/// When a flow or edge function must be applied multiple times, a cached +/// version is used if existent, otherwise a new one is created and inserted +/// into the cache. This class is used within both IDE solver implementations.
template class FlowFunctionCache : detail::FlowFunctionCacheBase, diff --git a/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h index 744c8e7e3..20ff8b6c3 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h @@ -21,7 +21,7 @@ namespace psr { /// XXX (#734): When upgrading to C++20, create a concept checking valid /// SolverResults types -/// A type-erased version of the main functionality of SolverResults. +/// \brief A type-erased version of the main functionality of SolverResults. /// Can be accepted by consumers that don't need deep access to the internals /// (so, the usual ones). As we have now two kinds of solver-results /// (SolverResults and IdBasedSolverResults), we need a common way of accessing diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index e6423bab0..cfebf428e 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -27,6 +27,14 @@ namespace psr { +/// \brief Solves the given IFDSTabulationProblem as described in the 1995 paper +/// by Reps, Horwitz and Sagiv. To solve the problem, call solve(). Results can +/// then be queried by using resultAt() and resultsAt(). +/// +/// \note PhASAR implements IFDS in terms of IDE, so in case you do not need the +/// raw SolverResults, for maximum performance you should use +/// IFDSIDESolverConfig#setComputeValues(bool) to disable IDE's +/// phase 2. 
template > class IFDSSolver diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h index bab1509d0..68ee40cb8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h @@ -50,7 +50,7 @@ namespace psr { /// This solver implements the optimizations and the $JF_N$ layout from the /// paper "Scaling Interprocedural Static Data-Flow Analysis to Large C/C++ /// Applications: An Experience Report" -/// (https://doi.org/10.4230/LIPIcs.ECOOP.2024.36) by Schiebel, Sattler, +/// (<https://doi.org/10.4230/LIPIcs.ECOOP.2024.36>) by Schiebel, Sattler, /// Schubert, Apel, and Bodden. template > diff --git a/include/phasar/DataFlow/IfdsIde/SolverResults.h b/include/phasar/DataFlow/IfdsIde/SolverResults.h index 664d0e111..51b8b3ce9 100644 --- a/include/phasar/DataFlow/IfdsIde/SolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/SolverResults.h @@ -46,11 +46,22 @@ class SolverResultsBase { using d_t = D; using l_t = L; + /// Returns the result that the IDE analysis computed for the fact Node right + /// after the statement Stmt. + /// + /// Returns a default-constructed l_t, if no analysis result was computed at + /// this point. [[nodiscard]] ByConstRef resultAt(ByConstRef Stmt, ByConstRef Node) const { return self().Results.get(Stmt, Node); } + /// Returns the results that the IDE analysis computed right after the + /// statement Stmt. + /// + /// \param Stmt The statement, where the analysis results are requested + /// \param StripZero Whether the special zero value should be stripped from + /// the result. [[nodiscard]] std::unordered_map resultsAt(ByConstRef Stmt, bool StripZero) const { std::unordered_map Result = self().Results.row(Stmt); @@ -60,19 +71,26 @@ class SolverResultsBase { return Result; } + /// Returns the results that the IDE analysis computed right after the + /// statement Stmt. + /// + /// Does not strip the special zero value from the result.
[[nodiscard]] const std::unordered_map & resultsAt(ByConstRef Stmt) const { return self().Results.row(Stmt); } + /// The internal representation of this SolverResults object. [[nodiscard]] const auto &rowMapView() const { return self().Results.rowMapView(); } + /// Whether the analysis has computed any results for the statement Stmt. [[nodiscard]] bool containsNode(ByConstRef Stmt) const { return self().Results.containsRow(Stmt); } + /// Similar to resultsAt(ByConstRef). [[nodiscard]] const auto &row(ByConstRef Stmt) const { return self().Results.row(Stmt); } @@ -247,12 +265,12 @@ class OwningSolverResults D ZV) noexcept(std::is_nothrow_move_constructible_v) : Results(std::move(ResTab)), ZV(std::move(ZV)) {} - [[nodiscard]] SolverResults get() const &noexcept { + [[nodiscard]] SolverResults get() const & noexcept { return {Results, ZV}; } SolverResults get() && = delete; - [[nodiscard]] operator SolverResults() const &noexcept { + [[nodiscard]] operator SolverResults() const & noexcept { return get(); } diff --git a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h index 41cb4d58e..f7cbd0cfc 100644 --- a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h +++ b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h @@ -45,9 +45,9 @@ class [[deprecated("This ancient API is not maintained and should not be used " std::map> SpecialEdgeFunctions; std::vector SpecialFunctionNames; - // Constructs the SpecialSummaryMap such that it contains all glibc, - // llvm.intrinsics and C++'s new, new[], delete, delete[] with identity - // flow functions. + /// Constructs the SpecialSummaryMap such that it contains all glibc, + /// llvm.intrinsics and C++'s new, new[], delete, delete[] with identity + /// flow functions. 
SpecialSummaries() { // insert default flow and edge functions for (const auto &FunctionName : @@ -71,7 +71,7 @@ class [[deprecated("This ancient API is not maintained and should not be used " return Instance; } - // Returns true, when an existing function is overwritten, false otherwise. + /// Returns true, when an existing function is overwritten, false otherwise. bool provideSpecialSummary(const std::string &Name, FlowFunctionPtrType FlowFunc) { bool Override = containsSpecialSummary(Name); @@ -79,7 +79,7 @@ class [[deprecated("This ancient API is not maintained and should not be used " return Override; } - // Returns true, when an existing function is overwritten, false otherwise. + /// Returns true, when an existing function is overwritten, false otherwise. bool provideSpecialSummary(const std::string &Name, FlowFunctionPtrType FlowFunc, std::shared_ptr> EdgeFunc) { @@ -97,8 +97,8 @@ class [[deprecated("This ancient API is not maintained and should not be used " return SpecialFlowFunctions.count(Name); } - FlowFunctionPtrType getSpecialFlowFunctionSummary( - const llvm::Function *Func) { + FlowFunctionPtrType + getSpecialFlowFunctionSummary(const llvm::Function *Func) { return getSpecialFlowFunctionSummary(Func->getName()); } @@ -106,18 +106,18 @@ class [[deprecated("This ancient API is not maintained and should not be used " return SpecialFlowFunctions[Name]; } - std::shared_ptr> getSpecialEdgeFunctionSummary( - const llvm::Function *Func) { + std::shared_ptr> + getSpecialEdgeFunctionSummary(const llvm::Function *Func) { return getSpecialEdgeFunctionSummary(Func->getName()); } - std::shared_ptr> getSpecialEdgeFunctionSummary( - const std::string &Name) { + std::shared_ptr> + getSpecialEdgeFunctionSummary(const std::string &Name) { return SpecialEdgeFunctions[Name]; } - friend llvm::raw_ostream &operator<<( - llvm::raw_ostream &OS, const SpecialSummaries &SpecialSumms) { + friend llvm::raw_ostream & + operator<<(llvm::raw_ostream &OS, const SpecialSummaries 
&SpecialSumms) { OS << "SpecialSummaries:\n"; for (auto &Entry : SpecialSumms.SpecialFunctionNames) { OS << Entry.first << " "; diff --git a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h index 098c4ff21..c7326729b 100644 --- a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h +++ b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h @@ -13,6 +13,10 @@ namespace psr { +/// Stores a call-string context that can be used in interprocedural monotone +/// analysis to achieve (limited) context sensitivity. +/// @tparam N Type of the call-string elements. +/// @tparam K Maximal length the call string can have. template class CallStringCTX { protected: std::deque CallString; diff --git a/include/phasar/DataFlow/Mono/InterMonoProblem.h b/include/phasar/DataFlow/Mono/InterMonoProblem.h index 9ee1b3298..e4a345a14 100644 --- a/include/phasar/DataFlow/Mono/InterMonoProblem.h +++ b/include/phasar/DataFlow/Mono/InterMonoProblem.h @@ -31,6 +31,9 @@ namespace psr { template class TypeHierarchy; template class ICFG; +/// \brief The analysis problem interface for interprocedural monotone problems +/// (solvable by the InterMonoSolver). Subclass this and override all +/// pure-virtual functions to create your own inter-mono analysis. template class InterMonoProblem : public IntraMonoProblem { public: @@ -47,6 +50,14 @@ class InterMonoProblem : public IntraMonoProblem { const i_t *ICF; public: + /// An interprocedural monotone problem generated from an intermediate + /// representation, a type hierarchy of said representation, a control flow + /// graph, points-to information and optionally a vector of entry points. + /// @param[in] IRDB A project IR data base. + /// @param[in] TH A type hierarchy based on the given IRDB. + /// @param[in] ICF An interprocedural CFG based on the given IRDB. + /// @param[in] PT Points-to information based on the given IRDB. + /// @param[in] EntryPoints A vector of entry points.
Provide at least one. InterMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const i_t *ICF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/IntraMonoProblem.h b/include/phasar/DataFlow/Mono/IntraMonoProblem.h index 4915cfe4f..69aca7ede 100644 --- a/include/phasar/DataFlow/Mono/IntraMonoProblem.h +++ b/include/phasar/DataFlow/Mono/IntraMonoProblem.h @@ -35,6 +35,9 @@ struct HasNoConfigurationType; template class TypeHierarchy; template class CFG; +/// \brief The analysis problem interface for intraprocedural monotone problems +/// (solvable by the IntraMonoSolver). Subclass this and override all +/// pure-virtual functions to create your own mono analysis. template class IntraMonoProblem { public: using n_t = typename AnalysisDomainTy::n_t; @@ -58,10 +61,16 @@ template class IntraMonoProblem { [[maybe_unused]] Soundness S = Soundness::Soundy; public: - // denote that a problem does not require a configuration (type/file) - // a user problem can override the type of configuration to be used, if any using ConfigurationTy = HasNoConfigurationType; + /// An intraprocedural monotone problem generated from an intermediate + /// representation, a type hierarchy of said representation, a control flow + /// graph, points-to information and optionally a vector of entry points. + /// @param[in] IRDB A project IR data base. + /// @param[in] TH A type hierarchy based on the given IRDB. + /// @param[in] CF A control flow graph based on the given IRDB. + /// @param[in] PT Points-to information based on the given IRDB. + /// @param[in] EntryPoints A vector of entry points. Provide at least one. 
IntraMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const CFGBase *CF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h index 0be77db3d..b0abf8835 100644 --- a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h @@ -27,6 +27,12 @@ namespace psr { +/// \brief A solver class for interprocedural monotone problems (derived from +/// InterMonoProblem). To solve the problem, call solve(). +/// +/// \tparam AnalysisDomainTy type of the analysis domain. +/// \tparam K An unsigned integer used as the maximum length for call-string +/// contexts. template class InterMonoSolver { public: using ProblemTy = InterMonoProblem; diff --git a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h index 739b6fa30..ed5b1f449 100644 --- a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h @@ -29,6 +29,10 @@ namespace psr { +/// \brief A solver class for intraprocedural monotone problems. To solve the +/// problem, call solve(). +/// +/// \tparam AnalysisDomainTy type of the analysis domain. template class IntraMonoSolver { public: using ProblemTy = IntraMonoProblem; diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h index d781d127f..b97393231 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h @@ -28,6 +28,8 @@ namespace psr { +/// \brief A utility class that allows path-reconstruction for IFDS/IDE solver +/// results.
template class PathSensitivityManager : public PathSensitivityManagerBase, diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h index c92c35aae..74ed9e1c5 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h @@ -67,6 +67,11 @@ class PathSensitivityManagerMixin { } public: + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts in FactsRange holding right after Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename FactsRangeTy, typename ConfigTy, typename Filter = DefaultPathTracingFilter, @@ -147,6 +152,11 @@ class PathSensitivityManagerMixin { return Dag; } + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts holding right after Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename ConfigTy, typename L, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> @@ -159,6 +169,11 @@ class PathSensitivityManagerMixin { return pathsDagToAll(std::move(Inst), FactsRange, Config, PFilter); } + /// Reconstruct the combined control- and data-flow paths that lead to the + /// given data-flow fact Fact holding right after Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename ConfigTy, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> @@ -171,6 +186,11 @@ class PathSensitivityManagerMixin { PFilter); } + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts in FactsRange holding at Inst.
+ /// + /// The result is given as graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename ConfigTy, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> diff --git a/include/phasar/Domain/AnalysisDomain.h b/include/phasar/Domain/AnalysisDomain.h index bf79fe797..a4622695a 100644 --- a/include/phasar/Domain/AnalysisDomain.h +++ b/include/phasar/Domain/AnalysisDomain.h @@ -14,30 +14,31 @@ namespace psr { -// AnalysisDomain - This class should be specialized by different static -// analyses types... which is why the default version declares all analysis -// domains as aliases of void. -// -// Virtually all of PhASAR's internal analyses are implemented in a generic way -// using interfaces and template parameters. In order to specify concrete types -// for the template parameters such that an analysis can compute some useful -// information on some concrete target code, a configuration template parameter -// of type AnalysisDomain is passed around to make the necessary information -// available to the required analyses. -// -// If a type is not meant to be used by an analysis it should be left as an -// alias to void. If any analysis detects that a parameter is required to -// conduct an analysis but not correctly set, it will statically report an error -// and ask for the missing piece of information. +/// AnalysisDomain - This class should be specialized by different static +/// analyses types... which is why the default version declares all analysis +/// domains as aliases of void. +/// +/// Virtually all of PhASAR's internal analyses are implemented in a generic way +/// using interfaces and template parameters. 
In order to specify concrete types +/// for the template parameters such that an analysis can compute some useful +/// information on some concrete target code, a configuration template parameter +/// of type AnalysisDomain is passed around to make the necessary information +/// available to the required analyses. +/// +/// If a type is not meant to be used by an analysis it should be left as an +/// alias to void. If any analysis detects that a parameter is required to +/// conduct an analysis but not correctly set, it will statically report an +/// error and ask for the missing piece of information. struct AnalysisDomain { - // Data-flow fact --- Specifies the type of an individual data-flow fact that - // is propagated through the program under analysis. + /// Data-flow fact --- Specifies the type of an individual data-flow fact that + /// is propagated through the program under analysis. using d_t = void; // (Control-flow) Node --- Specifies the type of a node in the // (inter-procedural) control-flow graph and can be though of as an individual - // statement or instruction of the program. + // statement or instruction of the target program. using n_t = void; - // Function --- Specifies the type of functions. + // Function --- Specifies the type of functions/procedures in the target + // program. using f_t = void; // (User-defined) type --- Specifies the type of a user-defined (i.e. struct // or class) data type. 
diff --git a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h index 348f341f4..614a4fa80 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h +++ b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h @@ -16,6 +16,12 @@ namespace psr { class LLVMProjectIRDB; +/// \brief Provides utilities to inject a function into the IR under analysis +/// that captures global constructors and destructors as described in the 2021 +/// paper "Modeling the Effects of Global Variables in Data-Flow Analysis for +/// C/C++" by Schubert et al. +/// +/// See <https://doi.org/10.1109/SCAM52516.2021.00010> for more information. class GlobalCtorsDtorsModel { public: static constexpr llvm::StringLiteral ModelName = @@ -30,14 +36,27 @@ class GlobalCtorsDtorsModel { static constexpr llvm::StringLiteral UserEntrySelectorName = "__psrCRuntimeUserEntrySelector"; + /// @brief Function that creates a new global model and inserts it into the + /// IRDB. The returned function is the global model that was inserted. + /// @param[in,out] IRDB IR data base that will have the global model + /// inserted. + /// @param[in] UserEntryPoints Entry points for the program given as + /// llvm::Function pointers. You usually want to pass the main function here. static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); + + /// @brief Function that creates a new global model and inserts it into the + /// IRDB. The returned function is the global model that was inserted. + /// @param[in,out] IRDB IR data base that will have the global model + /// inserted. + /// @param[in] UserEntryPoints Entry points for the program given as + /// std::strings. You usually want to pass the main function here. static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); - /// Returns true, if a function was generated by phasar.
+ /// Returns true, if a function was generated by the GlobalCtorsDtorsModel. [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &F) noexcept; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h index a488e9b46..be69af134 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h @@ -22,6 +22,8 @@ namespace psr { class LLVMProjectIRDB; class LLVMBasedBackwardCFG; +/// \brief A class that represents a backwards control flow graph. Conforms to +/// the CFGBase CRTP interface. class LLVMBasedBackwardCFG : public detail::LLVMBasedCFGImpl { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index d9bbb1786..670a6a6c9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -25,6 +25,8 @@ template class CallGraph; template <> struct CFGTraits : CFGTraits {}; +/// \brief A class that represents a backwards interprocedural control flow +/// graph. Conforms to the ICFGBase CRTP interface. 
class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public ICFGBase { friend ICFGBase; @@ -35,7 +37,7 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public: LLVMBackwardRet(llvm::LLVMContext &Ctx) - : Instance(llvm::ReturnInst::Create(Ctx)){}; + : Instance(llvm::ReturnInst::Create(Ctx)) {}; [[nodiscard]] const llvm::ReturnInst *getInstance() const noexcept { return Instance; } diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index a3134c713..229d71c83 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -36,6 +36,8 @@ template <> struct CFGTraits { template <> struct CFGTraits : CFGTraits {}; +/// \brief A class that implements a control flow graph. Conforms to the CFGBase +/// CRTP interface. namespace detail { template class LLVMBasedCFGImpl : public CFGBase { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index 9ed1799e0..fbb2693c9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -44,6 +44,8 @@ class Resolver; class LLVMBasedICFG; template <> struct CFGTraits : CFGTraits {}; +/// \brief A class that implements a inter-procedural control flow graph. +/// Conforms to the ICFGBase CRTP interface. 
class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 1fdc8100b..646ff7132 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -23,6 +23,12 @@ class GlobalVariable; namespace psr { class LLVMProjectIRDB; +/// \brief A class that provides access to all C++ virtual function tables +/// (VTables) found in the target program. +/// +/// Useful for constructing a call graph for a C++-based target. +/// \note This class only works, if the target program's IR was generated with +/// debug information. Pass `-g` to the compiler to achieve this. class LLVMVFTableProvider { public: explicit LLVMVFTableProvider(const llvm::Module &Mod); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 818a59de8..dc6f7c8ff 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -26,6 +26,9 @@ class CallBase; namespace psr { class DIBasedTypeHierarchy; + +/// \brief A resolver that performs Class Hierarchy Analysis to resolve calls +/// to C++ virtual functions. Requires debug information. 
class CHAResolver : public Resolver { public: CHAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 376eb5962..88afa796e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -18,6 +18,7 @@ class CallBase; namespace psr { +/// \brief A resolver that doesn't resolve indirect- and virtual calls class NOResolver final : public Resolver { public: NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 3bbdc83f5..eca760ae7 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -36,6 +36,8 @@ namespace psr { class DIBasedTypeHierarchy; +/// \brief A resolver that uses alias information to resolve indirect and +/// virtual calls class OTFResolver : public Resolver { public: OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index f4371c608..c6e003211 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -28,6 +28,9 @@ class DICompositeType; namespace psr { class DIBasedTypeHierarchy; + +/// \brief A resolver that performs Rapid Type Analysis to resolve calls +/// to C++ virtual functions. Requires debug information. 
class RTAResolver : public CHAResolver { public: RTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 8748e56aa..c59717c25 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -68,6 +68,10 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); +/// \brief A base class for call-target resolvers. Used to build call graphs. +/// +/// Create a specific resolver by making a new class, inheriting this resolver +/// class and implementing the virtual functions as needed. class Resolver { protected: const LLVMProjectIRDB *IRDB; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index 8645f5c72..6524bfad0 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -23,6 +23,8 @@ template <> struct CFGTraits : CFGTraits { using v_t = const llvm::Value *; }; +/// \brief A class that implements a sparse control flow graph. Conforms to the +/// CFGBase CRTP interface. class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { friend struct SVFGCache; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 2d43ae64e..df946ae27 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -20,12 +20,20 @@ class SparseLLVMBasedCFG; class DIBasedTypeHierarchy; struct SVFGCache; +/// \brief A class that implements a sparse interprocedural control flow graph. +/// Conforms to the ICFGBase CRTP interface. 
+/// +/// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or +/// SparseIDE optimization after Karakaya et al. "Symbol-Specific Sparsification +/// of Interprocedural Distributive Environment Problems" +/// class SparseLLVMBasedICFG : public LLVMBasedICFG, public SparseLLVMBasedCFGProvider { friend SparseLLVMBasedCFGProvider; public: + /// Constructor that delegates all arguments to the ctor of LLVMBasedICFG explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, llvm::ArrayRef EntryPoints = {}, diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index 8c0aaa9da..7fbbb65a5 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -29,9 +29,14 @@ struct SVFGCache; template <> struct CFGTraits : CFGTraits {}; -/// Similar to SparseLLVMBasedICFG; the only difference is that this one *is* no -/// LLVMBasedICFG -- it contains a pointer to an already existing one. -/// It still owns the sparse value-flow graphs +/// \brief Similar to SparseLLVMBasedICFG; the only difference is that this one +/// *is* no LLVMBasedICFG -- it contains a pointer to an already existing one. +/// It still owns the sparse value-flow graphs. +/// +/// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or +/// SparseIDE optimization after Karakaya et al.
"Symbol-Specific Sparsification +/// of Interprocedural Distributive Environment Problems" +/// class SparseLLVMBasedICFGView : public LLVMBasedCFG, public ICFGBase, diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 677b26639..0b970fcfe 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -36,6 +36,7 @@ template <> struct ProjectIRDBTraits { using g_t = const llvm::GlobalVariable *; }; +/// \brief Project IR Database that manages a LLVM IR module. class LLVMProjectIRDB : public ProjectIRDBBase { friend ProjectIRDBBase; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h index e40d109ab..98cdd56f7 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h @@ -23,9 +23,10 @@ struct DataFlowFact { std::variant Fact; }; +/// \brief Simple representation of a serializable data-flow summary class FunctionDataFlowFacts { public: - using ParamaterMappingTy = + using ParameterMappingTy = std::unordered_map>; FunctionDataFlowFacts() noexcept = default; @@ -67,10 +68,10 @@ class FunctionDataFlowFacts { return It->second; } - return getDefaultValue(); + return getDefaultValue(); } - llvm::StringMap Fdff; + llvm::StringMap Fdff; }; } // namespace psr::library_summary diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h index 19e03b49b..8afba2f71 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h @@ -14,10 +14,11 @@ class LLVMFunctionDataFlowFacts; [[nodiscard]] LLVMFunctionDataFlowFacts readFromFDFF(const FunctionDataFlowFacts &Fdff, const LLVMProjectIRDB 
&Irdb); +/// @brief A LLVM-specific mapping of FunctionDataFlowFacts class LLVMFunctionDataFlowFacts { public: LLVMFunctionDataFlowFacts() noexcept = default; - using ParamaterMappingTy = FunctionDataFlowFacts::ParamaterMappingTy; + using ParameterMappingTy = FunctionDataFlowFacts::ParameterMappingTy; /// insert a set of data flow facts void insertSet(const llvm::Function *Fun, uint32_t Index, @@ -56,19 +57,19 @@ class LLVMFunctionDataFlowFacts { return getFacts(Fun, Arg->getArgNo()); } - [[nodiscard]] const ParamaterMappingTy & + [[nodiscard]] const ParameterMappingTy & getFactsForFunction(const llvm::Function *Fun) { auto Iter = LLVMFdff.find(Fun); if (Iter != LLVMFdff.end()) { return Iter->second; } - return getDefaultValue(); + return getDefaultValue(); } friend LLVMFunctionDataFlowFacts readFromFDFF(const FunctionDataFlowFacts &Fdff, const LLVMProjectIRDB &Irdb); private: - std::unordered_map LLVMFdff; + std::unordered_map LLVMFdff; }; } // namespace psr::library_summary diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h index ad55a9840..729e3c44a 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h @@ -28,10 +28,9 @@ class Value; namespace psr { -/** - * This class may be used to represent the special zero value for IFDS - * and IDE problems. The LLVMZeroValue is implemented as a singleton. - */ +/// \brief This class may be used to represent the special zero value (aka. Λ) +/// for IFDS and IDE problems. The LLVMZeroValue is implemented as a singleton. 
+/// class LLVMZeroValue : public llvm::GlobalVariable { private: LLVMZeroValue(llvm::Module &Mod); diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index 34012ec33..bad969a70 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -51,6 +51,8 @@ struct IDEExtendedTaintAnalysisDomain : public LLVMAnalysisDomainDefault { }; namespace XTaint { +/// \brief An IDE-based taint analysis that uses k-limited field-access paths to +/// achieve field sensitivity class IDEExtendedTaintAnalysis : public IDETabulationProblem, public AnalysisBase { diff --git a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h index 988a977c4..39cd52740 100644 --- a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h +++ b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h @@ -61,6 +61,12 @@ class Z3BasedPathSensitivityManagerBase static void deduplicatePaths(FlowPathSequence &Paths); }; +/// \brief An extension of the path-reconstruction mechanism of the +/// PathSensitivityManager that provides means to extract concrete combined +/// control- and data-flow paths. +/// +/// Filters out paths that are considered infeasible by the Z3 +/// constraint solver. template >> @@ -89,6 +95,14 @@ class Z3BasedPathSensitivityManager } } + /// Reconstruct the feasible combined control- and data-flow paths that lead + /// to the given data-flow fact Fact holding right after Inst. + /// + /// The result is given as a list of paths, where cycles are unrolled once + /// in an implementation-defined way.
+ /// It is strongly recommended to use the Z3BasedPathSensitivityConfig in the + /// Z3BasedPathSensitivityManager's ctor to limit the returned paths; + /// otherwise this function quickly becomes a performance bottleneck. FlowPathSequence pathsTo(n_t Inst, d_t Fact) const { if (Config.DAGSizeThreshold != SIZE_MAX) { PHASAR_LOG_LEVEL( diff --git a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h index 755bbc923..c4330c4e4 100644 --- a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h +++ b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h @@ -25,6 +25,8 @@ class LLVMProjectIRDB; class LLVMBasedICFG; class LLVMBasedCFG; +/// \brief An AnalysisDomain that specializes sensible defaults for LLVM-based +/// analysis struct LLVMAnalysisDomainDefault : public AnalysisDomain { using d_t = const llvm::Value *; using n_t = const llvm::Instruction *; @@ -36,6 +38,8 @@ struct LLVMAnalysisDomainDefault : public AnalysisDomain { using db_t = LLVMProjectIRDB; }; +/// \brief An AnalysisDomain that specializes sensible defaults for LLVM-based +/// IFDS analysis using LLVMIFDSAnalysisDomainDefault = WithBinaryValueDomain; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h index ff4ccbe5d..a405b7aca 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h @@ -53,14 +53,15 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, using AllocationSiteSetPtrTy = traits_t::AllocationSiteSetPtrTy; using AliasSetMap = llvm::DenseMap>; - /** - * Creates points-to set(s) for all functions in the IRDB. If - * UseLazyEvaluation is true, computes points-to-sets for functions that do - * not use global variables on the fly - */ + /// \brief Creates alias-sets for all functions in the IRDB. 
+ /// + /// If UseLazyEvaluation is true, computes alias-sets only for functions that + /// use global variables directly and delays all others to when they are first + /// requested explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, bool UseLazyEvaluation = true, AliasAnalysisType PATy = AliasAnalysisType::CFLAnders); + /// Loads alias sets from JSON explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedPTS); @@ -103,22 +104,18 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, return AnalysisProperties::None; } - /** - * Shows a parts of an alias set. Good for debugging when one wants to peak - * into a points to set. - * - * @param ValueSetPair a pair on an Value* and the corresponding points to set - * @param Peak the amount of instrutions shown from the points to set - */ + /// Shows parts of an alias set. Good for debugging when one wants to peek + /// into a points-to set. + /// + /// \param ValueSetPair a pair of a Value* and the corresponding points-to set + /// \param Peak the amount of instructions shown from the points-to set static void peakIntoAliasSet(const AliasSetMap::value_type &ValueSetPair, int Peak); - /** - * Prints out the size distribution for all points to sets. - * - * @param Peak the amount of instrutions shown from one of the biggest points - * to sets, use 0 show nothing. - */ + /// Prints out the size distribution for all points-to sets. + /// + /// \param Peak the amount of instructions shown from one of the biggest + /// points-to sets, use 0 to show nothing. 
void drawAliasSetsDistribution(int Peak = 10) const; [[nodiscard]] inline bool empty() const { return AnalyzedFunctions.empty(); } diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h index bf924e39c..d895741ad 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h @@ -15,6 +15,9 @@ #include namespace psr { + +/// A data structure used for storing, serializing and deserializing an +/// LLVMAliasSet struct LLVMAliasSetData { std::vector> AliasSets; std::vector AnalyzedFunctions; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h index 1195da6c8..a7532d20d 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h @@ -25,6 +25,10 @@ namespace psr { class LLVMProjectIRDB; +/// \brief Wrapper over alias analyses that provide point-wise alias +/// information. +/// +/// Used to construct an LLVMAliasSet. class LLVMBasedAliasAnalysis { public: diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h index 1b5694c6d..258bdbbc9 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h @@ -25,6 +25,10 @@ namespace psr { class LLVMProjectIRDB; +/// \brief Represents the type hierarchy of the target program. +/// +/// \note This class only works, if the target program's IR was generated with +/// debug information. Pass `-g` to the compiler to achieve this. 
class DIBasedTypeHierarchy : public TypeHierarchy { public: @@ -39,7 +43,17 @@ class DIBasedTypeHierarchy static inline constexpr llvm::StringLiteral PureVirtualCallName = "__cxa_pure_virtual"; + /// \brief Creates a type hierarchy based on an intermediate representation + /// data base. + /// \param[in] IRDB The IR data base that the type hierarchy will be based + /// upon. This MUST contain debug information for the algorithm to work! explicit DIBasedTypeHierarchy(const LLVMProjectIRDB &IRDB); + + /// \brief Loads an already computed type hierarchy. + /// \param[in] IRDB The IR data base of the type hierarchy. + /// \param[in] SerializedData The already existing type hierarchy, given by + /// the appropriate class DIBasedTypeHierarchyData, which contains all + /// necessary information. explicit DIBasedTypeHierarchy(const LLVMProjectIRDB *IRDB, const DIBasedTypeHierarchyData &SerializedData); ~DIBasedTypeHierarchy() override = default; @@ -93,16 +107,12 @@ class DIBasedTypeHierarchy void print(llvm::raw_ostream &OS = llvm::outs()) const override; - /** - * @brief Prints the class hierarchy to an ostream in dot format. - * @param OS outputstream - */ + /// \brief Prints the class hierarchy to an ostream in dot format. + /// \param OS outputstream void printAsDot(llvm::raw_ostream &OS = llvm::outs()) const; - /** - * @brief Prints the class hierarchy to an ostream in json format. - * @param an outputstream - */ + /// \brief Prints the class hierarchy to an ostream in JSON format. 
+ /// \param OS outputstream void printAsJson(llvm::raw_ostream &OS = llvm::outs()) const override; private: diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h index 52cd85569..da579b8f8 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h @@ -18,6 +18,9 @@ #include namespace psr { +/// \brief A structure that is used to store already calculated type hierarchy +/// data, serialize that data or deserialize a json file with a previously +/// serialized type hierarchy. struct DIBasedTypeHierarchyData { // DITypes and llvm::Function * are serialized by serializing their names and // using the DebugInfoFinder to deserialize them diff --git a/include/phasar/PhasarLLVM/Utils/Annotation.h b/include/phasar/PhasarLLVM/Utils/Annotation.h index 31d9c42ba..bf5527e7c 100644 --- a/include/phasar/PhasarLLVM/Utils/Annotation.h +++ b/include/phasar/PhasarLLVM/Utils/Annotation.h @@ -10,9 +10,8 @@ namespace psr { -//===----------------------------------------------------------------------===// -// Helper classes that allow for an easier retrieval of annotation information. -//===----------------------------------------------------------------------===// +/// \file Helper classes that allow for an easier retrieval of annotation +/// information. class VarAnnotation { public: diff --git a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h index f9d3d91d7..286cae8dd 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h @@ -39,6 +39,11 @@ class DILocation; namespace psr { +/// \file This file contains useful structs and functions to get and store +/// information about the source code or the intermediate representation of a +/// project. 
+ +/// \brief Minimal source-code information, based on LLVM debug information struct DebugLocation { unsigned Line{}; unsigned Column{}; @@ -47,6 +52,9 @@ struct DebugLocation { [[nodiscard]] llvm::DILocalVariable *getDILocalVariable(const llvm::Value *V); +/// \brief A struct that contains information about a source code line, what the +/// corresponding function name of that line is and what the file name is that +/// the line is in. struct SourceCodeInfo { std::string SourceCodeLine; std::string SourceCodeFilename; diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index b0d8bccf9..129249946 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -53,11 +53,23 @@ bool isIntegerLikeType(const llvm::Type *T) noexcept; bool isAllocaInstOrHeapAllocaFunction(const llvm::Value *V) noexcept; bool isHeapAllocatingFunction(const llvm::Function *F) noexcept; -// TODO add description +/// Returns true if the provided function and the function type are both not +/// null and have the same number of parameters and the same return type. If the +/// argument ExactMatch is set to true, which it is by default, the two provided +/// arguments must also have the same type for each argument, for the function +/// to return true. +/// +/// \note This function is less useful in practice than you may think. Consider +/// using isConsistentCall() instead. bool matchesSignature(const llvm::Function *F, const llvm::FunctionType *FType, bool ExactMatch = true); -// TODO add description +/// Returns true iff the provided functions are both not null and have the same +/// number of parameters, the same return type and each parameter of both +/// functions has the same type as well. +/// +/// \note This function is less useful in practice than you may think. Consider +/// using isConsistentCall() instead. 
bool matchesSignature(const llvm::FunctionType *FType1, const llvm::FunctionType *FType2); @@ -230,7 +242,7 @@ bool isGuardVariable(const llvm::Value *V); bool isStaticVariableLazyInitializationBranch(const llvm::BranchInst *Inst); /** - * Tests for https://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic + * Tests for * e.g. * int boo __attribute__((annotate("bar")); * @param F The function to test - Target of the call instruction @@ -239,7 +251,7 @@ bool isVarAnnotationIntrinsic(const llvm::Function *F); /** * Retrieves String annotation value as per - * https://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic + * * Test the call function be tested by isVarAnnotationIntrinsic * */ diff --git a/include/phasar/PhasarPass/PhasarPass.h b/include/phasar/PhasarPass/PhasarPass.h index d8272b98c..a7488a96c 100644 --- a/include/phasar/PhasarPass/PhasarPass.h +++ b/include/phasar/PhasarPass/PhasarPass.h @@ -20,6 +20,17 @@ class raw_ostream; namespace psr { +/// @brief PhasarPass is an implementation of llvm passes for the PhASAR +/// framework. +/// +/// What is a pass? +/// "The LLVM pass framework is an important part of the LLVM system, +/// because LLVM passes are where most of the interesting parts of the compiler +/// exist. Passes perform the transformations and optimizations that make up the +/// compiler, they build the analysis results that are used by these +/// transformations, and they are, above all, a structuring technique for +/// compiler code." 
+/// Source: class PhasarPass : public llvm::ModulePass { public: static inline char ID = 12; diff --git a/include/phasar/Pointer/AliasInfo.h b/include/phasar/Pointer/AliasInfo.h index db861acdc..a5f2f5f57 100644 --- a/include/phasar/Pointer/AliasInfo.h +++ b/include/phasar/Pointer/AliasInfo.h @@ -41,7 +41,7 @@ struct AliasInfoTraits> : DefaultAATraits {}; template struct AliasInfoTraits> : DefaultAATraits {}; -/// A type-erased reference to any object implementing the IsAliasInfo +/// \brief A type-erased reference to any object implementing the IsAliasInfo /// interface. Use this, if your analysis is not tied to a specific alias info /// implementation. /// @@ -260,8 +260,9 @@ class AliasInfoRef : public AnalysisPropertiesMixin> { const VTable *VT{}; }; -/// Similar to AliasInfoRef, but exclusively owns the held reference. Use this, -/// if you need to decide dynamically, which alias info implementation to use. +/// \brief Similar to AliasInfoRef, but exclusively owns the held reference. Use +/// this, if you need to decide dynamically, which alias info implementation to +/// use. /// /// Implicitly convertible to AliasInfoRef. 
/// @@ -314,13 +315,13 @@ class [[clang::trivial_abi]] AliasInfo final : public AliasInfoRef { } } - [[nodiscard]] base_t asRef() &noexcept { return *this; } - [[nodiscard]] AliasInfoRef asRef() const &noexcept { return *this; } + [[nodiscard]] base_t asRef() & noexcept { return *this; } + [[nodiscard]] AliasInfoRef asRef() const & noexcept { return *this; } [[nodiscard]] AliasInfoRef asRef() && = delete; /// For better interoperability with unique_ptr - [[nodiscard]] base_t get() &noexcept { return asRef(); } - [[nodiscard]] AliasInfoRef get() const &noexcept { return asRef(); } + [[nodiscard]] base_t get() & noexcept { return asRef(); } + [[nodiscard]] AliasInfoRef get() const & noexcept { return asRef(); } [[nodiscard]] AliasInfoRef get() && = delete; }; diff --git a/include/phasar/TypeHierarchy/VFTable.h b/include/phasar/TypeHierarchy/VFTable.h index a236f2e34..4fc331cf5 100644 --- a/include/phasar/TypeHierarchy/VFTable.h +++ b/include/phasar/TypeHierarchy/VFTable.h @@ -20,6 +20,7 @@ class raw_ostream; namespace psr { +/// \brief A generic class to represent a virtual function table template class VFTable { public: virtual ~VFTable() = default; diff --git a/include/phasar/Utils/AnalysisPrinterBase.h b/include/phasar/Utils/AnalysisPrinterBase.h index a1ca6aee3..ec95b47d7 100644 --- a/include/phasar/Utils/AnalysisPrinterBase.h +++ b/include/phasar/Utils/AnalysisPrinterBase.h @@ -10,6 +10,8 @@ namespace psr { +/// \brief A generic class that serves as the basis for a custom analysis +/// printer implementation. 
template class AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; using d_t = typename AnalysisDomainTy::d_t; diff --git a/include/phasar/Utils/DebugOutput.h b/include/phasar/Utils/DebugOutput.h index 03f6098b3..bfd282183 100644 --- a/include/phasar/Utils/DebugOutput.h +++ b/include/phasar/Utils/DebugOutput.h @@ -23,6 +23,9 @@ namespace psr { namespace detail { +/// \file This file contains many useful ways of printing information for +/// debugging purposes. + template void printHelper(OS_t &OS, const T &Data); template diff --git a/include/phasar/Utils/DefaultAnalysisPrinter.h b/include/phasar/Utils/DefaultAnalysisPrinter.h index f9008b946..2eee3eac4 100644 --- a/include/phasar/Utils/DefaultAnalysisPrinter.h +++ b/include/phasar/Utils/DefaultAnalysisPrinter.h @@ -30,6 +30,8 @@ template struct Warning { LatticeElement(std::move(Lattice)), AnalysisType(DfAnalysisType) {} }; +/// \brief A default implementation of the AnalysisPrinterBase. Aggregates all +/// analysis results in a vector and prints them when the analysis has finished. template class DefaultAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; @@ -41,7 +43,8 @@ class DefaultAnalysisPrinter : public AnalysisPrinterBase { : OS(&OS) {} explicit DefaultAnalysisPrinter(const llvm::Twine &Filename) - : AnalysisPrinterBase(), OS(openFileStream(Filename)){}; + : AnalysisPrinterBase(), + OS(openFileStream(Filename)) {}; ~DefaultAnalysisPrinter() override = default; diff --git a/include/phasar/Utils/EquivalenceClassMap.h b/include/phasar/Utils/EquivalenceClassMap.h index a11d3fe4e..c1ce7d0bf 100644 --- a/include/phasar/Utils/EquivalenceClassMap.h +++ b/include/phasar/Utils/EquivalenceClassMap.h @@ -23,11 +23,12 @@ namespace psr { -// EquivalenceClassMap is a special map type that splits the keys into -// equivalence classes regarding their mapped values. Meaning, that all keys -// that are equivalent are mapped to the same value. 
Two keys are treated as -// equivalent and merged into a equivalence class when they refer to Values -// that compare equal. +/// \brief EquivalenceClassMap is a special map type that splits the keys into +/// equivalence classes regarding their mapped values. +/// +/// Meaning, that all keys that are equivalent are mapped to the same value. Two +/// keys are treated as equivalent and merged into a equivalence class when they +/// refer to Values that compare equal. template struct EquivalenceClassMap { template using SetType = std::set; using EquivalenceClassBucketT = std::pair, ValueT>; @@ -69,41 +70,41 @@ template struct EquivalenceClassMap { return llvm::make_range(begin(), end()); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type insert(const KeyT &Key, ValueType &&Value) { return try_emplace(Key, std::forward(Value)); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type insert(KeyT &&Key, ValueType &&Value) { return try_emplace(std::move(Key), std::forward(Value)); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. insert_return_type insert(const std::pair &KVPair) { return try_emplace(KVPair.first, KVPair.second); } - // Inserts Key into the corresponding equivalence class for Value. 
If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. insert_return_type insert(std::pair &&KVPair) { return try_emplace(std::move(KVPair.first), std::move(KVPair.second)); } - // Insert a range of Key Values pairs into the map. + /// Insert a range of Key Values pairs into the map. template void insert(InputIt I, InputIt End) { for (; I != End; ++I) { try_emplace(I->first, I->second); } } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type try_emplace(KeyT &&Key, Ts &&...Args) { ValueT Val{std::forward(Args...)}; @@ -118,8 +119,8 @@ template struct EquivalenceClassMap { return std::make_pair(StoredData.back().first.begin(), true); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type try_emplace(const KeyT &Key, Ts &&...Args) { ValueT Val{std::forward(Args...)}; @@ -134,7 +135,7 @@ template struct EquivalenceClassMap { return std::make_pair(StoredData.back().first.begin(), true); } - // Return 1 if the specified key is in the map, 0 otherwise. + /// Return 1 if the specified key is in the map, 0 otherwise. 
[[nodiscard]] inline size_type count(const KeyT &Key) const { for (auto &KVPair : StoredData) { if (KVPair.first.count(Key) >= 1) { @@ -148,7 +149,7 @@ template struct EquivalenceClassMap { return StoredData.size(); } - // Returns the size of the map, i.e., the number of equivalence classes. + /// Returns the size of the map, i.e., the number of equivalence classes. [[nodiscard]] inline size_type size() const { return numEquivalenceClasses(); } @@ -273,7 +274,7 @@ class EquivalenceClassMapNG { return Values.size(); } - // Returns the size of the map, i.e., the number of equivalence classes. + /// Returns the size of the map, i.e., the number of equivalence classes. [[nodiscard]] inline size_t size() const noexcept { return numEquivalenceClasses(); } diff --git a/include/phasar/Utils/ErrorHandling.h b/include/phasar/Utils/ErrorHandling.h index 3e5920af1..93dfacf2c 100644 --- a/include/phasar/Utils/ErrorHandling.h +++ b/include/phasar/Utils/ErrorHandling.h @@ -18,6 +18,10 @@ #include namespace psr { + +/// \file This file contains useful functions for handling errors, by using +/// std::system_error, or returning null or a default value. + template T getOrThrow(llvm::ErrorOr ValOrErr) { if (ValOrErr) { return std::move(*ValOrErr); diff --git a/include/phasar/Utils/IO.h b/include/phasar/Utils/IO.h index f9966df63..8aea9333c 100644 --- a/include/phasar/Utils/IO.h +++ b/include/phasar/Utils/IO.h @@ -28,6 +28,9 @@ namespace psr { +/// \file This file contains functions for reading in text files and json files +/// and provides error handling for this process as well, if needed. 
+ [[nodiscard]] llvm::ErrorOr readTextFileOrErr(const llvm::Twine &Path); [[nodiscard]] llvm::ErrorOr> diff --git a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h index aa3e4ef23..bf574ceca 100644 --- a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h +++ b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h @@ -13,6 +13,10 @@ #include namespace psr { +/// This class implements the AnalysisPrinterBase that prints the analysis +/// results *while* the analysis is still running. +/// +/// Override doOnResult() to customize how the results are printed. template class OnTheFlyAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; @@ -21,10 +25,11 @@ class OnTheFlyAnalysisPrinter : public AnalysisPrinterBase { public: explicit OnTheFlyAnalysisPrinter(llvm::raw_ostream &OS) - : AnalysisPrinterBase(), OS(&OS){}; + : AnalysisPrinterBase(), OS(&OS) {}; explicit OnTheFlyAnalysisPrinter(const llvm::Twine &Filename) - : AnalysisPrinterBase(), OS(openFileStream(Filename)){}; + : AnalysisPrinterBase(), + OS(openFileStream(Filename)) {}; OnTheFlyAnalysisPrinter() = default; ~OnTheFlyAnalysisPrinter() = default; diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index f2c7504c7..612732679 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -147,7 +147,7 @@ struct StringIDLess { bool operator()(const std::string &LHS, const std::string &RHS) const; }; -/// See "https://en.cppreference.com/w/cpp/experimental/scope_exit/scope_exit" +/// See template class scope_exit { // NOLINT public: template ()())> @@ -168,14 +168,16 @@ template class scope_exit { // NOLINT template scope_exit(Fn) -> scope_exit; -// Copied from "https://en.cppreference.com/w/cpp/utility/variant/visit" -template struct Overloaded : Ts... { using Ts::operator()...; }; +// Copied from +template struct Overloaded : Ts... 
{ + using Ts::operator()...; +}; // explicit deduction guide (not needed as of C++20) template Overloaded(Ts...) -> Overloaded; /// Based on the reference implementation of std::remove_if -/// "https://en.cppreference.com/w/cpp/algorithm/remove" and optimized for the +/// and optimized for the /// case that a sorted list of indices is given instead of an unary predicate /// specifying the elements to be removed. template template [[nodiscard]] constexpr auto &&forward_like(U &&X) noexcept { // NOLINT // NOLINTNEXTLINE diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index c7465cdd8..fbdb82242 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -366,7 +366,7 @@ void LLVMTaintConfig::forAllLeakCandidatesAtImpl( } } - // Do not iterate over the actual paramaters of Inst as we did in + // Do not iterate over the actual parameters of Inst as we did in // forAllGeneratedValuesAt, because sink-values are not propagated in the // current taint analyses. Handling sink-values should be done in the // SinkCallBack