From fe2ca2f5fbf27cb232a26fc8d2271e16964dc24b Mon Sep 17 00:00:00 2001 From: mxHuber Date: Thu, 3 Apr 2025 22:00:10 +0200 Subject: [PATCH 01/17] comments to doxygen --- .../phasar/DataFlow/IfdsIde/EdgeFunctions.h | 405 ++++++++-------- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 451 +++++++++--------- include/phasar/Domain/AnalysisDomain.h | 30 +- 3 files changed, 444 insertions(+), 442 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h index 3b9034133..cf5e38ce2 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h @@ -42,222 +42,225 @@ template class EdgeFunctions { virtual ~EdgeFunctions() = default; - // - // Also refer to FlowFunctions::getNormalFlowFunction() - // - // Describes a value computation problem along a normal (non-call, non-return) - // intra-procedural exploded supergraph edge. A normal edge function - // implementation is queried for each edge that has been generated by appling - // the flow function returned by FlowFunctions::getNormalFlowFunction(). The - // supergraph edge whose computation is requested is defined by the supergraph - // nodes CurrNode and SuccNode. - // - // Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological - // lambda fact. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getNormalFlowfunction()): - // - // f(0) -> {0} // pass the lambda (or zero fact) as identity - // f(o) -> {o, x} // generate a new fact x from o - // f(.) -> {.} // pass all other facts that hold before instruction_1 - // // as identity - // - // The above flow-function implementation corresponds to the following edges - // in the exploded supergraph. - // - // 0 o ... - // | |\ ... 
- // Curr := x = instruction_1 o p | | \ ... - // | | | ... - // v v v ... - // 0 o x ... - // - // Succ := y = instruction_2 q r - // - // For each edge generated by the respective flow function a normal edge - // function is queried that describes a value computation. This results in the - // following queries: - // - // getNormalEdgeFunction(0, Curr, 0 Succ); - // getNormalEdgeFunction(o, Curr, o Succ); - // getNormalEdgeFunction(o, Curr, x Succ); - // + /// + /// Also refer to FlowFunctions::getNormalFlowFunction() + /// + /// Describes a value computation problem along a normal (non-call, + /// non-return) intra-procedural exploded supergraph edge. A normal edge + /// function implementation is queried for each edge that has been generated + /// by applying the flow function returned by + /// FlowFunctions::getNormalFlowFunction(). The supergraph edge whose + /// computation is requested is defined by the supergraph nodes CurrNode and + /// SuccNode. + /// + /// Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological + /// lambda fact. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getNormalFlowFunction()): + /// + /// f(0) -> {0} // pass the lambda (or zero fact) as identity + /// f(o) -> {o, x} // generate a new fact x from o + /// f(.) -> {.} // pass all other facts that hold + /// // before instruction_1 + /// // as identity + /// + /// The above flow-function implementation corresponds to the following edges + /// in the exploded supergraph. + /// + /// 0 o ... + /// | |\ ... + /// Curr := x = instruction_1 o p | | \ ... + /// | | | ... + /// v v v ... + /// 0 o x ...
+ /// + /// Succ := y = instruction_2 q r + /// + /// For each edge generated by the respective flow function a normal edge + /// function is queried that describes a value computation. This results in + /// the following queries: + /// + /// getNormalEdgeFunction(Curr, 0, Succ, 0); + /// getNormalEdgeFunction(Curr, o, Succ, o); + /// getNormalEdgeFunction(Curr, o, Succ, x); + /// virtual EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) = 0; - // - // Also refer to FlowFunctions::getCallFlowFunction() - // - // Describes a value computation problem along a call flow. A call edge - // function is queried for each edge that has been generated by applying the - // flow function that has been returned by FlowFunctions::getCallFlowFunction. - // The supergraph edge whose computation is requested is defined by the - // supergraph nodes SrcNode and DestNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getCallFlowFunction()): - // - // f(0) -> {0} // pass as identity into the callee target - // f(o) -> {q} // map actual o into formal q - // f(p) -> {r} // map actual p into formal r - // f(.) -> {} // kill all other facts that are not visible to the - // // callee target - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o p ... - // \ \ \ ... - // CallInst := x = CalleeFun(o, p, ...) \ \ +----------------+ - // \ +---------------- | - // +-------------+ + | - // ... | | | - // ... | | | - // 0 o p ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, r, ...) | | | - // v v v - // 0 q r ... - // - // start point - // - // For each edge generated by the respective flow function a call edge - // function is queried that describes a value computation.
This results in the - // following queries: - // - // getCallEdgeFunction(CallInst, 0, CalleeFun, 0); - // getCallEdgeFunction(CallInst, o, CalleeFun, q); - // getCallEdgeFunction(CallInst, p, CalleeFun, r); - // + /// + /// Also refer to FlowFunctions::getCallFlowFunction() + /// + /// Describes a value computation problem along a call flow. A call edge + /// function is queried for each edge that has been generated by applying the + /// flow function that has been returned by + /// FlowFunctions::getCallFlowFunction. The supergraph edge whose computation + /// is requested is defined by the supergraph nodes SrcNode and DestNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getCallFlowFunction()): + /// + /// f(0) -> {0} // pass as identity into the callee target + /// f(o) -> {q} // map actual o into formal q + /// f(p) -> {r} // map actual p into formal r + /// f(.) -> {} // kill all other facts that are not visible to the + /// // callee target + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o p ... + /// \ \ \ ... + /// CallInst := x = CalleeFun(o, p, ...) \ \ +----------------+ + /// \ +---------------- | + /// +-------------+ + | + /// ... | | | + /// ... | | | + /// 0 o p ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, r, ...) | | | + /// v v v + /// 0 q r ... + /// + /// start point + /// + /// For each edge generated by the respective flow function a call edge + /// function is queried that describes a value computation. 
This results in + /// the following queries: + /// + /// getCallEdgeFunction(CallInst, 0, CalleeFun, 0); + /// getCallEdgeFunction(CallInst, o, CalleeFun, q); + /// getCallEdgeFunction(CallInst, p, CalleeFun, r); + /// virtual EdgeFunction getCallEdgeFunction(n_t CallInst, d_t SrcNode, f_t CalleeFun, d_t DestNode) = 0; - // - // Also refer to FlowFunction::getRetFlowFunction() - // - // Describes a value computation problem along a return flow. A return edge - // function implementation is queried for each edge that has been generated by - // applying the flow function that has been returned by - // FlowFunctions::getRetFlowFunction(). The supergraph edge whose computation - // is requested is defined by the supergraph nodes ExitNode and RetNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getRetFlowFunction()): - // - // f(0) -> {0} // pass as identity into the callee target - // f(r) -> {x} // map return value to lhs variable at CallSite - // f(q) -> {o} // map pointer-typed formal q to actual o - // f(.) -> {} // kill all other facts that are not visible to the - // // caller - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // - // CallSite = RetSite := x = CalleeFun(o, ...) - // +------------------+ - // +--|---------------+ | - // +--|--|------------+ | | - // v v v ... | | | - // 0 o x ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, ...) | | | - // | | | - // 0 q r - // - // ExitInst := return r - // - // For each edge generated by the respective flow function a return edge - // function is queried that describes a value computation. 
This results in the - // following queries: - // - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, 0, RetSite, 0); - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, q, RetSite, o); - // getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, r, RetSite, x); - // + /// + /// Also refer to FlowFunction::getRetFlowFunction() + /// + /// Describes a value computation problem along a return flow. A return edge + /// function implementation is queried for each edge that has been generated + /// by applying the flow function that has been returned by + /// FlowFunctions::getRetFlowFunction(). The supergraph edge whose computation + /// is requested is defined by the supergraph nodes ExitNode and RetNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getRetFlowFunction()): + /// + /// f(0) -> {0} // pass as identity into the callee target + /// f(r) -> {x} // map return value to lhs variable at CallSite + /// f(q) -> {o} // map pointer-typed formal q to actual o + /// f(.) -> {} // kill all other facts that are not visible to the + /// // caller + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o ... + /// + /// CallSite = RetSite := x = CalleeFun(o, ...) + /// +------------------+ + /// +--|---------------+ | + /// +--|--|------------+ | | + /// v v v ... | | | + /// 0 o x ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, ...) | | | + /// | | | + /// 0 q r + /// + /// ExitInst := return r + /// + /// For each edge generated by the respective flow function a return edge + /// function is queried that describes a value computation. 
This results in + /// the following queries: + /// + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, 0, RetSite, 0); + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, q, RetSite, o); + /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, r, RetSite, x); + /// virtual EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, d_t ExitNode, n_t RetSite, d_t RetNode) = 0; - // - // Also refer to FlowFunctions::getCallToRetFlowFunction() - // - // Describes a value computation problem along data-flows alongsite a - // CallSite. A return edge function implementation is queried for each edge - // that has been generated by applying the flow function that has been - // returned by FlowFunctions::getCallToRetFlowFunction(). The supergraph edge - // whose computation is requested is defined by the supergraph nodes CallNode - // and RetSiteNode. - // - // The concrete implementation of an edge function e is depending on the - // analysis problem. In the following, we present a brief, contrived example: - // - // Consider the following flow function implementation (cf. - // FlowFunctions::getCallToRetFlowFunction()): - // - // f(0) -> {0} // pass lambda as identity alongsite the CallSite - // f(o) -> {o} // assuming that o is passed by value, it is passed - // // alongsite the CallSite - // f(p) -> {} // assuming that p is a pointer-typed value, we need - // // to kill p, as it will be handled by the call- and - // // return-flow functions - // f(.) -> {.} // pass everything that is not involved in the call as - // // identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // | | - // | +-------+ - // +--------+ | - // | | - // CallSite = RetSite := x = CalleeFun(o, p, ...) | | - // | | - // +--------+ | - // | +-------+ - // v v - // 0 o x ... 
- // - // For each edge generated by the respective flow function a call-to-return - // edge function is queried that describes a value computation. This results - // in the following queries: - // - // getCallToRetEdgeFunction(CallSite, 0, RetSite, 0, {CalleeFun}); - // getCallToRetEdgeFunction(CallSite, o, RetSite, o, {CalleeFun}); - // + /// + /// Also refer to FlowFunctions::getCallToRetFlowFunction() + /// + /// Describes a value computation problem along data-flows alongside a + /// CallSite. A call-to-return edge function implementation is queried for + /// each edge that has been generated by applying the flow function that has + /// been returned by FlowFunctions::getCallToRetFlowFunction(). The supergraph + /// edge whose computation is requested is defined by the supergraph nodes + /// CallNode and RetSiteNode. + /// + /// The concrete implementation of an edge function e is depending on the + /// analysis problem. In the following, we present a brief, contrived example: + /// + /// Consider the following flow function implementation (cf. + /// FlowFunctions::getCallToRetFlowFunction()): + /// + /// f(0) -> {0} // pass lambda as identity alongside the CallSite + /// f(o) -> {o} // assuming that o is passed by value, it is passed + /// // alongside the CallSite + /// f(p) -> {} // assuming that p is a pointer-typed value, we need + /// // to kill p, as it will be handled by the call- and + /// // return-flow functions + /// f(.) -> {.} // pass everything that is not involved in the + /// // call as + /// // identity + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o ... + /// | | + /// | +-------+ + /// +--------+ | + /// | | + /// CallSite = RetSite := x = CalleeFun(o, p, ...) | | + /// | | + /// +--------+ | + /// | +-------+ + /// v v + /// 0 o x ...
+ /// + /// For each edge generated by the respective flow function a call-to-return + /// edge function is queried that describes a value computation. This results + /// in the following queries: + /// + /// getCallToRetEdgeFunction(CallSite, 0, RetSite, 0, {CalleeFun}); + /// getCallToRetEdgeFunction(CallSite, o, RetSite, o, {CalleeFun}); + /// virtual EdgeFunction getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, llvm::ArrayRef Callees) = 0; - // - // Also refer to FlowFunction::getSummaryFlowFunction() - // - // Describes a value computation problem along a summary data flow. A summary - // edge function implementation is queried for each edge that has been - // generated by FlowFunctions::getSummaryFlowFunction(). The supergraph edge - // whose computation is requested is defined by the supergraph nodes CurrNode - // and SuccNode. - // - // The default implementation returns a nullptr to indicate that the mechanism - // should not be used. - // + /// + /// Also refer to FlowFunction::getSummaryFlowFunction() + /// + /// Describes a value computation problem along a summary data flow. A summary + /// edge function implementation is queried for each edge that has been + /// generated by FlowFunctions::getSummaryFlowFunction(). The supergraph edge + /// whose computation is requested is defined by the supergraph nodes CurrNode + /// and SuccNode. + /// + /// The default implementation returns a nullptr to indicate that the + /// mechanism should not be used. 
+ /// virtual EdgeFunction getSummaryEdgeFunction(n_t /*Curr*/, d_t /*CurrNode*/, n_t /*Succ*/, diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 2f3fa53f5..367344d45 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -36,9 +36,9 @@ namespace psr { // FlowFunction Class //===----------------------------------------------------------------------===// -// -// This class models a flow function for distributive data-flow problems. -// +/// +/// This class models a flow function for distributive data-flow problems. +/// template > class FlowFunction { static_assert(std::is_same::value, "Container values needs to be the same as D"); @@ -52,17 +52,17 @@ template > class FlowFunction { virtual ~FlowFunction() = default; - // - // This function is called for each data-flow fact Source that holds before - // the instruction under analysis. The return value is a (potentially empty) - // set of data-flow facts that are generated from Source and hold after the - // instruction under analysis. In other words: the function describes what - // exploded supergraph edges have to be "drawn". - // - // Please also refer to the various flow function factories of the - // FlowFunctions interface: FlowFunctions::get*FlowFunction() for more - // details. - // + /// + /// This function is called for each data-flow fact Source that holds before + /// the instruction under analysis. The return value is a (potentially empty) + /// set of data-flow facts that are generated from Source and hold after the + /// instruction under analysis. In other words: the function describes what + /// exploded supergraph edges have to be "drawn". + /// + /// Please also refer to the various flow function factories of the + /// FlowFunctions interface: FlowFunctions::get*FlowFunction() for more + /// details. 
+ /// virtual container_type computeTargets(D Source) = 0; }; @@ -635,236 +635,235 @@ class FlowFunctions virtual ~FlowFunctions() = default; - // - // Describes the effects of the current instruction, i.e. data-flows, along - // normal (non-call, non-return) instructions. Analysis writers are free to - // inspect the successor instructions, too, as a lookahead. - // - // Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological - // lambda fact. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds before the current statement under analysis. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each combination of input fact d_i (given as an input to - // f) and respective output facts (f(d_i)) represents an edge that must be - // "drawn" to construct the exploded supergraph for the analysis problem to be - // solved. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before instruction_1. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass the lambda (or zero fact) as identity - // f(o) -> {o, x} // generate a new fact x from o - // f(.) -> {.} // pass all other facts that hold before instruction_1 - // // as identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // | |\ ... - // x = instruction_1 o p | | \ ... - // | | | ... - // v v v ... - // 0 o x ... - // - // y = instruction_2 q r - // + /// + /// Describes the effects of the current instruction, i.e. data-flows, along + /// normal (non-call, non-return) instructions. 
Analysis writers are free to + /// inspect the successor instructions, too, as a lookahead. + /// + /// Let instruction_1 := Curr, instruction_2 := Succ, and 0 the tautological + /// lambda fact. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds before the current statement under analysis. f's return type is + /// a set of (target) facts that have to be generated from the source fact d_i + /// by the data-flow solver. Each combination of input fact d_i (given as an + /// input to f) and respective output facts (f(d_i)) represents an edge that + /// must be "drawn" to construct the exploded supergraph for the analysis + /// problem to be solved. + /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before instruction_1. + /// We assume that f is implemented to produce the following outputs. + /// + /// f(0) -> {0} // pass the lambda (or zero fact) as identity + /// f(o) -> {o, x} // generate a new fact x from o + /// f(.) -> {.} // pass all other facts that hold + /// // before instruction_1 + /// // as identity + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o ... + /// | |\ ... + /// x = instruction_1 o p | | \ ... + /// | | | ... + /// v v v ... + /// 0 o x ... + /// + /// y = instruction_2 q r + /// virtual FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) = 0; - // - // Handles call flows: describes the effects of a function call at callInst - // to the callee target destFun. If a call instruction has multiple callee - // targets, for instance, because it is an indirect function call that cannot - // be analyzed precisely in a static manner, the call flow function will be - // queried for each callee target.
- // - // This flow function usually handles parameter passing and maps actual to - // formal parameters. If an analysis writer does not wish to analyze a given - // callee target they can return a flow function implementation that kills all - // data-flow facts (e.g. KillAll) such that call is not followed. A commonly - // used trick to model the effects of functions that are not present (e.g. - // library functions such as malloc(), free(), etc.) is to kill all facts at - // the call to the respective target and plugin the semantics in the - // call-to-return flow function. In the call-to-return flow function, an - // analysis writer can check if the function of interest is one of the - // possible targets and then, return a flow function implementation that - // describes the special semantics of that function call. - // - // Let start_point be the starting point of the callee target CalleeFun. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the CallInst. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each target fact that is generated will hold before - // start_point. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before CallInst. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass as identity into the callee target - // f(o) -> {q} // map actual o into formal q - // f(p) -> {r} // map actual p into formal r - // f(.) -> {} // kill all other facts that are not visible to the - // // callee target - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o p ... - // \ \ \ ... - // x = CalleeFun(o, p, ...) 
\ \ +----------------+ - // \ +---------------- | - // +-------------+ + | - // ... | | | - // ... | | | - // 0 o p ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, r, ...) | | | - // v v v - // 0 q r ... - // - // start point - // + /// + /// Handles call flows: describes the effects of a function call at CallInst + /// to the callee target CalleeFun. If a call instruction has multiple callee + /// targets, for instance, because it is an indirect function call that cannot + /// be analyzed precisely in a static manner, the call flow function will be + /// queried for each callee target. + /// + /// This flow function usually handles parameter passing and maps actual to + /// formal parameters. If an analysis writer does not wish to analyze a given + /// callee target they can return a flow function implementation that kills + /// all data-flow facts (e.g. KillAll) such that the call is not followed. A + /// commonly used trick to model the effects of functions that are not present + /// (e.g. library functions such as malloc(), free(), etc.) is to kill all + /// facts at the call to the respective target and plugin the semantics in the + /// call-to-return flow function. In the call-to-return flow function, an + /// analysis writer can check if the function of interest is one of the + /// possible targets and then, return a flow function implementation that + /// describes the special semantics of that function call. + /// + /// Let start_point be the starting point of the callee target CalleeFun. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the CallInst. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold before start_point. + /// + /// The concrete implementation of f is depending on the analysis problem.
In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before CallInst. We + /// assume that f is implemented to produce the following outputs. + /// + /// f(0) -> {0} // pass as identity into the callee target + /// f(o) -> {q} // map actual o into formal q + /// f(p) -> {r} // map actual p into formal r + /// f(.) -> {} // kill all other facts that are not visible to the + /// // callee target + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o p ... + /// \ \ \ ... + /// x = CalleeFun(o, p, ...) \ \ +----------------+ + /// \ +---------------- | + /// +-------------+ + | + /// ... | | | + /// ... | | | + /// 0 o p ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, r, ...) | | | + /// v v v + /// 0 q r ... + /// + /// start point + /// virtual FlowFunctionPtrType getCallFlowFunction(n_t CallInst, f_t CalleeFun) = 0; - // - // Handles return flows: describes the data-flows from an ExitInst to the - // corresponding RetSite. - // - // This flow function usually handles the returned value of the callee target - // as well as the parameter mapping back to the caller of CalleeFun for - // pointer parameters as modifications made by CalleeFun are visible to the - // caller. Data-flow facts that are not returned or escape via function - // pointer parameters (or global variables) are usually killed. - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the ExitInst. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each target fact that is generated will hold after - // CallSite. - // - // The concrete implementation of f is depending on the analysis problem. 
In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before ExitInst. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass as identity into the callee target - // f(r) -> {x} // map return value to lhs variable at CallSite - // f(q) -> {o} // map pointer-typed formal q to actual o - // f(.) -> {} // kill all other facts that are not visible to the - // // caller - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // - // x = CalleeFun(o, ...) - // +------------------+ - // +--|---------------+ | - // +--|--|------------+ | | - // v v v ... | | | - // 0 o x ... | | | - // | | | - // | | | - // | | | - // Ty CalleeFun(q, ...) | | | - // | | | - // 0 q r ... - // - // return r - // + /// + /// Handles return flows: describes the data-flows from an ExitInst to the + /// corresponding RetSite. + /// + /// This flow function usually handles the returned value of the callee target + /// as well as the parameter mapping back to the caller of CalleeFun for + /// pointer parameters as modifications made by CalleeFun are visible to the + /// caller. Data-flow facts that are not returned or escape via function + /// pointer parameters (or global variables) are usually killed. + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the ExitInst. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold after CallSite. + /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before ExitInst. 
We + /// assume that f is implemented to produce the following outputs. + /// + /// f(0) -> {0} // pass as identity into the callee target + /// f(r) -> {x} // map return value to lhs variable at CallSite + /// f(q) -> {o} // map pointer-typed formal q to actual o + /// f(.) -> {} // kill all other facts that are not visible to the + /// // caller + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o ... + /// + /// x = CalleeFun(o, ...) + /// +------------------+ + /// +--|---------------+ | + /// +--|--|------------+ | | + /// v v v ... | | | + /// 0 o x ... | | | + /// | | | + /// | | | + /// | | | + /// Ty CalleeFun(q, ...) | | | + /// | | | + /// 0 q r ... + /// + /// return r + /// virtual FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, n_t RetSite) = 0; - // Performs any side-effects of a return-flow-function - // - // In case of unbalanced returns (if the option `followReturnsPastSeeds` is - // activated in the IfdsIdeSolverConfig), we will eventually reach a function - // that is not called from other functions. Still, we may want to apply a - // return-flow-function -- just for its side-effects, such as registering a - // taint + /// Performs any side-effects of a return-flow-function + /// + /// In case of unbalanced returns (if the option `followReturnsPastSeeds` is + /// activated in the IfdsIdeSolverConfig), we will eventually reach a function + /// that is not called from other functions. Still, we may want to apply a + /// return-flow-function -- just for its side-effects, such as registering a + /// taint virtual void applyUnbalancedRetFlowFunctionSideEffects(f_t CalleeFun, n_t ExitInst, d_t Source) { // By default, do nothing } - // - // Describes the data-flows alongsite a CallSite. - // - // This flow function usually passes all data-flow facts that are not involved - // in the function call alongsite the CallSite. 
Data-flow facts that are not - // actual parameters or passed by value, modifications to those within a - // callee are not visible in the caller context, are mostly passed as - // identity. The call-to-return flow function may also be used to describe - // special semantics (cf. getCallFlowFunction()). - // - // The returned flow function implementation f - // (FlowFunction::computeTargets()) is applied to each data-flow fact d_i that - // holds right before the CallSite. f's return type is a set - // of (target) facts that have to be generated from the source fact d_i by the - // data-flow solver. Each target fact that is generated will hold after - // CallSite. - // - // The concrete implementation of f is depending on the analysis problem. In - // the following, we present a brief, contrived example: - // - // f is applied to each data-flow fact d_i that holds before CallSite. We - // assume that f is implemented to produce the following outputs. - // - // f(0) -> {0} // pass lambda as identity alongsite the CallSite - // f(o) -> {o} // assuming that o is passed by value, it is passed - // // alongsite the CallSite - // f(p) -> {} // assuming that p is a pointer-typed value, we need - // // to kill p, as it will be handled by the call- and - // // return-flow functions - // f(.) -> {.} // pass everything that is not involved in the call as - // // identity - // - // The above implementation corresponds to the following edges in the exploded - // supergraph. - // - // 0 o ... - // | | - // | +-------+ - // +--------+ | - // | | - // x = CalleeFun(o, p, ...) | | - // | | - // +--------+ | - // | +-------+ - // v v - // 0 o x ... - // + /// + /// Describes the data-flows alongsite a CallSite. + /// + /// This flow function usually passes all data-flow facts that are not + /// involved in the function call alongsite the CallSite. 
Data-flow facts that + /// are not actual parameters or passed by value, modifications to those + /// within a callee are not visible in the caller context, are mostly passed + /// as identity. The call-to-return flow function may also be used to describe + /// special semantics (cf. getCallFlowFunction()). + /// + /// The returned flow function implementation f + /// (FlowFunction::computeTargets()) is applied to each data-flow fact d_i + /// that holds right before the CallSite. f's return type is a set of (target) + /// facts that have to be generated from the source fact d_i by the data-flow + /// solver. Each target fact that is generated will hold after CallSite. + /// + /// The concrete implementation of f is depending on the analysis problem. In + /// the following, we present a brief, contrived example: + /// + /// f is applied to each data-flow fact d_i that holds before CallSite. We + /// assume that f is implemented to produce the following outputs. + /// + /// f(0) -> {0} // pass lambda as identity alongsite the CallSite + /// f(o) -> {o} // assuming that o is passed by value, it is passed + /// // alongsite the CallSite + /// f(p) -> {} // assuming that p is a pointer-typed value, we need + /// // to kill p, as it will be handled by the call- and + /// // return-flow functions + /// f(.) -> {.} // pass everything that is not involved in the call + /// as + /// // identity + /// + /// The above implementation corresponds to the following edges in the + /// exploded supergraph. + /// + /// 0 o ... + /// | | + /// | +-------+ + /// +--------+ | + /// | | + /// x = CalleeFun(o, p, ...) | | + /// | | + /// +--------+ | + /// | +-------+ + /// v v + /// 0 o x ... 
+ /// virtual FlowFunctionPtrType getCallToRetFlowFunction(n_t CallSite, n_t RetSite, llvm::ArrayRef Callees) = 0; - // - // May be used to encode special sementics of a given callee target (whose - // call should not be directly followed by the data-flow solver) similar to - // the getCallFlowFunction() --> getCallToRetFlowFunction() trick (cf. - // getCallFlowFunction()). - // - // The default implementation returns a nullptr to indicate that the mechanism - // should not be used. - // + /// + /// May be used to encode special sementics of a given callee target (whose + /// call should not be directly followed by the data-flow solver) similar to + /// the getCallFlowFunction() --> getCallToRetFlowFunction() trick (cf. + /// getCallFlowFunction()). + /// + /// The default implementation returns a nullptr to indicate that the + /// mechanism should not be used. + /// virtual FlowFunctionPtrType getSummaryFlowFunction(n_t /*Curr*/, f_t /*CalleeFun*/) { return nullptr; diff --git a/include/phasar/Domain/AnalysisDomain.h b/include/phasar/Domain/AnalysisDomain.h index bf79fe797..e0ce27c64 100644 --- a/include/phasar/Domain/AnalysisDomain.h +++ b/include/phasar/Domain/AnalysisDomain.h @@ -14,21 +14,21 @@ namespace psr { -// AnalysisDomain - This class should be specialized by different static -// analyses types... which is why the default version declares all analysis -// domains as aliases of void. -// -// Virtually all of PhASAR's internal analyses are implemented in a generic way -// using interfaces and template parameters. In order to specify concrete types -// for the template parameters such that an analysis can compute some useful -// information on some concrete target code, a configuration template parameter -// of type AnalysisDomain is passed around to make the necessary information -// available to the required analyses. -// -// If a type is not meant to be used by an analysis it should be left as an -// alias to void. 
If any analysis detects that a parameter is required to -// conduct an analysis but not correctly set, it will statically report an error -// and ask for the missing piece of information. +/// AnalysisDomain - This class should be specialized by different static +/// analyses types... which is why the default version declares all analysis +/// domains as aliases of void. +/// +/// Virtually all of PhASAR's internal analyses are implemented in a generic way +/// using interfaces and template parameters. In order to specify concrete types +/// for the template parameters such that an analysis can compute some useful +/// information on some concrete target code, a configuration template parameter +/// of type AnalysisDomain is passed around to make the necessary information +/// available to the required analyses. +/// +/// If a type is not meant to be used by an analysis it should be left as an +/// alias to void. If any analysis detects that a parameter is required to +/// conduct an analysis but not correctly set, it will statically report an +/// error and ask for the missing piece of information. struct AnalysisDomain { // Data-flow fact --- Specifies the type of an individual data-flow fact that // is propagated through the program under analysis. 
From 401a79dec2bc9d0eeb2f80f115373c826e246774 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 7 Apr 2025 22:57:54 +0200 Subject: [PATCH 02/17] some more descriptions --- include/phasar/AnalysisStrategy/AnalysisSetup.h | 2 +- include/phasar/ControlFlow/CallGraphData.h | 6 +++++- .../phasar/DataFlow/IfdsIde/IDETabulationProblem.h | 9 +++++++++ include/phasar/DataFlow/IfdsIde/Solver/Compressor.h | 12 ++++++++++++ .../DataFlow/IfdsIde/Solver/FlowFunctionCache.h | 4 ++++ include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h | 3 +++ .../phasar/DataFlow/Mono/Contexts/CallStringCTX.h | 5 +++++ include/phasar/DataFlow/Mono/IntraMonoProblem.h | 8 ++++++++ .../phasar/DataFlow/Mono/Solver/InterMonoSolver.h | 4 ++++ .../phasar/DataFlow/Mono/Solver/IntraMonoSolver.h | 2 ++ 10 files changed, 53 insertions(+), 2 deletions(-) diff --git a/include/phasar/AnalysisStrategy/AnalysisSetup.h b/include/phasar/AnalysisStrategy/AnalysisSetup.h index 975ed1c29..7b40140d4 100644 --- a/include/phasar/AnalysisStrategy/AnalysisSetup.h +++ b/include/phasar/AnalysisStrategy/AnalysisSetup.h @@ -16,7 +16,7 @@ namespace psr { -// Indicates that an analysis does not need a special configuration (file). +/// Indicates that an analysis does not need a special configuration (file). struct HasNoConfigurationType {}; struct AnalysisSetup { diff --git a/include/phasar/ControlFlow/CallGraphData.h b/include/phasar/ControlFlow/CallGraphData.h index ed50d66e4..332185ddb 100644 --- a/include/phasar/ControlFlow/CallGraphData.h +++ b/include/phasar/ControlFlow/CallGraphData.h @@ -18,8 +18,12 @@ #include namespace psr { + +/// A data structure used for storing, serializing and deserializing call-graph +/// data. 
struct CallGraphData { - // Mangled FunName --> [CS-IDs] + + /// Mangled FunName --> [CS-IDs] std::unordered_map> FToFunctionVertexTy{}; CallGraphData() noexcept = default; diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index 57120a439..a050ea6c3 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -73,6 +73,15 @@ class IDETabulationProblem : public FlowFunctions, using ConfigurationTy = HasNoConfigurationType; + /// Takes an intermediate representation data base (IRDB) and collects + /// information from it to create a tabulation problem. Can be solved using + /// the IDESolver for example. + /// @param[in] IRDB The project intermediate representation data base, on + /// which the tabulation problem will be build up. + /// @param[in] EntryPoints All entry points of the project, given as a vector + /// of strings, where the strings are the names of the entry functions. An + /// example would simply be { "main" }. + /// @param[in] ZeroValue An optional argument, to set a custom zero value. explicit IDETabulationProblem( const ProjectIRDBBase *IRDB, std::vector EntryPoints, std::optional diff --git a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h index 7d357a0a0..ff39ddf18 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h @@ -18,6 +18,8 @@ namespace psr { template class Compressor; +/// A data structure capable of compressing and storing data of a given type T. +/// This implementation can efficiently pass by value. template class Compressor>> { public: @@ -61,6 +63,8 @@ class Compressor>> { llvm::SmallVector FromInt; }; +/// A data structure capable of compressing and storing data of a given type T. +/// This implementation cannot efficiently pass by value. 
template class Compressor>> { public: @@ -69,6 +73,9 @@ class Compressor>> { ToInt.reserve(Capacity); } + /// Returns the index of the given element in the compressor's storage. If the + /// element isn't present yet, it will be added first and its index will + /// then be returned. uint32_t getOrInsert(const T &Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; @@ -79,6 +86,9 @@ class Compressor>> { return Ret; } + /// Returns the index of the given element in the compressor's storage. If the + /// element isn't present yet, it will be added first and its index will + /// then be returned. uint32_t getOrInsert(T &&Elem) { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; @@ -89,6 +99,8 @@ class Compressor>> { return Ret; } + /// Returns the index of the given element in the compressor's storage. If the + /// element isn't present, std::nullopt will be returned. std::optional getOrNull(const T &Elem) const { if (auto It = ToInt.find(&Elem); It != ToInt.end()) { return It->second; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h index a0f42d821..41ba2ccf5 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h @@ -137,6 +137,10 @@ template struct FlowFunctionCacheBase { } // namespace detail +/// This class caches flow and edge functions to avoid their reconstruction. +/// When a flow or edge function must be applied multiple times, a cached +/// version is used if one exists, otherwise a new one is created and inserted +/// into the cache. 
template class FlowFunctionCache : detail::FlowFunctionCacheBase, diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index e6423bab0..b85b25495 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -27,6 +27,9 @@ namespace psr { +/// Solves the given IFDSTabulationProblem as described in the 1996 paper by +/// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results +/// can then be queried by using resultAt() and resultsAt(). template > class IFDSSolver diff --git a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h index 098c4ff21..5070b6026 100644 --- a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h +++ b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h @@ -13,6 +13,11 @@ namespace psr { +/// Stores a call string context that can be used as an index for data +/// structures like std::unordered_map. The size_t operator is overloaded to +/// return a hash function. +/// @tparam N Type of the call string elements. +/// @tparam K Maximal length the call string can have. template class CallStringCTX { protected: std::deque CallString; diff --git a/include/phasar/DataFlow/Mono/IntraMonoProblem.h b/include/phasar/DataFlow/Mono/IntraMonoProblem.h index 4915cfe4f..2f7c77b86 100644 --- a/include/phasar/DataFlow/Mono/IntraMonoProblem.h +++ b/include/phasar/DataFlow/Mono/IntraMonoProblem.h @@ -62,6 +62,14 @@ template class IntraMonoProblem { // a user problem can override the type of configuration to be used, if any using ConfigurationTy = HasNoConfigurationType; + /// An intraprocedural monotone problem generated from an intermediate + /// representation, a type hierarchy of said representation, a control flow + /// graph, points-to information and optionally a vector of entry points. + /// @param IRDB A project intermediate representation data base. 
+ /// @param TH A type hierarchy based on the given IRDB. + /// @param CF A control flow graph based on the given IRDB. + /// @param PT Points-to information based on the given IRDB. + /// @param EntryPoints A vector of entry points. Empty by default. IntraMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const CFGBase *CF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h index 0be77db3d..4e8805e19 100644 --- a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h @@ -27,6 +27,10 @@ namespace psr { +/// A solver class for interprocedural monotone problems. +/// @tparam AnalysisDomainTy type of the analysis domain. +/// @tparam K An unsigned integer used as the maximum size for call string +/// contexts. template class InterMonoSolver { public: using ProblemTy = InterMonoProblem; diff --git a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h index 739b6fa30..5284f5e87 100644 --- a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h @@ -29,6 +29,8 @@ namespace psr { +/// A solver class for intraprocedural monotone problems. +/// @tparam AnalysisDomainTy type of the analysis domain. 
template class IntraMonoSolver { public: using ProblemTy = IntraMonoProblem; From c9bdfe8d7fc32b8ea852503933d1b53686753f4f Mon Sep 17 00:00:00 2001 From: mxHuber Date: Mon, 14 Apr 2025 13:38:03 +0200 Subject: [PATCH 03/17] Overhauled README.dox --- docs/README.dox | 222 +++++++++++++++++- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 103 ++++---- .../ControlFlow/GlobalCtorsDtorsModel.h | 14 ++ .../ControlFlow/LLVMBasedBackwardCFG.h | 1 + .../ControlFlow/LLVMBasedBackwardICFG.h | 1 + .../PhasarLLVM/ControlFlow/LLVMBasedCFG.h | 1 + .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 1 + .../ControlFlow/LLVMVFTableProvider.h | 2 + .../ControlFlow/Resolver/CHAResolver.h | 2 + .../ControlFlow/Resolver/NOResolver.h | 1 + .../ControlFlow/Resolver/OTFResolver.h | 2 + .../ControlFlow/Resolver/RTAResolver.h | 2 + .../ControlFlow/Resolver/Resolver.h | 3 + .../ControlFlow/SparseLLVMBasedCFG.h | 1 + .../ControlFlow/SparseLLVMBasedICFG.h | 9 + .../ControlFlow/SparseLLVMBasedICFGView.h | 2 + 16 files changed, 318 insertions(+), 49 deletions(-) diff --git a/docs/README.dox b/docs/README.dox index e00ef5977..da163935f 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -2,14 +2,230 @@ @mainpage PhASAR: A LLVM-based Static Analysis Framework -@author Philipp Schubert (E-Mail: philipp.schubert@upb.de) and others +@section SSEG Secure Software Engineering Group + +PhASAR is primarily developed and maintained by the Secure Software Engineering +Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. + +@authors +- Philipp Dominik Schubert (@pdschubert)(philipp.schubert@upb.de) +- Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) +- Sriteja Kummita (@sritejakv) +- Lucas Briese (@jusito) +- Martin Mory (@MMory)(martin.mory@upb.de) +- others \b Copyright \n - Copyright 2017 Philipp Schubert. All rights reserved. + Copyright (c) 2017 - 2020 Philipp Schubert and others \b License \n See LICENSE.txt -TODO: add detailed description. 
+@section RVOTCPPS Required Version of the C++ Standard + +PhASAR requires at least C++17. + +However, building in C++20 mode is supported. You may enable this by setting the cmake variable CMAKE_CXX_STANDARD to 20. Although phasar currently does not make use of C++20 features (except for some concepts behind an #ifdef border), your client application that just uses phasar as a library may want to use C++20 earlier. + +@section CSVOLLVM Currently Supported Version of LLVM + +PhASAR is currently set up to support LLVM-15.0.* + +@section WIPHASAR What is PhASAR? + +PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. + +@section BRCH Breaking Changes + +To keep PhASAR in a state that it is well suited for state-of-the-art research in static analysis, as well as for productive use, we have to make breaking changes. Please refer to BreakingChanges.md for detailed information on what was broken recently and how to migrate. + +@section HDIGSWPHASAR How do I get started with PhASAR? + +We have some documentation on PhASAR in our Wiki. You probably would like to read this README first. + +Please also have a look at PhASAR's project directory and notice the project directory examples/ as well as the custom tool tools/example-tool/myphasartool.cpp. + +@section BPHASAR Building PhASAR + +It is recommended to compile PhASAR yourself in order to get the full C++ experience and to have full control over the build mode. However, you may also want to try out one of the pre-built versions of PhASAR or the Docker container. + +As a shortcut for the very first PhASAR build on your system, you can use our bootstrap script. Please note that you must have python installed for the script to work properly. 
+ +@code +./bootstrap.sh +@endcode + +Note: If you want to do changes within PhASAR, it is recommended to build it in Debug mode: + +@code +./bootstrap.sh -DCMAKE_BUILD_TYPE=Debug +@endcode + +The bootstrap script may ask for superuser permissions (to install the dependencies); however it is not recommended to start the whole script with sudo. + +For subsequent builds, see Compiling PhASAR. + +@section CPHASAR Compiling PhASAR (if not already done using the bootstrap script) + +Set the system's variables for the C and C++ compiler to clang: + +@code +export CC=/usr/local/bin/clang +export CXX=/usr/local/bin/clang++ +@endcode + +You may need to adjust the paths according to your system. When you cloned PhASAR from Github you need to initialize PhASAR's submodules before building it: + +@code +git submodule update --init +@endcode + +If you downloaded PhASAR as a compressed release (e.g. .zip or .tar.gz) you can use the init-submodules-release.sh script that manually clones the required submodules: + +@code +utils/init-submodules-release.sh +@endcode + +Navigate into the PhASAR directory. The following commands will do the job and compile the PhASAR framework: + +@code +mkdir build +cd build/ +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. +ninja -j $(nproc) # or use a different number of cores to compile it +sudo ninja install # only if you wish to install PhASAR system wide +@endcode + +When you have used the bootstrap.sh script to install PhASAR, the above steps are already done. Use them as a reference if you wish to modify PhASAR and recompile it. + +After compilation using cmake the following two binaries can be found in the build/tools directory: + +- phasar-cli - the PhASAR command-line tool (previously called phasar-llvm) that provides access to analyses that are already implemented within PhASAR. Use this if you don't want to build an own tool on top of PhASAR. 
+ +- myphasartool - an example tool that shows how tools can be built on top of PhASAR + +Please be careful and check if errors occur during the compilation. + +When using CMake to compile PhASAR the following optional parameters can be used: + + +
Parameter : Type Effect +
BUILD_SHARED_LIBS : BOOL Build shared libraries -- Not recommended anymore. You may want to use PHASAR_BUILD_DYNLIB instead (default is OFF) +
PHASAR_BUILD_DYNLIB : BOOL Build one fat shared library (default is OFF) +
CMAKE_BUILD_TYPE : STRING Build PhASAR in 'Debug', 'RelWithDebInfo' or 'Release' mode (default is 'Debug') +
CMAKE_INSTALL_PREFIX : PATH Path where PhASAR will be installed if "ninja install" is invoked or the "install" target is built (default is /usr/local/phasar) +
PHASAR_CUSTOM_CONFIG_INSTALL_DIR : PATH If set, customizes the directory, where configuration files for PhASAR are installed (default is /usr/local/.phasar-config) +
PHASAR_ENABLE_DYNAMIC_LOG : BOOL Makes it possible to switch the logger on and off at runtime (default is ON) +
PHASAR_BUILD_DOC : BOOL Build PhASAR documentation (default is OFF) +
PHASAR_BUILD_UNITTESTS : BOOL Build PhASAR unit tests (default is ON) +
PHASAR_BUILD_IR : BOOL Build PhASAR IR (required for running the unit tests) (default is ON) +
PHASAR_BUILD_OPENSSL_TS_UNITTESTS : BOOL Build PhASAR unit tests that require OpenSSL (default is OFF) +
PHASAR_ENABLE_PAMM : STRING Enable the performance measurement mechanism ('Off', 'Core' or 'Full', default is Off) +
PHASAR_ENABLE_PIC : BOOL Build Position-Independent Code (default is ON) +
PHASAR_ENABLE_WARNINGS : BOOL Enable compiler warnings (default is ON) +
CMAKE_CXX_STANDARD : INT Build phasar in C++17 or C++20 mode (default is 17) +
+ +You can use these parameters either directly or modify the installer-script bootstrap.sh + +@section AROCT A Remark on Compile Time + +C++'s long compile times are always a pain. As shown above, when using cmake the compilation can easily be run in parallel, resulting in shorter compilation times. Make use of it! + +@section RUNNINGATS Running a Test Solver + +To test if everything works as expected please run the following command: + +@code $ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest @endcode + +You can find the phasar-cli tool in the build-tree under tools/phasar-cli. + +If you obtain output other than a segmentation fault or an exception terminating the program abnormally everything works as expected. + +@section BPHASAROAMS Building PhASAR on a MacOS System + +Due to unfortunate updates to MacOS and the handling of C++, especially on the newer M1 processors, we can't support native development on Mac. The easiest solution to develop PhASAR on a Mac right now is to use Docker's development environments. Clone this repository as described in their documentation. Afterwards, you have to log in once manually, as a root user by running docker exec -it -u root /bin/bash to complete the rest of the build process as described in this readme (install submodules, run bootstrap.sh, ...). Now you can just attach your docker container to VS Code or any other IDE, which supports remote development. + +@section INST Installation + +PhASAR can be installed using the installer scripts as explained in the following. However, you do not need to install PhASAR in order to use it. + +Installing PhASAR on an Ubuntu System + +In the following, we would like to give a complete example of how to install PhASAR using an Ubuntu or Unix-like system. + +Therefore, we provide an installation script. 
To install PhASAR, just navigate to the top-level directory of PhASAR and use the following command: + +@code ./bootstrap.sh --install @endcode + +The bootstrap script may ask for superuser permissions. + +Done! + +If you have already built phasar, you can just invoke + +@code sudo ninja install @endcode + +@section HTUPHASAR How to use PhASAR? + +We recommend using phasar as a library with cmake or conan. + +If you already have installed phasar, Use-PhASAR-as-a-library may be a good start. + +Otherwise, we recommend adding PhASAR as a git submodule to your repository. In this case, just add_subdirectory the phasar submodule directory within your CMakeLists.txt. + +Assuming you have checked out phasar in external/phasar, the phasar-related cmake commands may look like this: + +@code + +add_subdirectory(external/phasar EXCLUDE_FROM_ALL) # Build phasar with your tool + +... + +target_link_libraries(yourphasartool + ... + phasar # Make your tool link against phasar +) + +@endcode + +Depending on your use of PhASAR you also may need to add LLVM to your build. + +For more information please consult our PhASAR wiki pages. + +@section HTUWCONAN How to use with Conan v2? + +To export the recipe and dependencies execute from repo root: + +- @code conan export utils/conan/llvm-core/ --version 15.0.7 --user secure-software-engineering @endcode +- @code conan export utils/conan/clang/ --version 15.0.7 --user secure-software-engineering @endcode +- @code conan export . @endcode +- View exported conan list "phasar/*" +- Consume the package + +If you just want to use phasar-cli: + +- @code conan install --tool-requires phasar/... --build=missing -of . @endcode +- @code source conanbuild.sh @endcode +- @code phasar-cli --help @endcode + +@section PHUTIPHASAR Please help us to improve PhASAR + +You are using PhASAR and would like to help us in the future? Then please support us by filling out this web form. 
+ +By giving us feedback you help to decide in what direction PhASAR should stride in the future and give us clues about our user base. Thank you very much! + +@section IPHASARGPCH Installing PhASAR's Git pre-commit hook + +You are very much welcome to contribute to the PhASAR project. Please make sure that you install our pre-commit hook that ensures your commit adheres to the most important coding rules of the PhASAR project. For more details please consult Coding Conventions and Contributing to PhASAR. + +To install the pre-commit hook, please run the following commands in PhASAR's root directory: + +@code +- pip install pre-commit +- pre-commit install +@endcode + +Thanks. And have fun with the project. */ diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 367344d45..e405d2ede 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -153,13 +153,13 @@ template class FlowFunctionTemplates { /// dataflow-facts x, f(x) = {x}. /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x1 x1 x3 ... /// | | | ... /// id-instruction | | | ... /// v v v ... /// x1 x2 x3 ... - /// + /// \endcode static auto identityFlow() { struct IdFF final : public FlowFunction { container_type computeTargets(d_t Source) override { @@ -178,14 +178,14 @@ template class FlowFunctionTemplates { /// dataflow-facts x, f(x) = F(x). /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x /// | /// inst F /// / / | \ \ ... /// v v v v v /// x1 x2 x x3 x4 - /// + /// \endcode template static auto lambdaFlow(Fn &&F) { struct LambdaFlow final : public FlowFunction { LambdaFlow(Fn &&F) : Flow(std::forward(F)) {} @@ -208,17 +208,18 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateFlow(v, w), then for all incoming /// dataflow facts x: + /// \code /// f(w) = {v, w}, /// f(x) = {x}. 
- /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ | ... /// inst | | \ | ... /// v v v v ... /// x w v u - /// + /// \endcode /// \note If the FactToGenerate already holds at the beginning of the /// statement, this flow function does not kill it. For IFDS analysis it makes /// no difference, but in the case of IDE, the corresponding edge functions @@ -250,9 +251,10 @@ template class FlowFunctionTemplates { /// /// So, given a flow function f = generateFlowIf(v, p), for all incoming /// dataflow facts x: + /// \code /// f(x) = {v, x} if p(x) == true /// f(x) = {x} else. - /// + /// \endcode template >> static auto generateFlowIf(d_t FactToGenerate, Fn Predicate) { @@ -281,17 +283,18 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateManyFlows({v1, v2, ..., vN}, w), for all /// incoming dataflow facts x: + /// \code /// f(w) = {v1, v2, ..., vN, w} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ \ ... \ | ... /// inst | | \ \ ... \ | ... /// v v v v ... \ v ... /// x w v1 v2 ... vN u - /// + /// \endcode template , typename = std::enable_if_t>> static auto generateManyFlows(Range &&FactsToGenerate, d_t From) { @@ -324,17 +327,18 @@ template class FlowFunctionTemplates { /// (FactToKill). /// /// Given a flow function f = killFlow(v), for all incoming dataflow facts x: + /// \code /// f(v) = {} /// f(x) = {x} - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// u v w ... /// | | | /// inst | | /// v v /// u v w ... 
- /// + /// \endcode static auto killFlow(d_t FactToKill) { struct KillFlow final : public FlowFunction { KillFlow(d_t KillValue) : KillValue(std::move(KillValue)) {} @@ -355,9 +359,10 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = killFlowIf(p), for all incoming dataflow facts /// x: + /// \code /// f(x) = {} if p(x) == true /// f(x) = {x} else. - /// + /// \endcode template >> static auto killFlowIf(Fn Predicate) { @@ -382,20 +387,21 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = killManyFlows({v1, v2, ..., vN}), for all /// incoming dataflow facts x: + /// \code /// f(v1) = {} /// f(v2) = {} /// ... /// f(vN) = {} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// u v1 v2 ... vN w ... /// | | | | | /// inst | | /// v v /// u v1 v2 ... vN w ... - /// + /// \endcode template , typename = std::enable_if_t>> static auto killManyFlows(Range &&FactsToKill) { @@ -419,8 +425,9 @@ template class FlowFunctionTemplates { /// A flow function that stops propagating *all* incoming dataflow facts. /// /// Given a flow function f = killAllFlows(), for all incoming dataflow facts + /// \code /// x, f(x) = {}. - /// + /// \endcode static auto killAllFlows() { struct KillAllFF final : public FlowFunction { Container computeTargets(d_t /*Source*/) override { return Container(); } @@ -440,20 +447,21 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateFlowAndKillAllOthers(v, w), for all /// incoming dataflow facts x: + /// \code /// f(w) = {v, w} /// f(x) = {}. - /// + /// \endcode /// Equivalent to: killFlowIf(λz.z!=w) o generateFlow(v, w) (where o denotes /// function composition) /// /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ | /// inst | \ ... 
/// v v /// x w v u - /// + /// \endcode static auto generateFlowAndKillAllOthers(d_t FactToGenerate, d_t From) { struct GenFlowAndKillAllOthers final : public FlowFunction { @@ -481,17 +489,18 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = generateManyFlowsAndKillAllOthers({v1, v2, ..., /// vN}, w), for all incoming dataflow facts x: + /// \code /// f(w) = {v1, v2, ..., vN, w} /// f(x) = {}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w u ... /// | |\ \ ... \ | ... /// inst | \ \ ... \ ... /// v v v ... \ ... /// x w v1 v2 ... vN u - /// + /// \endcode template , typename = std::enable_if_t>> static auto generateManyFlowsAndKillAllOthers(Range &&FactsToGenerate, @@ -533,19 +542,20 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = transferFlow(v, w), for all incoming dataflow /// facts x: + /// \code /// f(v) = {} /// f(w) = {v, w} /// f(x) = {x}. - /// + /// \endcode /// In the exploded supergraph it may look as follows: - /// + /// \code /// x w v u ... /// | |\ | | ... /// | | \ | ... /// inst | | \ | ... /// v v v v ... /// x w v u - /// + /// \endcode static auto transferFlow(d_t FactToGenerate, d_t From) { struct TransferFlow final : public FlowFunction { TransferFlow(d_t GenValue, d_t FromValue) @@ -575,8 +585,9 @@ template class FlowFunctionTemplates { /// /// Given a flow function f = unionFlows(g, h), for all incoming dataflow /// facts x: + /// \code /// f(x) = g(x) u h(x). (where u denotes set-union) - /// + /// \endcode template && @@ -657,16 +668,16 @@ class FlowFunctions /// /// f is applied to each data-flow fact d_i that holds before instruction_1. /// We assume that f is implemented to produce the following outputs. - /// + /// \code /// f(0) -> {0} // pass the lambda (or zero fact) as identity /// f(o) -> {o, x} // generate a new fact x from o /// f(.) 
-> {.} // pass all other facts that hold before /// instruction_1 /// // as identity - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o ... /// | |\ ... /// x = instruction_1 o p | | \ ... @@ -675,7 +686,7 @@ class FlowFunctions /// 0 o x ... /// /// y = instruction_2 q r - /// + /// \endcode virtual FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) = 0; /// @@ -710,16 +721,16 @@ class FlowFunctions /// /// f is applied to each data-flow fact d_i that holds before CallInst. We /// assume that f is implemented to produce the following outputs. - /// + /// \code /// f(0) -> {0} // pass as identity into the callee target /// f(o) -> {q} // map actual o into formal q /// f(p) -> {r} // map actual p into formal r /// f(.) -> {} // kill all other facts that are not visible to the /// // callee target - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o p ... /// \ \ \ ... /// x = CalleeFun(o, p, ...) \ \ +----------------+ @@ -736,7 +747,7 @@ class FlowFunctions /// 0 q r ... /// /// start point - /// + /// \endcode virtual FlowFunctionPtrType getCallFlowFunction(n_t CallInst, f_t CalleeFun) = 0; @@ -761,16 +772,16 @@ class FlowFunctions /// /// f is applied to each data-flow fact d_i that holds before ExitInst. We /// assume that f is implemented to produce the following outputs. - /// + /// \code /// f(0) -> {0} // pass as identity into the callee target /// f(r) -> {x} // map return value to lhs variable at CallSite /// f(q) -> {o} // map pointer-typed formal q to actual o /// f(.) -> {} // kill all other facts that are not visible to the /// // caller - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o ... /// /// x = CalleeFun(o, ...) 
@@ -787,7 +798,7 @@ class FlowFunctions /// 0 q r ... /// /// return r - /// + /// \endcode virtual FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, n_t RetSite) = 0; @@ -825,7 +836,7 @@ class FlowFunctions /// /// f is applied to each data-flow fact d_i that holds before CallSite. We /// assume that f is implemented to produce the following outputs. - /// + /// \code /// f(0) -> {0} // pass lambda as identity alongsite the CallSite /// f(o) -> {o} // assuming that o is passed by value, it is passed /// // alongsite the CallSite @@ -835,10 +846,10 @@ class FlowFunctions /// f(.) -> {.} // pass everything that is not involved in the call /// as /// // identity - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o ... /// | | /// | +-------+ @@ -850,7 +861,7 @@ class FlowFunctions /// | +-------+ /// v v /// 0 o x ... - /// + /// \endcode virtual FlowFunctionPtrType getCallToRetFlowFunction(n_t CallSite, n_t RetSite, llvm::ArrayRef Callees) = 0; @@ -870,6 +881,6 @@ class FlowFunctions } }; -} // namespace psr +} // namespace psr #endif diff --git a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h index 348f341f4..0a6984af6 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h +++ b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h @@ -30,9 +30,23 @@ class GlobalCtorsDtorsModel { static constexpr llvm::StringLiteral UserEntrySelectorName = "__psrCRuntimeUserEntrySelector"; + /// @brief + /// Function that creates a new global model and inserts it into the IRDB. The + /// returned function is the global model that was inserted. + /// @param[in, out] IRDB Intermediate representation data base that will have + /// the global model inserted. + /// @param[in] UserEntryPoints Entry points for the program given as + /// llvm::Function pointers. 
static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); + /// @brief + /// Function that creates a new global model and inserts it into the IRDB. The + /// returned function is the global model that was inserted. + /// @param[in, out] IRDB Intermediate representation data base that will have + /// the global model inserted. + /// @param[in] UserEntryPoints Entry points for the program given as + /// std::strings. static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h index a488e9b46..e0bccc34b 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h @@ -22,6 +22,7 @@ namespace psr { class LLVMProjectIRDB; class LLVMBasedBackwardCFG; +/// A class that represents a backwards control flow graph. class LLVMBasedBackwardCFG : public detail::LLVMBasedCFGImpl { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index d9bbb1786..23e1afe52 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -25,6 +25,7 @@ template class CallGraph; template <> struct CFGTraits : CFGTraits {}; +/// A class that represents a backwards interprocedural control flow graph. 
class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index a3134c713..f4b27bde3 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -36,6 +36,7 @@ template <> struct CFGTraits { template <> struct CFGTraits : CFGTraits {}; +/// A class that implements a control flow graph. namespace detail { template class LLVMBasedCFGImpl : public CFGBase { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index 9ed1799e0..96b04c07c 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -44,6 +44,7 @@ class Resolver; class LLVMBasedICFG; template <> struct CFGTraits : CFGTraits {}; +/// A class that implements an interprocedural control flow graph. class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 1fdc8100b..9765462ad 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -23,6 +23,8 @@ class GlobalVariable; namespace psr { class LLVMProjectIRDB; +/// A class that creates an unordered map of the virtual functions of an +/// intermediate representation data base's module.
class LLVMVFTableProvider { public: explicit LLVMVFTableProvider(const llvm::Module &Mod); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 818a59de8..447ed5999 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -26,6 +26,8 @@ class CallBase; namespace psr { class DIBasedTypeHierarchy; + +/// A resolver that performs a class hierarchy analysis. class CHAResolver : public Resolver { public: CHAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 376eb5962..9f7dc7b95 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -18,6 +18,7 @@ class CallBase; namespace psr { +/// A resolver that doesn't resolve indirect- and virtual calls class NOResolver final : public Resolver { public: NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 3bbdc83f5..1f100b9f4 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -36,6 +36,8 @@ namespace psr { class DIBasedTypeHierarchy; +/// A resolver that performs an on-the-fly analysis based on points-to info +/// (default). 
class OTFResolver : public Resolver { public: OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index f4371c608..d42adfac8 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -28,6 +28,8 @@ class DICompositeType; namespace psr { class DIBasedTypeHierarchy; + +/// A resolver that performs a rapid type analysis. class RTAResolver : public CHAResolver { public: RTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 8748e56aa..e67a25fab 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -68,6 +68,9 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); +/// A parent class that serves as the basis for specific resolver analyses. +/// Create a specific resolver by making a new class, inheriting this resolver +/// class and implementing the virtual functions as needed. class Resolver { protected: const LLVMProjectIRDB *IRDB; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index 8645f5c72..f97f8214f 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -23,6 +23,7 @@ template <> struct CFGTraits : CFGTraits { using v_t = const llvm::Value *; }; +/// A class that represents a sparse control flow graph. 
class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { friend struct SVFGCache; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 2d43ae64e..db3a7004c 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -20,12 +20,21 @@ class SparseLLVMBasedCFG; class DIBasedTypeHierarchy; struct SVFGCache; +/// A class that represents a sparse interprocedural control flow graph. class SparseLLVMBasedICFG : public LLVMBasedICFG, public SparseLLVMBasedCFGProvider { friend SparseLLVMBasedCFGProvider; public: + /// @param[in, out] IRDB Intermediate representation data base. + /// @param[in] CGType The type of the call graph analysis. + /// @param[in] EntryPoints The entry points of the program the IRDB is based + /// on. Often this is just { "main" }. + /// @param TH Type Hierarchy of the given IRDB. + /// @param PT Points-to information that represents aliases. + /// @param S Level of soundness. + /// @param IncludeGlobals Flag to determine if globals should be included. explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, llvm::ArrayRef EntryPoints = {}, diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index 8c0aaa9da..f99612ea8 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -40,6 +40,8 @@ class SparseLLVMBasedICFGView friend SparseLLVMBasedCFGProvider; public: + /// @param[in] ICF Interprocedural control flow graph. + /// @param[in] PT Points-to information that represents aliases. 
explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, LLVMAliasInfoRef PT); From ef33968584b842d9fc510165f2f15c227e55025e Mon Sep 17 00:00:00 2001 From: mxHuber Date: Sun, 20 Apr 2025 20:01:13 +0200 Subject: [PATCH 04/17] Added texts + fixed typos --- README.md | 2 +- docs/README.dox | 91 ++++++++++--------- .../phasar/DataFlow/IfdsIde/EdgeFunctions.h | 16 ++-- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 2 +- .../ControlFlow/GlobalCtorsDtorsModel.h | 10 +- .../ControlFlow/Resolver/CHAResolver.h | 6 ++ .../ControlFlow/Resolver/NOResolver.h | 5 + .../ControlFlow/Resolver/OTFResolver.h | 10 +- .../ControlFlow/Resolver/RTAResolver.h | 6 ++ .../ControlFlow/SparseLLVMBasedICFG.h | 7 +- .../DataFlow/IfdsIde/FunctionDataFlowFacts.h | 6 +- .../IfdsIde/LLVMFunctionDataFlowFacts.h | 10 +- .../DataFlow/IfdsIde/LLVMZeroValue.h | 8 +- .../PhasarLLVM/Domain/LLVMAnalysisDomain.h | 2 + .../phasar/PhasarLLVM/Pointer/LLVMAliasSet.h | 14 +-- .../PhasarLLVM/Pointer/LLVMAliasSetData.h | 3 + .../Pointer/LLVMBasedAliasAnalysis.h | 7 ++ .../TypeHierarchy/DIBasedTypeHierarchy.h | 15 +++ .../TypeHierarchy/DIBasedTypeHierarchyData.h | 3 + include/phasar/PhasarLLVM/Utils/Annotation.h | 6 +- include/phasar/PhasarPass/PhasarPass.h | 10 ++ include/phasar/PhasarPass/PhasarPrinterPass.h | 2 + include/phasar/Pointer/AliasInfo.h | 7 +- include/phasar/TypeHierarchy/VFTable.h | 2 + .../TaintConfig/LLVMTaintConfig.cpp | 2 +- 25 files changed, 163 insertions(+), 89 deletions(-) diff --git a/README.md b/README.md index d15020fae..17396e546 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Currently, PhASAR is maintained by PhASAR requires at least C++-17. However, building in C++20 mode is supported. You may enable this setting the cmake variable `CMAKE_CXX_STANDARD` to `20`. -Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 ealier. 
+Although phasar currently does not make use of C++-20 features (except for some `concept`s behind an #ifdef border), your client application that just *uses* phasar as a library may want to use C++20 earlier. ## Currently Supported Version of LLVM diff --git a/docs/README.dox b/docs/README.dox index da163935f..fe4955521 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -2,62 +2,59 @@ @mainpage PhASAR: A LLVM-based Static Analysis Framework -@section SSEG Secure Software Engineering Group +@subsubsection SSEG Secure Software Engineering Group PhASAR is primarily developed and maintained by the Secure Software Engineering Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. @authors -- Philipp Dominik Schubert (@pdschubert)(philipp.schubert@upb.de) -- Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) -- Sriteja Kummita (@sritejakv) -- Lucas Briese (@jusito) -- Martin Mory (@MMory)(martin.mory@upb.de) +- Philipp Dominik Schubert (@pdschubert)(philipp.schubert@upb.de) +- Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) +- Sriteja Kummita (@sritejakv) +- Lucas Briese (@jusito) +- Martin Mory (@MMory)(martin.mory@upb.de) - others -\b Copyright \n - Copyright (c) 2017 - 2020 Philipp Schubert and others - \b License \n See LICENSE.txt -@section RVOTCPPS Required Version of the C++ Standard +@subsubsection RVOTCPPS Required Version of the C++ Standard PhASAR requires at least C++-17. -However, building in C++20 mode is supported. You may enable this setting the cmake variable CMAKE_CXX_STANDARD to 20. Although phasar currently does not make use of C++-20 features (except for some concepts behind an #ifdef border), your client application that just uses phasar as a library may want to use C++20 ealier. +However, building in C++20 mode is supported. You may enable this setting the cmake variable CMAKE_CXX_STANDARD to 20. 
Although phasar currently does not make use of C++-20 features (except for some concepts behind an #ifdef border), your client application that just uses phasar as a library may want to use C++20 earlier. -@section CSVOLLVM Currently Supported Version of LLVM +@subsubsection CSVOLLVM Currently Supported Version of LLVM PhASAR is currently set up to support LLVM-15.0.* -@section WIPHASAR What is PhASAR? +@subsubsection WIPHASAR What is PhASAR? PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. -@section BRCH Breaking Changes +@subsubsection BRCH Breaking Changes To keep PhASAR in a state that it is well suited for state-of-the-art research in static analysis, as well as for productive use, we have to make breaking changes. Please refer to BreakingChanges.md for detailed information on what was broken recently and how to migrate. -@section HDIGSWPHASAR How do I get started with PhASAR? +@subsubsection HDIGSWPHASAR How do I get started with PhASAR? We have some documentation on PhASAR in our Wiki. You probably would like to read this README first. Please also have a look on PhASAR's project directory and notice the project directory examples/ as well as the custom tool tools/example-tool/myphasartool.cpp. -@section BPHASAR Building PhASAR +@subsubsection BPHASAR Building PhASAR It is recommended to compile PhASAR yourself in order to get the full C++ experience and to have full control over the build mode. However, you may also want to try out one of the pre-built versions of PhASAR or the Docker container. As a shortcut for the very first PhASAR build on your system, you can use our bootstrap script. Please note that you must have python installed for the script to work properly. 
-@code +@code{.sh} ./bootstrap.sh @endcode Note: If you want to do changes within PhASAR, it is recommended to build it in Debug mode: -@code +@code{.sh} ./bootstrap.sh -DCMAKE_BUILD_TYPE=Debug @endcode @@ -65,30 +62,30 @@ The bootstrap script may ask for superuser permissions (to install the dependenc For subsequent builds, see Compiling PhASAR. -@section CPHASAR Compiling PhASAR (if not already done using the bootstrap script) +@subsubsection CPHASAR Compiling PhASAR (if not already done using the bootstrap script) Set the system's variables for the C and C++ compiler to clang: -@code +@code{.sh} export CC=/usr/local/bin/clang export CXX=/usr/local/bin/clang++ @endcode -You may need to adjust the paths according to your system. When you cloned PhASAR from Github you need to initialize PhASAR's submodules before building it: +You may need to adjust the paths according to your system. When you cloned PhASAR from GitHub you need to initialize PhASAR's submodules before building it: -@code +@code{.sh} git submodule update --init @endcode If you downloaded PhASAR as a compressed release (e.g. .zip or .tar.gz) you can use the init-submodules-release.sh script that manually clones the required submodules: -@code +@code{.sh} utils/init-submodules-release.sh @endcode Navigate into the PhASAR directory. The following commands will do the job and compile the PhASAR framework: -@code +@code{.sh} mkdir build cd build/ cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. @@ -113,7 +110,7 @@ When using CMake to compile PhASAR the following optional parameters can be used BUILD_SHARED_LIBS : BOOL Build shared libraries -- Not recommended anymore. 
You may want to use PHASAR_BUILD_DYNLIB instead (default is OFF) PHASAR_BUILD_DYNLIB : BOOL Build one fat shared library (default is OFF) CMAKE_BUILD_TYPE : STRING Build PhASAR in 'Debug', 'RelWithDebInfo' or 'Release' mode (default is 'Debug') - CMAKE_INSTALL_PREFIX : PATH Path where PhASAR will be installed if "ninja install” is invoked or the “install” target is built (default is /usr/local/phasar) + CMAKE_INSTALL_PREFIX : PATH Path where PhASAR will be installed if "ninja install" is invoked or the “install” target is built (default is /usr/local/phasar) PHASAR_CUSTOM_CONFIG_INSTALL_DIR : PATH If set, customizes the directory, where configuration files for PhASAR are installed (default is /usr/local/.phasar-config) PHASAR_ENABLE_DYNAMIC_LOG : BOOL Makes it possible to switch the logger on and off at runtime (default is ON) PHASAR_BUILD_DOC : BOOL Build PhASAR documentation (default is OFF) @@ -128,25 +125,27 @@ When using CMake to compile PhASAR the following optional parameters can be used You can use these parameters either directly or modify the installer-script bootstrap.sh -@section AROCT A Remark on Compile Time +@subsubsection AROCT A Remark on Compile Time C++'s long compile times are always a pain. As shown in the above, when using cmake the compilation can easily be run in parallel, resulting in shorter compilation times. Make use of it! -@section RUNNINGATS Running a Test Solver +@subsubsection RUNNINGATS Running a Test Solver To test if everything works as expected please run the following command: -@code $ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest @endcode +@code{.sh} +$ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest +@endcode You can find the phasar-cli tool in the build-tree under tools/phasar-cli. If you obtain output other than a segmentation fault or an exception terminating the program abnormally everything works as expected. 
-@section BPHASAROAMS Building PhASAR on a MacOS System +@subsubsection BPHASAROAMS Building PhASAR on a MacOS System Due to unfortunate updates to MacOS and the handling of C++, especially on the newer M1 processors, we can't support native development on Mac. The easiest solution to develop PhASAR on a Mac right now is to use dockers development environments. Clone this repository as described in their documentation. Afterwards, you have to login once manually, as a root user by running docker exec -it -u root /bin/bash to complete the rest of the build process as described in this readme (install submodules, run bootstrap.sh, ...). Now you can just attach your docker container to VS Code or any other IDE, which supports remote development. -@section INST Installation +@subsubsection INST Installation PhASAR can be installed using the installer scripts as explained in the following. However, you do not need to install PhASAR in order to use it. @@ -156,7 +155,9 @@ In the following, we would like to give an complete example of how to install Ph Therefore, we provide an installation script. To install PhASAR, just navigate to the top-level directory of PhASAR and use the following command: -@code ./bootstrap.sh --install @endcode +@code{.sh} +./bootstrap.sh --install +@endcode The bootstrap script may ask for superuser permissions. @@ -164,9 +165,11 @@ Done! If You have already built phasar, you can just invoke -@code sudo ninja install @endcode +@code{.sh} +sudo ninja install +@endcode -@section HTUPHASAR How to use PhASAR? +@subsubsection HTUPHASAR How to use PhASAR? We recomment using phasar as a library with cmake or conan. @@ -176,7 +179,7 @@ Otherwise, we recommend adding PhASAR as a git submodule to your repository. 
In Assuming you have checked out phasar in external/phasar, the phasar-related cmake commands may look like this: -@code +@code{.sh} add_subdirectory(external/phasar EXCLUDE_FROM_ALL) # Build phasar with your tool @@ -193,35 +196,35 @@ Depending on your use of PhASAR you also may need to add LLVM to your build. For more information please consult our PhASAR wiki pages. -@section HTUWCONAN How to use with Conan v2 ? +@subsubsection HTUWCONAN How to use with Conan v2 ? To export the recipe and dependencies execute from repo root: -- @code conan export utils/conan/llvm-core/ --version 15.0.7 --user secure-software-engineering @endcode -- @code conan export utils/conan/clang/ --version 15.0.7 --user secure-software-engineering @endcode -- @code conan export . @endcode +- @code{.sh} conan export utils/conan/llvm-core/ --version 15.0.7 --user secure-software-engineering @endcode +- @code{.sh} conan export utils/conan/clang/ --version 15.0.7 --user secure-software-engineering @endcode +- @code{.sh} conan export . @endcode - View exported conan list "phasar/*" - Consume the package If you just want to use phasar-cli: -- @code conan install --tool-requires phasar/... --build=missing -of . @endcode -- @code source conanbuild.sh @endcode -- @code phasar-cli --help @endcode +- @code{.sh} conan install --tool-requires phasar/... --build=missing -of . @endcode +- @code{.sh} source conanbuild.sh @endcode +- @code{.sh} phasar-cli --help @endcode -@section PHUTIPHASAR Please help us to improve PhASAR +@subsubsection PHUTIPHASAR Please help us to improve PhASAR You are using PhASAR and would like to help us in the future? Then please support us by filling out this web form. By giving us feedback you help to decide in what direction PhASAR should stride in the future and give us clues about our user base. Thank you very much! 
-@section IPHASARGPCH Installing PhASAR's Git pre-commit hook +@subsubsection IPHASARGPCH Installing PhASAR's Git pre-commit hook You are very much welcome to contribute to the PhASAR project. Please make sure that you install our pre-commit hook that ensures your commit adheres to the most important coding rules of the PhASAR project. For more details please consult Coding Conventions and Contributing to PhASAR. To install the pre-commit hook, please run the following commands in PhASAR's root directory: -@code +@code{.sh} - pip install pre-commit - pre-commit install @endcode diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h index cf5e38ce2..5a57cdfe9 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h @@ -70,7 +70,7 @@ template class EdgeFunctions { /// /// The above flow-function implementation corresponds to the following edges /// in the exploded supergraph. - /// + /// \code /// 0 o ... /// | |\ ... /// Curr := x = instruction_1 o p | | \ ... @@ -79,7 +79,7 @@ template class EdgeFunctions { /// 0 o x ... /// /// Succ := y = instruction_2 q r - /// + /// \endcode /// For each edge generated by the respective flow function a normal edge /// function is queried that describes a value computation. This results in /// the following queries: @@ -114,7 +114,7 @@ template class EdgeFunctions { /// /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o p ... /// \ \ \ ... /// CallInst := x = CalleeFun(o, p, ...) \ \ +----------------+ @@ -131,7 +131,7 @@ template class EdgeFunctions { /// 0 q r ... /// /// start point - /// + /// \endcode /// For each edge generated by the respective flow function a call edge /// function is queried that describes a value computation. 
This results in /// the following queries: @@ -167,7 +167,7 @@ template class EdgeFunctions { /// /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o ... /// /// CallSite = RetSite := x = CalleeFun(o, ...) @@ -184,7 +184,7 @@ template class EdgeFunctions { /// 0 q r /// /// ExitInst := return r - /// + /// \endcode /// For each edge generated by the respective flow function a return edge /// function is queried that describes a value computation. This results in /// the following queries: @@ -225,7 +225,7 @@ template class EdgeFunctions { /// /// The above implementation corresponds to the following edges in the /// exploded supergraph. - /// + /// \code /// 0 o ... /// | | /// | +-------+ @@ -237,7 +237,7 @@ template class EdgeFunctions { /// | +-------+ /// v v /// 0 o x ... - /// + /// \endcode /// For each edge generated by the respective flow function a call-to-return /// edge function is queried that describes a value computation. This results /// in the following queries: diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index e405d2ede..c30a70200 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -223,7 +223,7 @@ template class FlowFunctionTemplates { /// \note If the FactToGenerate already holds at the beginning of the /// statement, this flow function does not kill it. For IFDS analysis it makes /// no difference, but in the case of IDE, the corresponding edge functions - /// are being joined together potentially lowing precition. If that is an + /// are being joined together potentially lowering precision. If that is an /// issue, use transferFlow instead. 
static auto generateFlow(d_t FactToGenerate, d_t From) { struct GenFrom final : public FlowFunction { diff --git a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h index 0a6984af6..9e8f4b66e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h +++ b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h @@ -30,9 +30,8 @@ class GlobalCtorsDtorsModel { static constexpr llvm::StringLiteral UserEntrySelectorName = "__psrCRuntimeUserEntrySelector"; - /// @brief - /// Function that creates a new global model and inserts it into the IRDB. The - /// returned function is the global model that was inserted. + /// @brief Function that creates a new global model and inserts it into the + /// IRDB. The returned function is the global model that was inserted. /// @param[in, out] IRDB Intermediate representation data base that will have /// the global model inserted. /// @param[in] UserEntryPoints Entry points for the program given as @@ -40,9 +39,8 @@ class GlobalCtorsDtorsModel { static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); - /// @brief - /// Function that creates a new global model and inserts it into the IRDB. The - /// returned function is the global model that was inserted. + /// @brief Function that creates a new global model and inserts it into the + /// IRDB. The returned function is the global model that was inserted. /// @param[in, out] IRDB Intermediate representation data base that will have /// the global model inserted. 
/// @param[in] UserEntryPoints Entry points for the program given as diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 447ed5999..035ba37c2 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -30,6 +30,12 @@ class DIBasedTypeHierarchy; /// A resolver that performs a class hierarchy analysis. class CHAResolver : public Resolver { public: + /// @brief Class that implements a Resolver's virtual functions to be able to + /// perform a class hierarchy analysis. + /// @param[in] IRDB The project intermediate representation data base, on + /// which the tabulation problem will be built up. + /// @param[in] VTP Virtual Function Table Provider. + /// @param[in] TH A type hierarchy based on the given IRDB. CHAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 9f7dc7b95..05fd1b396 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -21,6 +21,11 @@ namespace psr { /// A resolver that doesn't resolve indirect- and virtual calls class NOResolver final : public Resolver { public: + /// @brief Class that implements a Resolver's virtual functions to be able to + /// perform an analysis that does not resolve indirect- and virtual calls. + /// @param[in] IRDB The project intermediate representation data base, on + /// which the tabulation problem will be built up. + /// @param[in] VTP Virtual Function Table Provider.
NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP); ~NOResolver() override = default; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 1f100b9f4..08b2912f4 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -36,10 +36,16 @@ namespace psr { class DIBasedTypeHierarchy; -/// A resolver that performs an on-the-fly analysis based on points-to info -/// (default). +/// A resolver that performs an on-the-fly analysis based on points-to +/// information (default). class OTFResolver : public Resolver { public: + /// @brief Class that implements a Resolver's virtual functions to be able to + /// perform an on-the-fly analysis based on points-to information. + /// @param[in] IRDB The project intermediate representation data base, on + /// which the tabulation problem will be built up. + /// @param[in] VTP Virtual Function Table Provider. + /// @param[in] PT Points-to information. OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index d42adfac8..9e786643f 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -32,6 +32,12 @@ class DIBasedTypeHierarchy; /// A resolver that performs a rapid type analysis. class RTAResolver : public CHAResolver { public: + /// @brief Class that implements a Resolver's virtual functions to be able to + /// perform a rapid type analysis. + /// @param[in] IRDB The project intermediate representation data base, on + /// which the tabulation problem will be built up. + /// @param[in] VTP Virtual Function Table Provider. + /// @param[in] TH A type hierarchy based on the given IRDB.
RTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH); diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index db3a7004c..901465935 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -27,11 +27,14 @@ class SparseLLVMBasedICFG friend SparseLLVMBasedCFGProvider; public: - /// @param[in, out] IRDB Intermediate representation data base. + /// @param[in, out] IRDB Intermediate representation data base. The IRDB will + /// be changed, only if IncludeGlobals is set to true. /// @param[in] CGType The type of the call graph analysis. /// @param[in] EntryPoints The entry points of the program the IRDB is based /// on. Often this is just { "main" }. - /// @param TH Type Hierarchy of the given IRDB. + /// @param TH Type Hierarchy of the given IRDB. Type Hierarchy can only be + /// null, if the call graph type does not need a type hierarchy. In any other + /// case, this must not be null. An example of this is the OTF analysis. /// @param PT Points-to information that represents aliases. /// @param S Level of soundness. /// @param IncludeGlobals Flag to determine if globals should be included. 
diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h index e40d109ab..a12c70637 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h @@ -25,7 +25,7 @@ struct DataFlowFact { class FunctionDataFlowFacts { public: - using ParamaterMappingTy = + using ParameterMappingTy = std::unordered_map>; FunctionDataFlowFacts() noexcept = default; @@ -67,10 +67,10 @@ class FunctionDataFlowFacts { return It->second; } - return getDefaultValue(); + return getDefaultValue(); } - llvm::StringMap Fdff; + llvm::StringMap Fdff; }; } // namespace psr::library_summary diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h index 19e03b49b..b51e26172 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h @@ -14,10 +14,12 @@ class LLVMFunctionDataFlowFacts; [[nodiscard]] LLVMFunctionDataFlowFacts readFromFDFF(const FunctionDataFlowFacts &Fdff, const LLVMProjectIRDB &Irdb); +/// @brief A class that stores and maps llvm::Function * to Parameter Mapping +/// Types class LLVMFunctionDataFlowFacts { public: LLVMFunctionDataFlowFacts() noexcept = default; - using ParamaterMappingTy = FunctionDataFlowFacts::ParamaterMappingTy; + using ParameterMappingTy = FunctionDataFlowFacts::ParameterMappingTy; /// insert a set of data flow facts void insertSet(const llvm::Function *Fun, uint32_t Index, @@ -56,19 +58,19 @@ class LLVMFunctionDataFlowFacts { return getFacts(Fun, Arg->getArgNo()); } - [[nodiscard]] const ParamaterMappingTy & + [[nodiscard]] const ParameterMappingTy & getFactsForFunction(const llvm::Function *Fun) { auto Iter = LLVMFdff.find(Fun); if (Iter != LLVMFdff.end()) { return Iter->second; } - 
return getDefaultValue(); + return getDefaultValue(); } friend LLVMFunctionDataFlowFacts readFromFDFF(const FunctionDataFlowFacts &Fdff, const LLVMProjectIRDB &Irdb); private: - std::unordered_map LLVMFdff; + std::unordered_map LLVMFdff; }; } // namespace psr::library_summary diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h index ad55a9840..2758da38a 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h @@ -28,10 +28,10 @@ class Value; namespace psr { -/** - * This class may be used to represent the special zero value for IFDS - * and IDE problems. The LLVMZeroValue is implemented as a singleton. - */ +/// +/// @brief This class may be used to represent the special zero value for IFDS +/// and IDE problems. The LLVMZeroValue is implemented as a singleton. +/// class LLVMZeroValue : public llvm::GlobalVariable { private: LLVMZeroValue(llvm::Module &Mod); diff --git a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h index 755bbc923..a2f74c536 100644 --- a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h +++ b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h @@ -25,6 +25,8 @@ class LLVMProjectIRDB; class LLVMBasedICFG; class LLVMBasedCFG; +/// A default analysis domain that can be either used outright, or be used as a +/// reference for custom analysis domains. 
struct LLVMAnalysisDomainDefault : public AnalysisDomain { using d_t = const llvm::Value *; using n_t = const llvm::Instruction *; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h index ff4ccbe5d..326c3b4f0 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h @@ -55,12 +55,14 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, /** * Creates points-to set(s) for all functions in the IRDB. If - * UseLazyEvaluation is true, computes points-to-sets for functions that do + * UseLazyEvaluation is true, computes points-to set(s) for functions that do * not use global variables on the fly */ explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, bool UseLazyEvaluation = true, AliasAnalysisType PATy = AliasAnalysisType::CFLAnders); - + /** + * Loads points-to set(s) from json file + */ explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedPTS); @@ -107,8 +109,8 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, * Shows a parts of an alias set. Good for debugging when one wants to peak * into a points to set. * - * @param ValueSetPair a pair on an Value* and the corresponding points to set - * @param Peak the amount of instrutions shown from the points to set + * @param ValueSetPair a pair on a Value* and the corresponding points-to set + * @param Peak the amount of instructions shown from the points-to set */ static void peakIntoAliasSet(const AliasSetMap::value_type &ValueSetPair, int Peak); @@ -116,8 +118,8 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, /** * Prints out the size distribution for all points to sets. * - * @param Peak the amount of instrutions shown from one of the biggest points - * to sets, use 0 show nothing. + * @param Peak the amount of instructions shown from one of the biggest + * points-to sets, use 0 to show nothing. 
*/ void drawAliasSetsDistribution(int Peak = 10) const; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h index bf924e39c..13cdfbf6b 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h @@ -15,6 +15,9 @@ #include namespace psr { + +/// A data structure used for storing, serializing and deserializing +/// LLVMAliasSet data struct LLVMAliasSetData { std::vector> AliasSets; std::vector AnalyzedFunctions; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h index 1195da6c8..7b98b1d3c 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h @@ -28,6 +28,13 @@ class LLVMProjectIRDB; class LLVMBasedAliasAnalysis { public: + /// @brief A class that can perform an analysis, which generates alias + /// information from a given intermediate representation data base. + /// @param IRDB The project intermediate representation data base, which the + /// analysis will generate the alias information from. + /// @param UseLazyEvaluation Set to true to use a faster but less precise + /// algorithm. + /// @param PATy The type of alias analysis to perform. explicit LLVMBasedAliasAnalysis( LLVMProjectIRDB &IRDB, bool UseLazyEvaluation, AliasAnalysisType PATy = AliasAnalysisType::Basic); diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h index 1b5694c6d..e02f3091e 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h @@ -25,6 +25,10 @@ namespace psr { class LLVMProjectIRDB; +/// @brief A class that represents the type hierarchy of an intermediate +/// representation data base (IRDB) of a project. 
The algorithm uses Debug +/// Information (DI) to create the type hierarchy, therefore the given IRDB must +/// contain debug information for the hierarchy to be created. class DIBasedTypeHierarchy : public TypeHierarchy { public: @@ -39,7 +43,18 @@ class DIBasedTypeHierarchy static inline constexpr llvm::StringLiteral PureVirtualCallName = "__cxa_pure_virtual"; + /// @brief Creates a type hierarchy based on an intermediate representation + /// data base. + /// @param[in] IRDB The intermediate representation data base of which the + /// type hierarchy will be based upon. This MUST contain debug information for + /// the algorithm to work! explicit DIBasedTypeHierarchy(const LLVMProjectIRDB &IRDB); + /// @brief Loads an already computed type hierarchy. + /// @param[in] IRDB The intermediate representation data base of the type + /// hierarchy. + /// @param[in] SerializedData The already existing type hierarchy, given by + /// the appropriate class DIBasedTypeHierarchyData, which contains all + /// necessary information. explicit DIBasedTypeHierarchy(const LLVMProjectIRDB *IRDB, const DIBasedTypeHierarchyData &SerializedData); ~DIBasedTypeHierarchy() override = default; diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h index 52cd85569..ff59cfe3a 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h @@ -18,6 +18,9 @@ #include namespace psr { +/// @brief A structure that is used to store already calculated type hierarchy +/// data, serialize that data or deserialize a json file with a previously +/// serialized type hierarchy. 
struct DIBasedTypeHierarchyData { // DITypes and llvm::Function * are serialized by serializing their names and // using the DebugInfoFinder to deserialize them diff --git a/include/phasar/PhasarLLVM/Utils/Annotation.h b/include/phasar/PhasarLLVM/Utils/Annotation.h index 31d9c42ba..23a1ee0dd 100644 --- a/include/phasar/PhasarLLVM/Utils/Annotation.h +++ b/include/phasar/PhasarLLVM/Utils/Annotation.h @@ -10,10 +10,8 @@ namespace psr { -//===----------------------------------------------------------------------===// -// Helper classes that allow for an easier retrieval of annotation information. -//===----------------------------------------------------------------------===// - +/// @brief Helper classes that allow for an easier retrieval of annotation +/// information. class VarAnnotation { public: VarAnnotation(const llvm::CallBase *AnnotationCall) noexcept; diff --git a/include/phasar/PhasarPass/PhasarPass.h b/include/phasar/PhasarPass/PhasarPass.h index d8272b98c..93a0eb4c6 100644 --- a/include/phasar/PhasarPass/PhasarPass.h +++ b/include/phasar/PhasarPass/PhasarPass.h @@ -20,6 +20,16 @@ class raw_ostream; namespace psr { +/// @brief PhasarPass is an implementation of llvm passes for the PhASAR +/// framework. +/// What is a pass? +/// "The LLVM pass framework is an important part of the LLVM system, +/// because LLVM passes are where most of the interesting parts of the compiler +/// exist. Passes perform the transformations and optimizations that make up the +/// compiler, they build the analysis results that are used by these +/// transformations, and they are, above all, a structuring technique for +/// compiler code." 
+/// Source: https://llvm.org/docs/WritingAnLLVMNewPMPass.html class PhasarPass : public llvm::ModulePass { public: static inline char ID = 12; diff --git a/include/phasar/PhasarPass/PhasarPrinterPass.h b/include/phasar/PhasarPass/PhasarPrinterPass.h index 76baa2f9a..d344ded13 100644 --- a/include/phasar/PhasarPass/PhasarPrinterPass.h +++ b/include/phasar/PhasarPass/PhasarPrinterPass.h @@ -19,6 +19,8 @@ class AnalysisUsage; namespace psr { +/// @brief A pass that can be used to print information while a PhASAR analysis +/// is running. Example usage would be for debugging. class PhasarPrinterPass : public llvm::ModulePass { public: static inline char ID = 12; // NOLINT FIXME: make const when LLVM supports it diff --git a/include/phasar/Pointer/AliasInfo.h b/include/phasar/Pointer/AliasInfo.h index db861acdc..dce8fe5b1 100644 --- a/include/phasar/Pointer/AliasInfo.h +++ b/include/phasar/Pointer/AliasInfo.h @@ -41,7 +41,7 @@ struct AliasInfoTraits> : DefaultAATraits {}; template struct AliasInfoTraits> : DefaultAATraits {}; -/// A type-erased reference to any object implementing the IsAliasInfo +/// \brief A type-erased reference to any object implementing the IsAliasInfo /// interface. Use this, if your analysis is not tied to a specific alias info /// implementation. /// @@ -260,8 +260,9 @@ class AliasInfoRef : public AnalysisPropertiesMixin> { const VTable *VT{}; }; -/// Similar to AliasInfoRef, but exclusively owns the held reference. Use this, -/// if you need to decide dynamically, which alias info implementation to use. +/// \brief Similar to AliasInfoRef, but exclusively owns the held reference. Use +/// this, if you need to decide dynamically, which alias info implementation to +/// use. /// /// Implicitly convertible to AliasInfoRef. 
/// diff --git a/include/phasar/TypeHierarchy/VFTable.h b/include/phasar/TypeHierarchy/VFTable.h index a236f2e34..fc36eb4f7 100644 --- a/include/phasar/TypeHierarchy/VFTable.h +++ b/include/phasar/TypeHierarchy/VFTable.h @@ -20,6 +20,8 @@ class raw_ostream; namespace psr { +/// @brief A class to store the data of a virtual function table from an +/// intermediate representation data base. template class VFTable { public: virtual ~VFTable() = default; diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index c7465cdd8..c2a4f3e17 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -366,7 +366,7 @@ void LLVMTaintConfig::forAllLeakCandidatesAtImpl( } } - // Do not iterate over the actual paramaters of Inst as we did in + // Do not iterate over the actual parameters of Inst as we did in // forAllGeneratedValuesAt, because sink-values are not propagated in the // current taint analyses. 
Handling sink-values should be done in the // SinkCallBack From 4f7ea4305598b27ae1bd24993d5860a277c21123 Mon Sep 17 00:00:00 2001 From: mxHuber Date: Sun, 27 Apr 2025 23:02:24 +0200 Subject: [PATCH 05/17] More descriptions and typo fixes --- docs/README.dox | 2 +- include/phasar/ControlFlow/ICFGBase.h | 2 +- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 4 +- .../DataFlow/IfdsIde/SpecialSummaries.h | 10 ++--- .../phasar/DataFlow/Mono/InterMonoProblem.h | 8 ++++ .../phasar/DataFlow/Mono/IntraMonoProblem.h | 10 ++--- .../PathSensitivity/PathSensitivityManager.h | 3 ++ include/phasar/Domain/AnalysisDomain.h | 4 +- .../ControlFlow/LLVMBasedBackwardICFG.h | 5 ++- .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 5 ++- .../ControlFlow/SparseLLVMBasedCFG.h | 4 +- .../ControlFlow/SparseLLVMBasedICFG.h | 5 ++- .../ControlFlow/SparseLLVMBasedICFGView.h | 5 ++- .../Problems/IDEExtendedTaintAnalysis.h | 2 + .../Z3BasedPathSensitvityManager.h | 2 + include/phasar/PhasarLLVM/Utils/Annotation.h | 3 +- include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h | 7 ++++ .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 10 ++++- include/phasar/Utils/AnalysisPrinterBase.h | 2 + include/phasar/Utils/DebugOutput.h | 3 ++ include/phasar/Utils/DefaultAnalysisPrinter.h | 1 + include/phasar/Utils/EquivalenceClassMap.h | 42 +++++++++---------- include/phasar/Utils/ErrorHandling.h | 4 ++ include/phasar/Utils/IO.h | 3 ++ .../phasar/Utils/OnTheFlyAnalysisPrinter.h | 2 + .../external/llvm/CFLAliasAnalysisUtils.h | 8 +--- 26 files changed, 105 insertions(+), 51 deletions(-) diff --git a/docs/README.dox b/docs/README.dox index fe4955521..6c16e7f5e 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -16,7 +16,7 @@ Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. 
- others \b License \n - See LICENSE.txt + See LICENSE.txt @subsubsection RVOTCPPS Required Version of the C++ Standard diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 4ef63126d..251ac3082 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -37,7 +37,7 @@ template class ICFGBase { } /// returns the function definition or declaration with the given name. If - /// ther eis no such function, returns a default constructed f_t (nullptr for + /// there is no such function, returns a default constructed f_t (nullptr for /// pointers). [[nodiscard]] f_t getFunction(llvm::StringRef Fun) const { return self().getFunctionImpl(Fun); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index b85b25495..c3ce3f9c7 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -27,8 +27,8 @@ namespace psr { -/// Solves the given IFDSTabulationProblem as described in the 1996 paper by -/// Sagiv, Horwitz and Reps. To solve the problem, call solve(). Results +/// Solves the given IFDSTabulationProblem as described in the 1995 paper by +/// Reps, Horwitz and Sagiv. To solve the problem, call solve(). Results /// can then be queried by using resultAt() and resultsAt(). template > diff --git a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h index 41cb4d58e..96675a4d3 100644 --- a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h +++ b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h @@ -45,9 +45,9 @@ class [[deprecated("This ancient API is not maintained and should not be used " std::map> SpecialEdgeFunctions; std::vector SpecialFunctionNames; - // Constructs the SpecialSummaryMap such that it contains all glibc, - // llvm.intrinsics and C++'s new, new[], delete, delete[] with identity - // flow functions. 
+ /// Constructs the SpecialSummaryMap such that it contains all glibc, + /// llvm.intrinsics and C++'s new, new[], delete, delete[] with identity + /// flow functions. SpecialSummaries() { // insert default flow and edge functions for (const auto &FunctionName : @@ -71,7 +71,7 @@ class [[deprecated("This ancient API is not maintained and should not be used " return Instance; } - // Returns true, when an existing function is overwritten, false otherwise. + /// Returns true, when an existing function is overwritten, false otherwise. bool provideSpecialSummary(const std::string &Name, FlowFunctionPtrType FlowFunc) { bool Override = containsSpecialSummary(Name); @@ -79,7 +79,7 @@ class [[deprecated("This ancient API is not maintained and should not be used " return Override; } - // Returns true, when an existing function is overwritten, false otherwise. + /// Returns true, when an existing function is overwritten, false otherwise. bool provideSpecialSummary(const std::string &Name, FlowFunctionPtrType FlowFunc, std::shared_ptr> EdgeFunc) { diff --git a/include/phasar/DataFlow/Mono/InterMonoProblem.h b/include/phasar/DataFlow/Mono/InterMonoProblem.h index 9ee1b3298..6ad3bcb32 100644 --- a/include/phasar/DataFlow/Mono/InterMonoProblem.h +++ b/include/phasar/DataFlow/Mono/InterMonoProblem.h @@ -47,6 +47,14 @@ class InterMonoProblem : public IntraMonoProblem { const i_t *ICF; public: + /// An interprocedural monotone problem generated from an intermediate + /// representation, a type hierarchy of said representation, a control flow + /// graph, points-to information and optionally a vector of entry points. + /// @param[in] IRDB A project intermediate representation data base. + /// @param[in] TH A type hierarchy based on the given IRDB. + /// @param[in] ICF A control flow graph based on the given IRDB. + /// @param[in] PT Points-to information based on the given IRDB. + /// @param[in] EntryPoints A vector of entry points. Empty by default. 
InterMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const i_t *ICF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/IntraMonoProblem.h b/include/phasar/DataFlow/Mono/IntraMonoProblem.h index 2f7c77b86..8fd23b850 100644 --- a/include/phasar/DataFlow/Mono/IntraMonoProblem.h +++ b/include/phasar/DataFlow/Mono/IntraMonoProblem.h @@ -65,11 +65,11 @@ template class IntraMonoProblem { /// An intraprocedural monotone problem generated from an intermediate /// representation, a type hierarchy of said representation, a control flow /// graph, points-to information and optionally a vector of entry points. - /// @param IRDB A project intermediate representation data base. - /// @param TH A type hierarchy based on the given IRDB. - /// @param CF A control flow graph based on the given IRDB. - /// @param PT Points-to information based on the given IRDB. - /// @param EntryPoints A vector of entry points. Empty by default. + /// @param[in] IRDB A project intermediate representation data base. + /// @param[in] TH A type hierarchy based on the given IRDB. + /// @param[in] CF A control flow graph based on the given IRDB. + /// @param[in] PT Points-to information based on the given IRDB. + /// @param[in] EntryPoints A vector of entry points. Empty by default. IntraMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const CFGBase *CF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h index d781d127f..b1ce4e2cd 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h @@ -28,6 +28,9 @@ namespace psr { +/// The PathSensitivityManager class is an implementation of the +/// PathSensitivityManagerBase class and the PathSensitivityManagerMixin class. +/// It holds an exploded super graph. 
template class PathSensitivityManager : public PathSensitivityManagerBase, diff --git a/include/phasar/Domain/AnalysisDomain.h b/include/phasar/Domain/AnalysisDomain.h index e0ce27c64..b467506fd 100644 --- a/include/phasar/Domain/AnalysisDomain.h +++ b/include/phasar/Domain/AnalysisDomain.h @@ -30,8 +30,8 @@ namespace psr { /// conduct an analysis but not correctly set, it will statically report an /// error and ask for the missing piece of information. struct AnalysisDomain { - // Data-flow fact --- Specifies the type of an individual data-flow fact that - // is propagated through the program under analysis. + /// Data-flow fact --- Specifies the type of an individual data-flow fact that + /// is propagated through the program under analysis. using d_t = void; // (Control-flow) Node --- Specifies the type of a node in the // (inter-procedural) control-flow graph and can be though of as an individual diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index 23e1afe52..cdb76cb0d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -25,7 +25,10 @@ template class CallGraph; template <> struct CFGTraits : CFGTraits {}; -/// A class that represents a backwards interprocedural control flow graph. +/// A class that represents a backwards interprocedural control flow graph. It +/// uses the LLVMBasedBackwardICFG class itself as a template argument for +/// the ICFGBase template argument. This is called a curiously recurring +/// template pattern. 
class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index 96b04c07c..20d8d8950 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -44,7 +44,10 @@ class Resolver; class LLVMBasedICFG; template <> struct CFGTraits : CFGTraits {}; -/// A class that implements an interprocedural control flow graph. +/// A class that implements an interprocedural control flow graph. It +/// uses the LLVMBasedICFG class itself as a template argument for the +/// ICFGBase template argument. This is called a curiously recurring template +/// pattern. class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index f97f8214f..3ef05b893 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -23,7 +23,9 @@ template <> struct CFGTraits : CFGTraits { using v_t = const llvm::Value *; }; -/// A class that represents a sparse control flow graph. +/// A class that represents a sparse control flow graph. It uses the +/// SparseLLVMBasedCFG class itself as a template argument for the SparseCFGBase +/// template argument. This is called a curiously recurring template pattern. 
class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { friend struct SVFGCache; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 901465935..5b46fc6b2 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -20,7 +20,10 @@ class SparseLLVMBasedCFG; class DIBasedTypeHierarchy; struct SVFGCache; -/// A class that represents a sparse interprocedural control flow graph. +/// A class that represents a sparse interprocedural control flow graph. It uses +/// the SparseLLVMBasedICFG class itself as a template argument for the +/// SparseLLVMBasedCFGProvider template argument. This is called a curiously +/// recurring template pattern. class SparseLLVMBasedICFG : public LLVMBasedICFG, public SparseLLVMBasedCFGProvider { diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index f99612ea8..830db44ed 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -31,7 +31,10 @@ struct CFGTraits : CFGTraits {}; /// Similar to SparseLLVMBasedICFG; the only difference is that this one *is* no /// LLVMBasedICFG -- it contains a pointer to an already existing one. -/// It still owns the sparse value-flow graphs +/// It still owns the sparse value-flow graphs. +/// It also uses the SparseLLVMBasedICFGView class itself as a template argument +/// for the SparseLLVMBasedCFGProvider template argument. This is called a +/// curiously recurring template pattern. 
class SparseLLVMBasedICFGView : public LLVMBasedCFG, public ICFGBase, diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index 34012ec33..f437bbf2c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -51,6 +51,8 @@ struct IDEExtendedTaintAnalysisDomain : public LLVMAnalysisDomainDefault { }; namespace XTaint { +/// An extended taint analysis is a field sensitive taint analysis, that uses K +/// limit field access path. class IDEExtendedTaintAnalysis : public IDETabulationProblem, public AnalysisBase { diff --git a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h index 988a977c4..d3979aa1e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h +++ b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h @@ -40,6 +40,8 @@ class Instruction; namespace psr { class LLVMPathConstraints; +/// A base class that can be used to implement a path solver based on the Z3 SMT +/// Solver. class Z3BasedPathSensitivityManagerBase : public PathSensitivityManagerBase { public: diff --git a/include/phasar/PhasarLLVM/Utils/Annotation.h b/include/phasar/PhasarLLVM/Utils/Annotation.h index 23a1ee0dd..bf5527e7c 100644 --- a/include/phasar/PhasarLLVM/Utils/Annotation.h +++ b/include/phasar/PhasarLLVM/Utils/Annotation.h @@ -10,8 +10,9 @@ namespace psr { -/// @brief Helper classes that allow for an easier retrieval of annotation +/// \file Helper classes that allow for an easier retrieval of annotation /// information. 
+ class VarAnnotation { public: VarAnnotation(const llvm::CallBase *AnnotationCall) noexcept; diff --git a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h index f9d3d91d7..4394fe1bb 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h @@ -39,6 +39,10 @@ class DILocation; namespace psr { +/// \file This file contains useful structs and functions to get and store +/// information about the source code or the intermediate representation of a +/// project. + struct DebugLocation { unsigned Line{}; unsigned Column{}; @@ -47,6 +51,9 @@ struct DebugLocation { [[nodiscard]] llvm::DILocalVariable *getDILocalVariable(const llvm::Value *V); +/// A struct that contains information about a source code line, what the +/// corresponding function name of that line is and what the file name is that +/// the line is in. struct SourceCodeInfo { std::string SourceCodeLine; std::string SourceCodeFilename; diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index b0d8bccf9..809265ecb 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -53,11 +53,17 @@ bool isIntegerLikeType(const llvm::Type *T) noexcept; bool isAllocaInstOrHeapAllocaFunction(const llvm::Value *V) noexcept; bool isHeapAllocatingFunction(const llvm::Function *F) noexcept; -// TODO add description +/// Returns true if the provided function and the function type are both not +/// null and have the same number of parameters and the same return type. If the +/// argument ExactMatch is set to true, which it is by default, the two provided +/// arguments must also have the same type for each argument, for the function +/// to return true. 
bool matchesSignature(const llvm::Function *F, const llvm::FunctionType *FType, bool ExactMatch = true); -// TODO add description +/// Returns true iff the provided functions are both not null and have the same +/// number of parameters, the same return type and each parameter of both +/// functions has the same type as well. bool matchesSignature(const llvm::FunctionType *FType1, const llvm::FunctionType *FType2); diff --git a/include/phasar/Utils/AnalysisPrinterBase.h b/include/phasar/Utils/AnalysisPrinterBase.h index a1ca6aee3..e99bad914 100644 --- a/include/phasar/Utils/AnalysisPrinterBase.h +++ b/include/phasar/Utils/AnalysisPrinterBase.h @@ -10,6 +10,8 @@ namespace psr { +/// A class that serves as the basis for a custom analysis printer +/// implementation. template class AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; using d_t = typename AnalysisDomainTy::d_t; diff --git a/include/phasar/Utils/DebugOutput.h b/include/phasar/Utils/DebugOutput.h index 03f6098b3..bfd282183 100644 --- a/include/phasar/Utils/DebugOutput.h +++ b/include/phasar/Utils/DebugOutput.h @@ -23,6 +23,9 @@ namespace psr { namespace detail { +/// \file This file contains many useful ways of printing information for +/// debugging purposes. + template void printHelper(OS_t &OS, const T &Data); template diff --git a/include/phasar/Utils/DefaultAnalysisPrinter.h b/include/phasar/Utils/DefaultAnalysisPrinter.h index f9008b946..5db54f46b 100644 --- a/include/phasar/Utils/DefaultAnalysisPrinter.h +++ b/include/phasar/Utils/DefaultAnalysisPrinter.h @@ -30,6 +30,7 @@ template struct Warning { LatticeElement(std::move(Lattice)), AnalysisType(DfAnalysisType) {} }; +/// A default implementation of the AnalysisPrinterBase class. 
template class DefaultAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; diff --git a/include/phasar/Utils/EquivalenceClassMap.h b/include/phasar/Utils/EquivalenceClassMap.h index a11d3fe4e..e006686d0 100644 --- a/include/phasar/Utils/EquivalenceClassMap.h +++ b/include/phasar/Utils/EquivalenceClassMap.h @@ -23,11 +23,11 @@ namespace psr { -// EquivalenceClassMap is a special map type that splits the keys into -// equivalence classes regarding their mapped values. Meaning, that all keys -// that are equivalent are mapped to the same value. Two keys are treated as -// equivalent and merged into a equivalence class when they refer to Values -// that compare equal. +/// EquivalenceClassMap is a special map type that splits the keys into +/// equivalence classes regarding their mapped values. Meaning, that all keys +/// that are equivalent are mapped to the same value. Two keys are treated as +/// equivalent and merged into a equivalence class when they refer to Values +/// that compare equal. template struct EquivalenceClassMap { template using SetType = std::set; using EquivalenceClassBucketT = std::pair, ValueT>; @@ -69,41 +69,41 @@ template struct EquivalenceClassMap { return llvm::make_range(begin(), end()); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type insert(const KeyT &Key, ValueType &&Value) { return try_emplace(Key, std::forward(Value)); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. 
template insert_return_type insert(KeyT &&Key, ValueType &&Value) { return try_emplace(std::move(Key), std::forward(Value)); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. insert_return_type insert(const std::pair &KVPair) { return try_emplace(KVPair.first, KVPair.second); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. insert_return_type insert(std::pair &&KVPair) { return try_emplace(std::move(KVPair.first), std::move(KVPair.second)); } - // Insert a range of Key Values pairs into the map. + /// Insert a range of Key Values pairs into the map. template void insert(InputIt I, InputIt End) { for (; I != End; ++I) { try_emplace(I->first, I->second); } } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. template insert_return_type try_emplace(KeyT &&Key, Ts &&...Args) { ValueT Val{std::forward(Args...)}; @@ -118,8 +118,8 @@ template struct EquivalenceClassMap { return std::make_pair(StoredData.back().first.begin(), true); } - // Inserts Key into the corresponding equivalence class for Value. If Value - // is not already in the map a new equivalence class is created. + /// Inserts Key into the corresponding equivalence class for Value. If Value + /// is not already in the map a new equivalence class is created. 
template insert_return_type try_emplace(const KeyT &Key, Ts &&...Args) { ValueT Val{std::forward(Args...)}; @@ -134,7 +134,7 @@ template struct EquivalenceClassMap { return std::make_pair(StoredData.back().first.begin(), true); } - // Return 1 if the specified key is in the map, 0 otherwise. + /// Return 1 if the specified key is in the map, 0 otherwise. [[nodiscard]] inline size_type count(const KeyT &Key) const { for (auto &KVPair : StoredData) { if (KVPair.first.count(Key) >= 1) { @@ -148,7 +148,7 @@ template struct EquivalenceClassMap { return StoredData.size(); } - // Returns the size of the map, i.e., the number of equivalence classes. + /// Returns the size of the map, i.e., the number of equivalence classes. [[nodiscard]] inline size_type size() const { return numEquivalenceClasses(); } @@ -273,7 +273,7 @@ class EquivalenceClassMapNG { return Values.size(); } - // Returns the size of the map, i.e., the number of equivalence classes. + /// Returns the size of the map, i.e., the number of equivalence classes. [[nodiscard]] inline size_t size() const noexcept { return numEquivalenceClasses(); } diff --git a/include/phasar/Utils/ErrorHandling.h b/include/phasar/Utils/ErrorHandling.h index 3e5920af1..93dfacf2c 100644 --- a/include/phasar/Utils/ErrorHandling.h +++ b/include/phasar/Utils/ErrorHandling.h @@ -18,6 +18,10 @@ #include namespace psr { + +/// \file This file contains useful functions for handling errors, by using +/// std::system_error, or returning null or a default value. + template T getOrThrow(llvm::ErrorOr ValOrErr) { if (ValOrErr) { return std::move(*ValOrErr); diff --git a/include/phasar/Utils/IO.h b/include/phasar/Utils/IO.h index f9966df63..8aea9333c 100644 --- a/include/phasar/Utils/IO.h +++ b/include/phasar/Utils/IO.h @@ -28,6 +28,9 @@ namespace psr { +/// \file This file contains functions for reading in text files and json files +/// and provides error handling for this process as well, if needed. 
+ [[nodiscard]] llvm::ErrorOr readTextFileOrErr(const llvm::Twine &Path); [[nodiscard]] llvm::ErrorOr> diff --git a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h index aa3e4ef23..0437f4951 100644 --- a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h +++ b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h @@ -13,6 +13,8 @@ #include namespace psr { +/// This class implements an AnalysisPrinterBase, which opens a file stream on +/// the fly to print analysis data. template class OnTheFlyAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; diff --git a/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h b/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h index 2eae2824b..4608a5d86 100644 --- a/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h +++ b/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h @@ -4,12 +4,8 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// -// \file -// These are the utilities/helpers used by the CFL Alias Analyses available in -// tree, i.e. Steensgaard's and Andersens'. -// -//===----------------------------------------------------------------------===// +/// \file These are the utilities/helpers used by the CFL Alias Analyses +/// available in tree, i.e. Steensgaard's and Andersens'. 
#ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H #define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H From acbfaf2627768cf06e1106a3756af2dc4a9170bc Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 09:17:08 +0200 Subject: [PATCH 06/17] Revert change in LLVM code --- .../Pointer/external/llvm/CFLAliasAnalysisUtils.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h b/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h index 4608a5d86..2eae2824b 100644 --- a/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h +++ b/lib/PhasarLLVM/Pointer/external/llvm/CFLAliasAnalysisUtils.h @@ -4,8 +4,12 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -/// \file These are the utilities/helpers used by the CFL Alias Analyses -/// available in tree, i.e. Steensgaard's and Andersens'. +//===----------------------------------------------------------------------===// +// \file +// These are the utilities/helpers used by the CFL Alias Analyses available in +// tree, i.e. Steensgaard's and Andersens'. 
+// +//===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H #define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H From fe4be0efdc053e31b07475b7495695258af91571 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 11:36:23 +0200 Subject: [PATCH 07/17] Fix existing docs --- docs/Doxyfile.in | 8 +- docs/README.dox | 232 +----------------- include/phasar/ControlFlow/CallGraphData.h | 2 +- .../phasar/DataFlow/IfdsIde/EdgeFunction.h | 8 +- .../phasar/DataFlow/IfdsIde/EdgeFunctions.h | 32 +-- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 6 +- .../DataFlow/IfdsIde/IDETabulationProblem.h | 25 +- .../DataFlow/IfdsIde/IFDSIDESolverConfig.h | 18 ++ .../DataFlow/IfdsIde/Solver/Compressor.h | 12 +- .../IfdsIde/Solver/FlowFunctionCache.h | 2 +- .../DataFlow/IfdsIde/Solver/IFDSSolver.h | 11 +- .../IfdsIde/Solver/IterativeIDESolver.h | 2 +- .../DataFlow/Mono/Contexts/CallStringCTX.h | 7 +- .../phasar/DataFlow/Mono/InterMonoProblem.h | 7 +- .../phasar/DataFlow/Mono/IntraMonoProblem.h | 9 +- .../DataFlow/Mono/Solver/InterMonoSolver.h | 8 +- .../DataFlow/Mono/Solver/IntraMonoSolver.h | 6 +- .../PathSensitivity/PathSensitivityManager.h | 5 +- .../PathSensitivityManagerMixin.h | 20 ++ include/phasar/Domain/AnalysisDomain.h | 5 +- .../ControlFlow/GlobalCtorsDtorsModel.h | 21 +- .../ControlFlow/LLVMBasedBackwardCFG.h | 3 +- .../ControlFlow/LLVMBasedBackwardICFG.h | 6 +- .../PhasarLLVM/ControlFlow/LLVMBasedCFG.h | 3 +- .../PhasarLLVM/ControlFlow/LLVMBasedICFG.h | 6 +- .../ControlFlow/LLVMVFTableProvider.h | 8 +- .../ControlFlow/Resolver/CHAResolver.h | 9 +- .../ControlFlow/Resolver/NOResolver.h | 7 +- .../ControlFlow/Resolver/OTFResolver.h | 10 +- .../ControlFlow/Resolver/RTAResolver.h | 9 +- .../ControlFlow/Resolver/Resolver.h | 3 +- .../ControlFlow/SparseLLVMBasedCFG.h | 5 +- .../ControlFlow/SparseLLVMBasedICFG.h | 23 +- .../ControlFlow/SparseLLVMBasedICFGView.h | 14 +- 
.../DataFlow/IfdsIde/FunctionDataFlowFacts.h | 1 + .../IfdsIde/LLVMFunctionDataFlowFacts.h | 3 +- .../DataFlow/IfdsIde/LLVMZeroValue.h | 5 +- .../Problems/IDEExtendedTaintAnalysis.h | 4 +- .../Z3BasedPathSensitvityManager.h | 16 +- .../PhasarLLVM/Domain/LLVMAnalysisDomain.h | 6 +- .../phasar/PhasarLLVM/Pointer/LLVMAliasSet.h | 37 ++- .../PhasarLLVM/Pointer/LLVMAliasSetData.h | 4 +- .../Pointer/LLVMBasedAliasAnalysis.h | 11 +- .../TypeHierarchy/DIBasedTypeHierarchy.h | 35 ++- .../TypeHierarchy/DIBasedTypeHierarchyData.h | 2 +- include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h | 3 +- .../phasar/PhasarLLVM/Utils/LLVMShorthands.h | 16 +- include/phasar/PhasarPass/PhasarPass.h | 3 +- include/phasar/PhasarPass/PhasarPrinterPass.h | 2 - include/phasar/TypeHierarchy/VFTable.h | 3 +- include/phasar/Utils/AnalysisPrinterBase.h | 4 +- include/phasar/Utils/DefaultAnalysisPrinter.h | 3 +- include/phasar/Utils/EquivalenceClassMap.h | 11 +- .../phasar/Utils/OnTheFlyAnalysisPrinter.h | 6 +- include/phasar/Utils/Utilities.h | 8 +- .../TaintConfig/LLVMTaintConfig.cpp | 2 +- 56 files changed, 290 insertions(+), 447 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index b45f14eeb..f3eaa58cc 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -38,13 +38,13 @@ PROJECT_NAME = "PhASAR" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = @PHASAR_VERSION@ +PROJECT_NUMBER = @PHASAR_VERSION@@development # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. -PROJECT_BRIEF = "PhASAR a LLVM-based Static Analysis Framework" +PROJECT_BRIEF = "A LLVM-based Static Analysis Framework" # With the PROJECT_LOGO tag one can specify an logo or icon that is included in # the documentation. 
The maximum height of the logo should not exceed 55 pixels @@ -794,7 +794,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = */external/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -805,7 +805,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = +EXCLUDE_SYMBOLS = *::detail::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include diff --git a/docs/README.dox b/docs/README.dox index 6c16e7f5e..6ae3b786c 100644 --- a/docs/README.dox +++ b/docs/README.dox @@ -2,233 +2,27 @@ @mainpage PhASAR: A LLVM-based Static Analysis Framework +PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. + +This page contains the generated code documentation of PhASAR. +You can find the original source on GitHub: . The README should already give you a good first overview. + +For further information, please checkout PhASAR's [Wiki](https://github.com/secure-software-engineering/phasar/wiki). + @subsubsection SSEG Secure Software Engineering Group -PhASAR is primarily developed and maintained by the Secure Software Engineering -Group at Heinz Nixdorf Institute (University of Paderborn) and Fraunhofer IEM. 
+PhASAR is primarily developed and maintained by the [Secure Software Engineering Group](https://www.hni.uni-paderborn.de/sse) at Heinz Nixdorf Institute (University of Paderborn) and [Fraunhofer IEM](https://www.iem.fraunhofer.de/). + +PhASAR was initially developed by Philipp Dominik Schubert (@pdschubert)(). -@authors -- Philipp Dominik Schubert (@pdschubert)(philipp.schubert@upb.de) +\b Currently, PhASAR is maintained by - Fabian Schiebel (@fabianbs96)(fabian.schiebel@iem.fraunhofer.de) - Sriteja Kummita (@sritejakv) - Lucas Briese (@jusito) - Martin Mory (@MMory)(martin.mory@upb.de) -- others +- *others* \b License \n - See LICENSE.txt - -@subsubsection RVOTCPPS Required Version of the C++ Standard - -PhASAR requires at least C++-17. - -However, building in C++20 mode is supported. You may enable this setting the cmake variable CMAKE_CXX_STANDARD to 20. Although phasar currently does not make use of C++-20 features (except for some concepts behind an #ifdef border), your client application that just uses phasar as a library may want to use C++20 earlier. - -@subsubsection CSVOLLVM Currently Supported Version of LLVM - -PhASAR is currently set up to support LLVM-15.0.* - -@subsubsection WIPHASAR What is PhASAR? - -PhASAR is a LLVM-based static analysis framework written in C++. It allows users to specify arbitrary data-flow problems which are then solved in a fully-automated manner on the specified LLVM IR target code. Computing points-to information, call-graph(s), etc. is done by the framework, thus you can focus on what matters. - -@subsubsection BRCH Breaking Changes - -To keep PhASAR in a state that it is well suited for state-of-the-art research in static analysis, as well as for productive use, we have to make breaking changes. Please refer to BreakingChanges.md for detailed information on what was broken recently and how to migrate. - -@subsubsection HDIGSWPHASAR How do I get started with PhASAR? - -We have some documentation on PhASAR in our Wiki. 
You probably would like to read this README first. - -Please also have a look on PhASAR's project directory and notice the project directory examples/ as well as the custom tool tools/example-tool/myphasartool.cpp. - -@subsubsection BPHASAR Building PhASAR - -It is recommended to compile PhASAR yourself in order to get the full C++ experience and to have full control over the build mode. However, you may also want to try out one of the pre-built versions of PhASAR or the Docker container. - -As a shortcut for the very first PhASAR build on your system, you can use our bootstrap script. Please note that you must have python installed for the script to work properly. - -@code{.sh} -./bootstrap.sh -@endcode - -Note: If you want to do changes within PhASAR, it is recommended to build it in Debug mode: - -@code{.sh} -./bootstrap.sh -DCMAKE_BUILD_TYPE=Debug -@endcode - -The bootstrap script may ask for superuser permissions (to install the dependencies); however it is not recommended to start the whole script with sudo. - -For subsequent builds, see Compiling PhASAR. - -@subsubsection CPHASAR Compiling PhASAR (if not already done using the bootstrap script) - -Set the system's variables for the C and C++ compiler to clang: - -@code{.sh} -export CC=/usr/local/bin/clang -export CXX=/usr/local/bin/clang++ -@endcode - -You may need to adjust the paths according to your system. When you cloned PhASAR from GitHub you need to initialize PhASAR's submodules before building it: - -@code{.sh} -git submodule update --init -@endcode - -If you downloaded PhASAR as a compressed release (e.g. .zip or .tar.gz) you can use the init-submodules-release.sh script that manually clones the required submodules: - -@code{.sh} -utils/init-submodules-release.sh -@endcode - -Navigate into the PhASAR directory. The following commands will do the job and compile the PhASAR framework: - -@code{.sh} -mkdir build -cd build/ -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. 
-ninja -j $(nproc) # or use a different number of cores to compile it -sudo ninja install # only if you wish to install PhASAR system wide -@endcode - -When you have used the bootstrap.sh script to install PhASAR, the above steps are already done. Use them as a reference if you wish to modify PhASAR and recompile it. - -After compilation using cmake the following two binaries can be found in the build/tools directory: - -- phasar-cli - the PhASAR command-line tool (previously called phasar-llvm) that provides access to analyses that are already implemented within PhASAR. Use this if you don't want to build an own tool on top of PhASAR. - -- myphasartool - an example tool that shows how tools can be build on top of PhASAR - -Please be careful and check if errors occur during the compilation. - -When using CMake to compile PhASAR the following optional parameters can be used: - - -
Parameter : Type Effect -
BUILD_SHARED_LIBS : BOOL Build shared libraries -- Not recommended anymore. You may want to use PHASAR_BUILD_DYNLIB instead (default is OFF) -
PHASAR_BUILD_DYNLIB : BOOL Build one fat shared library (default is OFF) -
CMAKE_BUILD_TYPE : STRING Build PhASAR in 'Debug', 'RelWithDebInfo' or 'Release' mode (default is 'Debug') -
CMAKE_INSTALL_PREFIX : PATH Path where PhASAR will be installed if "ninja install" is invoked or the “install” target is built (default is /usr/local/phasar) -
PHASAR_CUSTOM_CONFIG_INSTALL_DIR : PATH If set, customizes the directory, where configuration files for PhASAR are installed (default is /usr/local/.phasar-config) -
PHASAR_ENABLE_DYNAMIC_LOG : BOOL Makes it possible to switch the logger on and off at runtime (default is ON) -
PHASAR_BUILD_DOC : BOOL Build PhASAR documentation (default is OFF) -
PHASAR_BUILD_UNITTESTS : BOOL Build PhASAR unit tests (default is ON) -
PHASAR_BUILD_IR : BOOL Build PhASAR IR (required for running the unit tests) (default is ON) -
PHASAR_BUILD_OPENSSL_TS_UNITTESTS : BOOL Build PhASAR unit tests that require OpenSSL (default is OFF) -
PHASAR_ENABLE_PAMM : STRING Enable the performance measurement mechanism ('Off', 'Core' or 'Full', default is Off) -
PHASAR_ENABLE_PIC : BOOL Build Position-Independed Code (default is ON) -
PHASAR_ENABLE_WARNINGS : BOOL Enable compiler warnings (default is ON) -
CMAKE_CXX_STANDARD : INT Build phasar in C++17 or C++20 mode (default is 17) -
- -You can use these parameters either directly or modify the installer-script bootstrap.sh - -@subsubsection AROCT A Remark on Compile Time - -C++'s long compile times are always a pain. As shown in the above, when using cmake the compilation can easily be run in parallel, resulting in shorter compilation times. Make use of it! - -@subsubsection RUNNINGATS Running a Test Solver - -To test if everything works as expected please run the following command: - -@code{.sh} -$ phasar-cli -m test/llvm_test_code/basic/module_cpp.ll -D ifds-solvertest -@endcode - -You can find the phasar-cli tool in the build-tree under tools/phasar-cli. - -If you obtain output other than a segmentation fault or an exception terminating the program abnormally everything works as expected. - -@subsubsection BPHASAROAMS Building PhASAR on a MacOS System - -Due to unfortunate updates to MacOS and the handling of C++, especially on the newer M1 processors, we can't support native development on Mac. The easiest solution to develop PhASAR on a Mac right now is to use dockers development environments. Clone this repository as described in their documentation. Afterwards, you have to login once manually, as a root user by running docker exec -it -u root /bin/bash to complete the rest of the build process as described in this readme (install submodules, run bootstrap.sh, ...). Now you can just attach your docker container to VS Code or any other IDE, which supports remote development. - -@subsubsection INST Installation - -PhASAR can be installed using the installer scripts as explained in the following. However, you do not need to install PhASAR in order to use it. - -Installing PhASAR on an Ubuntu System - -In the following, we would like to give an complete example of how to install PhASAR using an Ubuntu or Unix-like system. - -Therefore, we provide an installation script. 
To install PhASAR, just navigate to the top-level directory of PhASAR and use the following command: - -@code{.sh} -./bootstrap.sh --install -@endcode - -The bootstrap script may ask for superuser permissions. - -Done! - -If You have already built phasar, you can just invoke - -@code{.sh} -sudo ninja install -@endcode - -@subsubsection HTUPHASAR How to use PhASAR? - -We recomment using phasar as a library with cmake or conan. - -If you already have installed phasar, Use-PhASAR-as-a-library may be a good start. - -Otherwise, we recommend adding PhASAR as a git submodule to your repository. In this case, just add_subdirectory the phasar submodule directory within your CMakeLists.txt. - -Assuming you have checked out phasar in external/phasar, the phasar-related cmake commands may look like this: - -@code{.sh} - -add_subdirectory(external/phasar EXCLUDE_FROM_ALL) # Build phasar with your tool - -... - -target_link_libraries(yourphasartool - ... - phasar # Make your tool link against phasar -) - -@endcode - -Depending on your use of PhASAR you also may need to add LLVM to your build. - -For more information please consult our PhASAR wiki pages. - -@subsubsection HTUWCONAN How to use with Conan v2 ? - -To export the recipe and dependencies execute from repo root: - -- @code{.sh} conan export utils/conan/llvm-core/ --version 15.0.7 --user secure-software-engineering @endcode -- @code{.sh} conan export utils/conan/clang/ --version 15.0.7 --user secure-software-engineering @endcode -- @code{.sh} conan export . @endcode -- View exported conan list "phasar/*" -- Consume the package - -If you just want to use phasar-cli: - -- @code{.sh} conan install --tool-requires phasar/... --build=missing -of . @endcode -- @code{.sh} source conanbuild.sh @endcode -- @code{.sh} phasar-cli --help @endcode - -@subsubsection PHUTIPHASAR Please help us to improve PhASAR - -You are using PhASAR and would like to help us in the future? Then please support us by filling out this web form. 
- -By giving us feedback you help to decide in what direction PhASAR should stride in the future and give us clues about our user base. Thank you very much! - -@subsubsection IPHASARGPCH Installing PhASAR's Git pre-commit hook - -You are very much welcome to contribute to the PhASAR project. Please make sure that you install our pre-commit hook that ensures your commit adheres to the most important coding rules of the PhASAR project. For more details please consult Coding Conventions and Contributing to PhASAR. - -To install the pre-commit hook, please run the following commands in PhASAR's root directory: - -@code{.sh} -- pip install pre-commit -- pre-commit install -@endcode - -Thanks. And have fun with the project. + PhASAR is made available under the permissive MIT License. See LICENSE.txt */ diff --git a/include/phasar/ControlFlow/CallGraphData.h b/include/phasar/ControlFlow/CallGraphData.h index 332185ddb..58393936a 100644 --- a/include/phasar/ControlFlow/CallGraphData.h +++ b/include/phasar/ControlFlow/CallGraphData.h @@ -20,7 +20,7 @@ namespace psr { /// A data structure used for storing, serializing and deserializing call-graph -/// data. +/// information. struct CallGraphData { /// Mangled FunName --> [CS-IDs] diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h index 4a74d9e3f..6a94c0f24 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h @@ -118,8 +118,8 @@ class EdgeFunctionBase { : AllocationPolicy::CustomHeapAllocated; }; -/// Non-null reference to an edge function that is guarenteed to be managed by -/// an EdgeFunction object. +/// \brief Non-null reference to an edge function that is guarenteed to be +/// managed by an EdgeFunction object. 
template class [[clang::trivial_abi]] EdgeFunctionRef final : EdgeFunctionBase { template friend class EdgeFunction; @@ -164,8 +164,8 @@ class [[clang::trivial_abi]] EdgeFunctionRef final : EdgeFunctionBase { IsCached{}; }; -/// Ref-counted and type-erased edge function with small-object optimization. -/// Supports caching. +/// \brief Ref-counted and type-erased edge function with small-object +/// optimization. Supports caching. template // -- combined copy and move assignment // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h index 5a57cdfe9..e6b4cd2e1 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctions.h @@ -61,12 +61,12 @@ template class EdgeFunctions { /// /// Consider the following flow function implementation (cf. /// FlowFunctions::getNormalFlowfunction()): - /// + /// \code /// f(0) -> {0} // pass the lambda (or zero fact) as identity /// f(o) -> {o, x} // generate a new fact x from o /// f(.) -> {.} // pass all other facts that hold before - /// instruction_1 - /// // as identity + /// // instruction_1 as identity + /// \endcode /// /// The above flow-function implementation corresponds to the following edges /// in the exploded supergraph. @@ -84,10 +84,11 @@ template class EdgeFunctions { /// function is queried that describes a value computation. This results in /// the following queries: /// + /// \code /// getNormalEdgeFunction(0, Curr, 0 Succ); /// getNormalEdgeFunction(o, Curr, o Succ); /// getNormalEdgeFunction(o, Curr, x Succ); - /// + /// \endcode virtual EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) = 0; @@ -105,13 +106,13 @@ template class EdgeFunctions { /// /// Consider the following flow function implementation (cf. 
/// FlowFunctions::getCallFlowFunction()): - /// + /// \code /// f(0) -> {0} // pass as identity into the callee target /// f(o) -> {q} // map actual o into formal q /// f(p) -> {r} // map actual p into formal r /// f(.) -> {} // kill all other facts that are not visible to the /// // callee target - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. /// \code @@ -135,11 +136,11 @@ template class EdgeFunctions { /// For each edge generated by the respective flow function a call edge /// function is queried that describes a value computation. This results in /// the following queries: - /// + /// \code /// getCallEdgeFunction(CallInst, 0, CalleeFun, 0); /// getCallEdgeFunction(CallInst, o, CalleeFun, q); /// getCallEdgeFunction(CallInst, p, CalleeFun, r); - /// + /// \endcode virtual EdgeFunction getCallEdgeFunction(n_t CallInst, d_t SrcNode, f_t CalleeFun, d_t DestNode) = 0; @@ -158,13 +159,13 @@ template class EdgeFunctions { /// /// Consider the following flow function implementation (cf. /// FlowFunctions::getRetFlowFunction()): - /// + /// \code /// f(0) -> {0} // pass as identity into the callee target /// f(r) -> {x} // map return value to lhs variable at CallSite /// f(q) -> {o} // map pointer-typed formal q to actual o /// f(.) -> {} // kill all other facts that are not visible to the /// // caller - /// + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. /// \code @@ -188,11 +189,11 @@ template class EdgeFunctions { /// For each edge generated by the respective flow function a return edge /// function is queried that describes a value computation. 
This results in /// the following queries: - /// + /// \code /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, 0, RetSite, 0); /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, q, RetSite, o); /// getReturnEdgeFunction(CallSite, CalleeFun, ExitInst, r, RetSite, x); - /// + /// \endcode virtual EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFun, n_t ExitInst, d_t ExitNode, n_t RetSite, d_t RetNode) = 0; @@ -212,7 +213,7 @@ template class EdgeFunctions { /// /// Consider the following flow function implementation (cf. /// FlowFunctions::getCallToRetFlowFunction()): - /// + /// \code /// f(0) -> {0} // pass lambda as identity alongsite the CallSite /// f(o) -> {o} // assuming that o is passed by value, it is passed /// // alongsite the CallSite @@ -220,9 +221,8 @@ template class EdgeFunctions { /// // to kill p, as it will be handled by the call- and /// // return-flow functions /// f(.) -> {.} // pass everything that is not involved in the call - /// as - /// // identity - /// + /// // as identity + /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. /// \code diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index c30a70200..6e54fe393 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -672,8 +672,7 @@ class FlowFunctions /// f(0) -> {0} // pass the lambda (or zero fact) as identity /// f(o) -> {o, x} // generate a new fact x from o /// f(.) -> {.} // pass all other facts that hold before - /// instruction_1 - /// // as identity + /// // instruction_1 as identity /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. @@ -844,8 +843,7 @@ class FlowFunctions /// // to kill p, as it will be handled by the call- and /// // return-flow functions /// f(.) 
-> {.} // pass everything that is not involved in the call - /// as - /// // identity + /// // as identity /// \endcode /// The above implementation corresponds to the following edges in the /// exploded supergraph. diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index a050ea6c3..914749a5d 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -53,6 +53,12 @@ class AllTopFnProvider< } }; +/// \brief The analysis problem interface for IDE problems (solvable by the +/// IDESolver). Subclass this and override all pure-virtual functions to create +/// your own IDE analysis. +/// +/// For more information on how to write an IDE analysis, see [Writing an IDE +/// Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IDE-analysis) template > class IDETabulationProblem : public FlowFunctions, @@ -73,15 +79,16 @@ class IDETabulationProblem : public FlowFunctions, using ConfigurationTy = HasNoConfigurationType; - /// Takes an intermediate representation data base (IRDB) and collects - /// information from it to create a tabulation problem. Can be solved using - /// the IDESolver for example. - /// @param[in] IRDB The project intermediate representation data base, on - /// which the tabulation problem will be build up. - /// @param[in] EntryPoints All entry points of the project, given as a vector - /// of strings, where the strings are the names of the entry functions. An - /// example would simply be { "main" }. - /// @param[in] ZeroValue An optional argument, to set a custom zero value. + /// Takes an IR data base (IRDB) and collects information from it to create a + /// tabulation problem. 
+ /// @param[in] IRDB The project IR data base, that holds the code under + /// analysis + /// @param[in] EntryPoints The (mangled) names of all entry functions of the + /// project, given as a vector of strings. An example would simply be + /// `{"main"}`. To set every function as entry point, pass `"__ALL__"` + /// @param[in] ZeroValue Provides the special tautological zero value (aka. + /// Λ). If not provided here, you must set it via \link initializeZeroValue() + /// \endlink. explicit IDETabulationProblem( const ProjectIRDBBase *IRDB, std::vector EntryPoints, std::optional diff --git a/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h b/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h index 71dcba326..8f2046fcb 100644 --- a/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h +++ b/include/phasar/DataFlow/IfdsIde/IFDSIDESolverConfig.h @@ -39,22 +39,40 @@ enum class SolverConfigOptions : uint32_t { All = ~0U }; +/// \brief Configuration options for the solving process of IFDS/IDE problems struct IFDSIDESolverConfig { IFDSIDESolverConfig() noexcept = default; IFDSIDESolverConfig(SolverConfigOptions Options) noexcept; + /// Returns whether the solver should handle unbalanced returns (default: + /// false) [[nodiscard]] bool followReturnsPastSeeds() const; + /// Returns whether the solver should automatically insert an identityFlow + /// propagation for the special zero value (default: true) [[nodiscard]] bool autoAddZero() const; + /// Returns whether the IDE solver should perform IDE's phase 2 (default: + /// true). You may want to turn this off for IFDS analyses. 
[[nodiscard]] bool computeValues() const; + /// Returns whether the solver should record all ESG edges (default: false) + /// \note This option may severely hurt the solver's performance [[nodiscard]] bool recordEdges() const; + /// Returns whether the solver should emit the ESG as DOT graph on the + /// command-line (default: false) [[nodiscard]] bool emitESG() const; + /// Currently unused [[nodiscard]] bool computePersistedSummaries() const; + /// \see followReturnsPastSeeds void setFollowReturnsPastSeeds(bool Set = true); + /// \see autoAddZero void setAutoAddZero(bool Set = true); + /// \see computeValues void setComputeValues(bool Set = true); + /// \see recordEdges void setRecordEdges(bool Set = true); + /// \see emitESG void setEmitESG(bool Set = true); + /// \see computePersistedSummaries void setComputePersistedSummaries(bool Set = true); void setConfig(SolverConfigOptions Opt); diff --git a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h index ff39ddf18..d0f9472d9 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h @@ -18,8 +18,10 @@ namespace psr { template class Compressor; -/// A data structure capable of compressing and storing data of a given type T. -/// This implementation can efficiently pass by value. +/// \brief A utility class that assigns a sequential Id to every inserted +/// object. +/// +/// This specialization handles types that can be efficiently passed by value template class Compressor>> { public: @@ -63,8 +65,10 @@ class Compressor>> { llvm::SmallVector FromInt; }; -/// A data structure capable of compressing and storing data of a given type T. -/// This implementation cannot efficiently pass by value. +/// \brief A utility class that assigns a sequential Id to every inserted +/// object.
+/// +/// This specialization handles types that cannot be efficiently passed by value template class Compressor>> { public: diff --git a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h index 41ba2ccf5..d38ecb7f8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/FlowFunctionCache.h @@ -140,7 +140,7 @@ template struct FlowFunctionCacheBase { /// This class caches flow and edge functions to avoid their reconstruction. /// When a flow or edge function must be applied to multiple times, a cached /// version is used if existend, otherwise a new one is created and inserted -/// into the cache. +/// into the cache. This class is used within both IDE solver implementations. template class FlowFunctionCache : detail::FlowFunctionCacheBase, diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index c3ce3f9c7..cfebf428e 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -27,9 +27,14 @@ namespace psr { -/// Solves the given IFDSTabulationProblem as described in the 1995 paper by -/// Reps, Horwitz and Sagiv. To solve the problem, call solve(). Results -/// can then be queried by using resultAt() and resultsAt(). +/// \brief Solves the given IFDSTabulationProblem as described in the 1995 paper +/// by Reps, Horwitz and Sagiv. To solve the problem, call solve(). Results can +/// then be queried by using resultAt() and resultsAt(). +/// +/// \note PhASAR implements IFDS in terms of IDE, so in case you do not need the +/// raw SolverResults, for maximum performance you should use +/// IFDSIDESolverConfig#setComputeValues(bool) to disable IDE's +/// phase 2. 
template > class IFDSSolver diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h index bab1509d0..68ee40cb8 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h @@ -50,7 +50,7 @@ namespace psr { /// This solver implements the optimizations and the $JF_N$ layout from the /// paper "Scaling Interprocedural Static Data-Flow Analysis to Large C/C++ /// Applications: An Experience Report" -/// (https://doi.org/10.4230/LIPIcs.ECOOP.2024.36) by Schiebel, Sattler, +/// (<https://doi.org/10.4230/LIPIcs.ECOOP.2024.36>) by Schiebel, Sattler, /// Schubert, Apel, and Bodden. template > diff --git a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h index 5070b6026..c7326729b 100644 --- a/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h +++ b/include/phasar/DataFlow/Mono/Contexts/CallStringCTX.h @@ -13,10 +13,9 @@ namespace psr { -/// Stores a call string context that can be used as an index for data -/// structures like std::unordered_map. The size_t operator is overloaded to -/// return a hash function. -/// @tparam N Type of the call string elements. +/// Stores a call-string context that can be used in interprocedural monotone +/// analysis to achieve (limited) context sensitivity. +/// @tparam N Type of the call-string elements. /// @tparam K Maximal length the call string can have. template class CallStringCTX { protected: diff --git a/include/phasar/DataFlow/Mono/InterMonoProblem.h b/include/phasar/DataFlow/Mono/InterMonoProblem.h index 6ad3bcb32..e4a345a14 100644 --- a/include/phasar/DataFlow/Mono/InterMonoProblem.h +++ b/include/phasar/DataFlow/Mono/InterMonoProblem.h @@ -31,6 +31,9 @@ namespace psr { template class TypeHierarchy; template class ICFG; +/// \brief The analysis problem interface for interprocedural monotone problems +/// (solvable by the InterMonoSolver).
Subclass this and override all +/// pure-virtual functions to create your own inter-mono analysis. template class InterMonoProblem : public IntraMonoProblem { public: @@ -50,11 +53,11 @@ class InterMonoProblem : public IntraMonoProblem { /// An interprocedural monotone problem generated from an intermediate /// representation, a type hierarchy of said representation, a control flow /// graph, points-to information and optionally a vector of entry points. - /// @param[in] IRDB A project intermediate representation data base. + /// @param[in] IRDB A project IR data base. /// @param[in] TH A type hierarchy based on the given IRDB. /// @param[in] CF A control flow graph based on the given IRDB. /// @param[in] PT Points-to information based on the given IRDB. - /// @param[in] EntryPoints A vector of entry points. Empty by default. + /// @param[in] EntryPoints A vector of entry points. Provide at least one. InterMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const i_t *ICF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/IntraMonoProblem.h b/include/phasar/DataFlow/Mono/IntraMonoProblem.h index 8fd23b850..69aca7ede 100644 --- a/include/phasar/DataFlow/Mono/IntraMonoProblem.h +++ b/include/phasar/DataFlow/Mono/IntraMonoProblem.h @@ -35,6 +35,9 @@ struct HasNoConfigurationType; template class TypeHierarchy; template class CFG; +/// \brief The analysis problem interface for intraprocedural monotone problems +/// (solvable by the IntraMonoSolver). Subclass this and override all +/// pure-virtual functions to create your own mono analysis. 
template class IntraMonoProblem { public: using n_t = typename AnalysisDomainTy::n_t; @@ -58,18 +61,16 @@ template class IntraMonoProblem { [[maybe_unused]] Soundness S = Soundness::Soundy; public: - // denote that a problem does not require a configuration (type/file) - // a user problem can override the type of configuration to be used, if any using ConfigurationTy = HasNoConfigurationType; /// An intraprocedural monotone problem generated from an intermediate /// representation, a type hierarchy of said representation, a control flow /// graph, points-to information and optionally a vector of entry points. - /// @param[in] IRDB A project intermediate representation data base. + /// @param[in] IRDB A project IR data base. /// @param[in] TH A type hierarchy based on the given IRDB. /// @param[in] CF A control flow graph based on the given IRDB. /// @param[in] PT Points-to information based on the given IRDB. - /// @param[in] EntryPoints A vector of entry points. Empty by default. + /// @param[in] EntryPoints A vector of entry points. Provide at least one. IntraMonoProblem(const ProjectIRDBBase *IRDB, const TypeHierarchy *TH, const CFGBase *CF, AliasInfoRef PT, diff --git a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h index 4e8805e19..b0abf8835 100644 --- a/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/InterMonoSolver.h @@ -27,9 +27,11 @@ namespace psr { -/// A solver class for interprocedual monotone problems. -/// @tparam AnalysisDomainTy type of the analysis domain. -/// @tparam K An unsigned integer used as the maximum size for call string +/// \brief A solver class for interprocedual monotone problems (derived from +/// InterMonoProblem). To solve the problem, call solve(). +/// +/// \tparam AnalysisDomainTy type of the analysis domain. +/// \tparam K An unsigned integer used as the maximum length for call-string /// contexts. 
template class InterMonoSolver { public: diff --git a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h index 5284f5e87..ed5b1f449 100644 --- a/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h +++ b/include/phasar/DataFlow/Mono/Solver/IntraMonoSolver.h @@ -29,8 +29,10 @@ namespace psr { -/// A solver class for intraprocedual monotone problems. -/// @tparam AnalysisDomainTy type of the analysis domain. +/// \brief A solver class for intraprocedural monotone problems. To solve the +/// problem, call solve(). +/// +/// \tparam AnalysisDomainTy type of the analysis domain. template class IntraMonoSolver { public: using ProblemTy = IntraMonoProblem; diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h index b1ce4e2cd..b97393231 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManager.h @@ -28,9 +28,8 @@ namespace psr { -/// The PathSensitivityManager class is an implementation of the -/// PathSensitivityManagerBase class and the PathSensitivityManagerMixin class. -/// It holds an exploded super graph. +/// \brief A utility class that allows path-reconstruction for IFDS/IDE solver +/// results. template class PathSensitivityManager : public PathSensitivityManagerBase, diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h index c92c35aae..74ed9e1c5 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h @@ -67,6 +67,11 @@ class PathSensitivityManagerMixin { } public: + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts in FactsRange holding right after Inst.
+ /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename FactsRangeTy, typename ConfigTy, typename Filter = DefaultPathTracingFilter, @@ -147,6 +152,11 @@ class PathSensitivityManagerMixin { return Dag; } + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts holding right after Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename ConfigTy, typename L, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> @@ -159,6 +169,11 @@ class PathSensitivityManagerMixin { return pathsDagToAll(std::move(Inst), FactsRange, Config, PFilter); } + /// Reconstruct the combined control- and data-flow paths that lead to the + /// given data-flow fact Fact holding right after Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way. template < typename ConfigTy, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> @@ -171,6 +186,11 @@ class PathSensitivityManagerMixin { PFilter); } + /// Reconstruct the combined control- and data-flow paths that lead to any of + /// the given data-flow facts in FactsRange holding at Inst. + /// + /// The result is given as a graph, where cycles are unrolled once in an + /// implementation-defined way.
template < typename ConfigTy, typename Filter = DefaultPathTracingFilter, typename = std::enable_if_t>> diff --git a/include/phasar/Domain/AnalysisDomain.h b/include/phasar/Domain/AnalysisDomain.h index b467506fd..a4622695a 100644 --- a/include/phasar/Domain/AnalysisDomain.h +++ b/include/phasar/Domain/AnalysisDomain.h @@ -35,9 +35,10 @@ struct AnalysisDomain { using d_t = void; // (Control-flow) Node --- Specifies the type of a node in the // (inter-procedural) control-flow graph and can be though of as an individual - // statement or instruction of the program. + // statement or instruction of the target program. using n_t = void; - // Function --- Specifies the type of functions. + // Function --- Specifies the type of functions/procedures in the target + // program. using f_t = void; // (User-defined) type --- Specifies the type of a user-defined (i.e. struct // or class) data type. diff --git a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h index 9e8f4b66e..614a4fa80 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h +++ b/include/phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h @@ -16,6 +16,12 @@ namespace psr { class LLVMProjectIRDB; +/// \brief Provides utilities to inject a function into the IR under analysis +/// that captures global constructors and destructors as described in the 2021 +/// Paper "Modeling the Effects of Global Variables in Data-Flow Analysis for +/// C/C++" by Schubert et al. +/// +/// See <10.1109/SCAM52516.2021.00010> for more information. class GlobalCtorsDtorsModel { public: static constexpr llvm::StringLiteral ModelName = @@ -32,24 +38,25 @@ class GlobalCtorsDtorsModel { /// @brief Function that creates a new global model and inserts it into the /// IRDB. The returned function is the global model that was inserted. - /// @param[in, out] IRDB Intermediate representation data base that will have - /// the global model inserted. 
+ /// @param[in, out] IRDB IR data base that will have the global model + /// inserted. /// @param[in] UserEntryPoints Entry points for the program given as - /// llvm::Function pointers. + /// llvm::Function pointers. You usually want to pass here the main function static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); + /// @brief Function that creates a new global model and inserts it into the /// IRDB. The returned function is the global model that was inserted. - /// @param[in, out] IRDB Intermediate representation data base that will have - /// the global model inserted. + /// @param[in, out] IRDB IR data base that will have the global model + /// inserted. /// @param[in] UserEntryPoints Entry points for the program given as - /// std::strings. + /// std::strings. You usually want to pass here the main function static llvm::Function * buildModel(LLVMProjectIRDB &IRDB, llvm::ArrayRef UserEntryPoints); - /// Returns true, if a function was generated by phasar. + /// Returns true, if a function was generated by the GlobalCtorsDtorsModel. [[nodiscard]] static bool isPhasarGenerated(const llvm::Function &F) noexcept; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h index e0bccc34b..be69af134 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h @@ -22,7 +22,8 @@ namespace psr { class LLVMProjectIRDB; class LLVMBasedBackwardCFG; -/// A class that represents a backwards control flow graph. +/// \brief A class that represents a backwards control flow graph. Conforms to +/// the CFGBase CRTP interface. 
class LLVMBasedBackwardCFG : public detail::LLVMBasedCFGImpl { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index cdb76cb0d..9368cc204 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -25,10 +25,8 @@ template class CallGraph; template <> struct CFGTraits : CFGTraits {}; -/// A class that represents a backwards interprocedural control flow graph. It -/// uses the LLVMBasedBackwardICFG class itself as a template argument for -/// the ICFGBase template argument. This is called a curiously recurring -/// template pattern. +/// \brief A class that represents a backwards interprocedural control flow +/// graph. Conforms to the ICFGBase CRTP interface. class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h index f4b27bde3..229d71c83 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h @@ -36,7 +36,8 @@ template <> struct CFGTraits { template <> struct CFGTraits : CFGTraits {}; -/// A class that implements a control flow graph. +/// \brief A class that implements a control flow graph. Conforms to the CFGBase +/// CRTP interface. 
namespace detail { template class LLVMBasedCFGImpl : public CFGBase { friend CFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h index 20d8d8950..fbb2693c9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h @@ -44,10 +44,8 @@ class Resolver; class LLVMBasedICFG; template <> struct CFGTraits : CFGTraits {}; -/// A class that implements an interprocedural control flow graph. It -/// uses the LLVMBasedICFG class itself as a template argument for the -/// ICFGBase template argument. This is called a curiously recurring template -/// pattern. +/// \brief A class that implements an interprocedural control flow graph. +/// Conforms to the ICFGBase CRTP interface. class LLVMBasedICFG : public LLVMBasedCFG, public ICFGBase { friend ICFGBase; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h index 9765462ad..646ff7132 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMVFTableProvider.h @@ -23,8 +23,12 @@ class GlobalVariable; namespace psr { class LLVMProjectIRDB; -/// A class that creates an unordered map of the virtual functions of an -/// intermediate representation data bases module. +/// \brief A class that provides access to all C++ virtual function tables +/// (VTables) found in the target program. +/// +/// Useful for constructing a call graph for a C++-based target. +/// \note This class only works if the target program's IR was generated with +/// debug information. Pass `-g` to the compiler to achieve this.
class LLVMVFTableProvider { public: explicit LLVMVFTableProvider(const llvm::Module &Mod); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index 035ba37c2..dc6f7c8ff 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -27,15 +27,10 @@ class CallBase; namespace psr { class DIBasedTypeHierarchy; -/// A resolver that performs a class hierarchy analysis. +/// \brief A resolver that performs Class Hierarchy Analysis to resolve calls +/// to C++ virtual functions. Requires debug information. class CHAResolver : public Resolver { public: - /// @brief Class that implements a Resolver's virtual functions to be able to - /// perform a class hierarchy analysis. - /// @param[in] IRDB The project intermediate representation data base, on - /// which the tabulation problem will be build up. - /// @param[in] VTP Virtual Function Table Provider. - /// @param[in] TH A type hierarchy based on the given IRDB. CHAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 05fd1b396..88afa796e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -18,14 +18,9 @@ class CallBase; namespace psr { -/// A resolver that doesn't resolve indirect- and virtual calls +/// \brief A resolver that doesn't resolve indirect- and virtual calls class NOResolver final : public Resolver { public: - /// @brief Class that implements a Resolver's virtual functions to be able to - /// perform an analysis that does not resolve indirect- and virtual calls. 
- /// @param[in] IRDB The project intermediate representation data base, on - /// which the tabulation problem will be build up. - /// @param[in] VTP Virtual Function Table Provider. NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP); ~NOResolver() override = default; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 08b2912f4..eca760ae7 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -36,16 +36,10 @@ namespace psr { class DIBasedTypeHierarchy; -/// A resolver that performs an on-the-fly analysis based on points-to -/// information (default). +/// \brief A resolver that uses alias information to resolve indirect and +/// virtual calls class OTFResolver : public Resolver { public: - /// @brief Class that implements a Resolver's virtual functions to be able to - /// perform an on-the-fly analysis based on points-to information. - /// @param[in] IRDB The project intermediate representation data base, on - /// which the tabulation problem will be build up. - /// @param[in] VTP Virtual Function Table Provider. - /// @param[in] PT Points-to information. OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index 9e786643f..c6e003211 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -29,15 +29,10 @@ class DICompositeType; namespace psr { class DIBasedTypeHierarchy; -/// A resolver that performs a rapid type analysis. +/// \brief A resolver that performs Rapid Type Analysis to resolve calls +/// to C++ virtual functions. Requires debug information. 
class RTAResolver : public CHAResolver { public: - /// @brief Class that implements a Resolver's virtual functions to be able to - /// perform a rapid type analysis. - /// @param[in] IRDB The project intermediate representation data base, on - /// which the tabulation problem will be build up. - /// @param[in] VTP Virtual Function Table Provider. - /// @param[in] TH A type hierarchy based on the given IRDB. RTAResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH); diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index e67a25fab..c59717c25 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -68,7 +68,8 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); -/// A parent class that serves as the basis for specific resolver analyses. +/// \brief A base class for call-target resolvers. Used to build call graphs. +/// /// Create a specific resolver by making a new class, inheriting this resolver /// class and implementing the virtual functions as needed. class Resolver { diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h index 3ef05b893..6524bfad0 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -23,9 +23,8 @@ template <> struct CFGTraits : CFGTraits { using v_t = const llvm::Value *; }; -/// A class that represents a sparse control flow graph. It uses the -/// SparseLLVMBasedCFG class itself as a template argument for the SparseCFGBase -/// template argument. This is called a curiously recurring template pattern. +/// \brief A class that implements a sparse control flow graph. 
Conforms to the +/// CFGBase CRTP interface. class SparseLLVMBasedCFG : public LLVMBasedCFG, public SparseCFGBase { friend struct SVFGCache; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h index 5b46fc6b2..df946ae27 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -20,27 +20,20 @@ class SparseLLVMBasedCFG; class DIBasedTypeHierarchy; struct SVFGCache; -/// A class that represents a sparse interprocedural control flow graph. It uses -/// the SparseLLVMBasedICFG class itself as a template argument for the -/// SparseLLVMBasedCFGProvider template argument. This is called a curiously -/// recurring template pattern. +/// \brief A class that implements a sparse interprocedural control flow graph. +/// Conforms to the ICFGBase CRTP interface. +/// +/// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or +/// SparseIDE optimization after Karakaya et al. "Symbol-Specific Sparsification +/// of Interprocedural Distributive Environment Problems" +/// class SparseLLVMBasedICFG : public LLVMBasedICFG, public SparseLLVMBasedCFGProvider { friend SparseLLVMBasedCFGProvider; public: - /// @param[in, out] IRDB Intermediate representation data base. The IRDB will - /// be changed, only if IncludeGlobals is set to true. - /// @param[in] CGType The type of the call graph analysis. - /// @param[in] EntryPoints The entry points of the program the IRDB is based - /// on. Often this is just { "main" }. - /// @param TH Type Hierarchy of the given IRDB. Type Hierarchy can only be - /// null, if the call graph type does not need a type hierarchy. In any other - /// case, this must not be null. An example of this is the OTF analysis. - /// @param PT Points-to information that represents aliases. - /// @param S Level of soundness.
- /// @param IncludeGlobals Flag to determine if globals should be included. + /// Constructor that delegates all arguments to the ctor of LLVMBasedICFG explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, llvm::ArrayRef EntryPoints = {}, diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h index 830db44ed..7fbbb65a5 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -29,12 +29,14 @@ struct SVFGCache; template <> struct CFGTraits : CFGTraits {}; -/// Similar to SparseLLVMBasedICFG; the only difference is that this one *is* no -/// LLVMBasedICFG -- it contains a pointer to an already existing one. +/// \brief Similar to SparseLLVMBasedICFG; the only difference is that this one +/// *is* no LLVMBasedICFG -- it contains a pointer to an already existing one. /// It still owns the sparse value-flow graphs. -/// It also uses the SparseLLVMBasedICFGView class itself as a template argument -/// for the SparseLLVMBasedCFGProvider template argument. This is called a -/// curiously recurring template pattern. +/// +/// Use this in the IDESolver or IFDSSolver to profit from the SparseIFDS or +/// SparseIDE optimization after Karakaya et al. "Symbol-Specific Sparsification +/// of Interprocedural Distributive Environment Problems" +/// class SparseLLVMBasedICFGView : public LLVMBasedCFG, public ICFGBase, @@ -43,8 +45,6 @@ class SparseLLVMBasedICFGView friend SparseLLVMBasedCFGProvider; public: - /// @param[in] ICF Interprocedural control flow graph. - /// @param[in] PT Points-to information that represents aliases.
explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, LLVMAliasInfoRef PT); diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h index a12c70637..98cdd56f7 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h @@ -23,6 +23,7 @@ struct DataFlowFact { std::variant Fact; }; +/// \brief Simple representation of a serializable data-flow summary class FunctionDataFlowFacts { public: using ParameterMappingTy = diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h index b51e26172..8afba2f71 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h @@ -14,8 +14,7 @@ class LLVMFunctionDataFlowFacts; [[nodiscard]] LLVMFunctionDataFlowFacts readFromFDFF(const FunctionDataFlowFacts &Fdff, const LLVMProjectIRDB &Irdb); -/// @brief A class that stores and maps llvm::Function * to Parameter Mappin -/// Types +/// @brief A LLVM-specific mapping of FunctionDataFlowFacts class LLVMFunctionDataFlowFacts { public: LLVMFunctionDataFlowFacts() noexcept = default; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h index 2758da38a..729e3c44a 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h @@ -28,9 +28,8 @@ class Value; namespace psr { -/// -/// @brief This class may be used to represent the special zero value for IFDS -/// and IDE problems. The LLVMZeroValue is implemented as a singleton. +/// \brief This class may be used to represent the special zero value (aka. Λ) +/// for IFDS and IDE problems. 
The LLVMZeroValue is implemented as a singleton. /// class LLVMZeroValue : public llvm::GlobalVariable { private: diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h index f437bbf2c..bad969a70 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.h @@ -51,8 +51,8 @@ struct IDEExtendedTaintAnalysisDomain : public LLVMAnalysisDomainDefault { }; namespace XTaint { -/// An extended taint analysis is a field sensitive taint analysis, that uses K -/// limit field access path. +/// \brief An IDE-based taint analysis that uses k-limited field-access paths to +/// achieve field sensitivity class IDEExtendedTaintAnalysis : public IDETabulationProblem, public AnalysisBase { diff --git a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h index d3979aa1e..39cd52740 100644 --- a/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h +++ b/include/phasar/PhasarLLVM/DataFlow/PathSensitivity/Z3BasedPathSensitvityManager.h @@ -40,8 +40,6 @@ class Instruction; namespace psr { class LLVMPathConstraints; -/// A base class that can be used to implement a path solver based on the Z3 SMT -/// Solver. class Z3BasedPathSensitivityManagerBase : public PathSensitivityManagerBase { public: @@ -63,6 +61,12 @@ class Z3BasedPathSensitivityManagerBase static void deduplicatePaths(FlowPathSequence &Paths); }; +/// \brief An extension of the path-reconstruction mechanism of the +/// PathSensitivityManager that provides means to extract concrete combined +/// control- and data-flow paths. +/// +/// Filters out paths that are considered infeasible by the Z3 +/// constraint solver. 
template >> @@ -91,6 +95,14 @@ class Z3BasedPathSensitivityManager } } + /// Reconstruct the feasible combined control- and data-flow paths that lead to + /// the given data-flow fact Fact holding right after Inst. + /// + /// The result is given as a list of paths, where cycles are unrolled once in an + /// implementation-defined way. + /// It is strongly recommended to use the Z3BasedPathSensitivityConfig in the + /// Z3BasedPathSensitivityManager's ctor to limit the returned paths; + /// otherwise this function quickly becomes a performance bottleneck. FlowPathSequence pathsTo(n_t Inst, d_t Fact) const { if (Config.DAGSizeThreshold != SIZE_MAX) { PHASAR_LOG_LEVEL( diff --git a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h index a2f74c536..c4330c4e4 100644 --- a/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h +++ b/include/phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h @@ -25,8 +25,8 @@ class LLVMProjectIRDB; class LLVMBasedICFG; class LLVMBasedCFG; -/// A default analysis domain that can be either used outright, or be used as a -/// reference for custom analysis domains.
+/// \brief An AnalysisDomain that specializes sensible defaults for LLVM-based +/// analysis struct LLVMAnalysisDomainDefault : public AnalysisDomain { using d_t = const llvm::Value *; using n_t = const llvm::Instruction *; @@ -38,6 +38,8 @@ struct LLVMAnalysisDomainDefault : public AnalysisDomain { using db_t = LLVMProjectIRDB; }; +/// \brief An AnalysisDomain that specializes sensible defaults for LLVM-based +/// IFDS analysis using LLVMIFDSAnalysisDomainDefault = WithBinaryValueDomain; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h index 326c3b4f0..a405b7aca 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h @@ -53,16 +53,15 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, using AllocationSiteSetPtrTy = traits_t::AllocationSiteSetPtrTy; using AliasSetMap = llvm::DenseMap>; - /** - * Creates points-to set(s) for all functions in the IRDB. If - * UseLazyEvaluation is true, computes points-to set(s) for functions that do - * not use global variables on the fly - */ + /// \brief Creates alias-sets for all functions in the IRDB. + /// + /// If UseLazyEvaluation is true, computes alias-sets only for functions that + /// use global variables directly and delays all others to when they are first + /// requested explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, bool UseLazyEvaluation = true, AliasAnalysisType PATy = AliasAnalysisType::CFLAnders); - /** - * Loads points-to set(s) from json file - */ + + /// Loads alias sets from JSON explicit LLVMAliasSet(LLVMProjectIRDB *IRDB, const nlohmann::json &SerializedPTS); @@ -105,22 +104,18 @@ class LLVMAliasSet : public AnalysisPropertiesMixin, return AnalysisProperties::None; } - /** - * Shows a parts of an alias set. Good for debugging when one wants to peak - * into a points to set. 
- * - * @param ValueSetPair a pair on a Value* and the corresponding points-to set - * @param Peak the amount of instructions shown from the points-to set - */ + /// Shows parts of an alias set. Good for debugging when one wants to peek + /// into a points-to set. + /// + /// \param ValueSetPair a pair of a Value* and the corresponding points-to set + /// \param Peak the amount of instructions shown from the points-to set static void peakIntoAliasSet(const AliasSetMap::value_type &ValueSetPair, int Peak); - /** - * Prints out the size distribution for all points to sets. - * - * @param Peak the amount of instructions shown from one of the biggest - * points-to sets, use 0 to show nothing. - */ + /// Prints out the size distribution for all points-to sets. + /// + /// \param Peak the amount of instructions shown from one of the biggest + /// points-to sets, use 0 to show nothing. void drawAliasSetsDistribution(int Peak = 10) const; [[nodiscard]] inline bool empty() const { return AnalyzedFunctions.empty(); } diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h index 13cdfbf6b..d895741ad 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h @@ -16,8 +16,8 @@ namespace psr { -/// A data structure used for storing, serializing and deserializing -/// LLVMAliasSet data +/// A data structure used for storing, serializing and deserializing an +/// LLVMAliasSet struct LLVMAliasSetData { std::vector> AliasSets; std::vector AnalyzedFunctions; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h index 7b98b1d3c..a7532d20d 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMBasedAliasAnalysis.h @@ -25,16 +25,13 @@ namespace psr { class LLVMProjectIRDB; +/// \brief Wrapper over alias analyses that
provide point-wise alias +/// information. +/// +/// Used to construct an LLVMAliasSet. class LLVMBasedAliasAnalysis { public: - /// @brief A class that can perform an analysis, which generates alias - /// informatioan from a given intermediate representation data base. - /// @param IRDB The project intermediate representation data base, which the - /// analysis will generate the alias information from. - /// @param UseLazyEvaluation Set to true to use a faster but less precise - /// algorithm. - /// @param PATy Boolean to determine the needed analysis type. explicit LLVMBasedAliasAnalysis( LLVMProjectIRDB &IRDB, bool UseLazyEvaluation, AliasAnalysisType PATy = AliasAnalysisType::Basic); diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h index e02f3091e..258bdbbc9 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h @@ -25,10 +25,10 @@ namespace psr { class LLVMProjectIRDB; -/// @brief A class that represents the type hierarchy of an intermediate -/// representation data base (IRDB) of a project. The algorithm uses Debug -/// Information (DI) to create the type hierarchy, therefore the given IRDB must -/// contain debug information for the hierarchy to be created. +/// \brief Represents the type hierarchy of the target program. +/// +/// \note This class only works if the target program's IR was generated with +/// debug information. Pass `-g` to the compiler to achieve this. class DIBasedTypeHierarchy : public TypeHierarchy { public: @@ -43,16 +43,15 @@ class DIBasedTypeHierarchy static inline constexpr llvm::StringLiteral PureVirtualCallName = "__cxa_pure_virtual"; - /// @brief Creates a type hierarchy based on an intermediate representation + /// \brief Creates a type hierarchy based on an intermediate representation /// data base.
- /// @param[in] IRDB The intermediate representation data base of which the - /// type hierarchy will be based upon. This MUST contain debug information for - /// the algorithm to work! + /// \param[in] IRDB The IR data base of which the type hierarchy will be based + /// upon. This MUST contain debug information for the algorithm to work! explicit DIBasedTypeHierarchy(const LLVMProjectIRDB &IRDB); - /// @brief Loads an already computed type hierarchy. - /// @param[in] IRDB The intermediate representation data base of the type - /// hierarchy. - /// @param[in] SerializedData The already existing type hierarchy, given by + + /// \brief Loads an already computed type hierarchy. + /// \param[in] IRDB The IR data base of the type hierarchy. + /// \param[in] SerializedData The already existing type hierarchy, given by /// the appropiate class DIBasedTypeHierarchyData, which contains all /// neccesary information. explicit DIBasedTypeHierarchy(const LLVMProjectIRDB *IRDB, @@ -108,16 +107,12 @@ class DIBasedTypeHierarchy void print(llvm::raw_ostream &OS = llvm::outs()) const override; - /** - * @brief Prints the class hierarchy to an ostream in dot format. - * @param OS outputstream - */ + /// \brief Prints the class hierarchy to an ostream in dot format. + /// \param OS outputstream void printAsDot(llvm::raw_ostream &OS = llvm::outs()) const; - /** - * @brief Prints the class hierarchy to an ostream in json format. - * @param an outputstream - */ + /// \brief Prints the class hierarchy to an ostream in JSON format. 
+ /// \param OS outputstream void printAsJson(llvm::raw_ostream &OS = llvm::outs()) const override; private: diff --git a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h index ff59cfe3a..da579b8f8 100644 --- a/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h +++ b/include/phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchyData.h @@ -18,7 +18,7 @@ #include namespace psr { -/// @brief A structure that is used to store already calculated type hierarchy +/// \brief A structure that is used to store already calculated type hierarchy /// data, serialize that data or deserialize a json file with a previously /// serialized type hierarchy. struct DIBasedTypeHierarchyData { diff --git a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h index 4394fe1bb..286cae8dd 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMIRToSrc.h @@ -43,6 +43,7 @@ namespace psr { /// information about the source code or the intermediate representation of a /// project. +/// \brief Minimal source-code information, based on LLVM debug information struct DebugLocation { unsigned Line{}; unsigned Column{}; @@ -51,7 +52,7 @@ struct DebugLocation { [[nodiscard]] llvm::DILocalVariable *getDILocalVariable(const llvm::Value *V); -/// A struct that contains information about a source code line, what the +/// \brief A struct that contains information about a source code line, what the /// corresponding function name of that line is and what the file name is that /// the line is in. 
struct SourceCodeInfo { diff --git a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h index 809265ecb..129249946 100644 --- a/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h +++ b/include/phasar/PhasarLLVM/Utils/LLVMShorthands.h @@ -58,12 +58,18 @@ bool isHeapAllocatingFunction(const llvm::Function *F) noexcept; /// argument ExactMatch is set to true, which it is by default, the two provided /// arguments must also have the same type for each argument, for the function /// to return true. +/// +/// \note This function is less useful in practice than you may think. Consider +/// using isConsistentCall() instead. bool matchesSignature(const llvm::Function *F, const llvm::FunctionType *FType, bool ExactMatch = true); -// Returns true iff the provided functions are both not null and have the same -// number of paramters, the same return type and each parameter of both -// functions has the same type aswell. +/// Returns true iff the provided functions are both not null and have the same +/// number of parameters, the same return type and each parameter of both +/// functions has the same type as well. +/// +/// \note This function is less useful in practice than you may think. Consider +/// using isConsistentCall() instead. bool matchesSignature(const llvm::FunctionType *FType1, const llvm::FunctionType *FType2); @@ -236,7 +242,7 @@ bool isGuardVariable(const llvm::Value *V); bool isStaticVariableLazyInitializationBranch(const llvm::BranchInst *Inst); /** - * Tests for https://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic + * Tests for <https://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic> * e.g.
* int boo __attribute__((annotate("bar")); * @param F The function to test - Target of the call instruction @@ -245,7 +251,7 @@ bool isVarAnnotationIntrinsic(const llvm::Function *F); /** * Retrieves String annotation value as per - * https://llvm.org/docs/LangRef.html#llvm-var-annotation-intrinsic + * * Test the call function be tested by isVarAnnotationIntrinsic * */ diff --git a/include/phasar/PhasarPass/PhasarPass.h b/include/phasar/PhasarPass/PhasarPass.h index 93a0eb4c6..a7488a96c 100644 --- a/include/phasar/PhasarPass/PhasarPass.h +++ b/include/phasar/PhasarPass/PhasarPass.h @@ -22,6 +22,7 @@ namespace psr { /// @brief PhasarPass is an implementation of llvm passes for the PhASAR /// framework. +/// /// What is a pass? /// "The LLVM pass framework is an important part of the LLVM system, /// because LLVM passes are where most of the interesting parts of the compiler @@ -29,7 +30,7 @@ namespace psr { /// compiler, they build the analysis results that are used by these /// transformations, and they are, above all, a structuring technique for /// compiler code." -/// Source: https://llvm.org/docs/WritingAnLLVMNewPMPass.html +/// Source: class PhasarPass : public llvm::ModulePass { public: static inline char ID = 12; diff --git a/include/phasar/PhasarPass/PhasarPrinterPass.h b/include/phasar/PhasarPass/PhasarPrinterPass.h index d344ded13..76baa2f9a 100644 --- a/include/phasar/PhasarPass/PhasarPrinterPass.h +++ b/include/phasar/PhasarPass/PhasarPrinterPass.h @@ -19,8 +19,6 @@ class AnalysisUsage; namespace psr { -/// @brief A pass that can be used to print information while a PhASAR analysis -/// is running. Example usage would be for debugging. 
class PhasarPrinterPass : public llvm::ModulePass { public: static inline char ID = 12; // NOLINT FIXME: make const when LLVM supports it diff --git a/include/phasar/TypeHierarchy/VFTable.h b/include/phasar/TypeHierarchy/VFTable.h index fc36eb4f7..4fc331cf5 100644 --- a/include/phasar/TypeHierarchy/VFTable.h +++ b/include/phasar/TypeHierarchy/VFTable.h @@ -20,8 +20,7 @@ class raw_ostream; namespace psr { -/// @brief A class to store the data of a virtual function data from an -/// intermediate representation data base. +/// \brief A generic class to represent a virtual function table template class VFTable { public: virtual ~VFTable() = default; diff --git a/include/phasar/Utils/AnalysisPrinterBase.h b/include/phasar/Utils/AnalysisPrinterBase.h index e99bad914..ec95b47d7 100644 --- a/include/phasar/Utils/AnalysisPrinterBase.h +++ b/include/phasar/Utils/AnalysisPrinterBase.h @@ -10,8 +10,8 @@ namespace psr { -/// A class that serves as the basis for a custom analysis printer -/// implementation. +/// \brief A generic class that serves as the basis for a custom analysis +/// printer implementation. template class AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; using d_t = typename AnalysisDomainTy::d_t; diff --git a/include/phasar/Utils/DefaultAnalysisPrinter.h b/include/phasar/Utils/DefaultAnalysisPrinter.h index 5db54f46b..7efa4183f 100644 --- a/include/phasar/Utils/DefaultAnalysisPrinter.h +++ b/include/phasar/Utils/DefaultAnalysisPrinter.h @@ -30,7 +30,8 @@ template struct Warning { LatticeElement(std::move(Lattice)), AnalysisType(DfAnalysisType) {} }; -/// A default implementation of the AnalysisPrinterBase class. +/// \brief A default implementation of the AnalysisPrinterBase. Aggregates all +/// analysis results in a vector and prints them when the analysis has finished. 
template class DefaultAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; diff --git a/include/phasar/Utils/EquivalenceClassMap.h b/include/phasar/Utils/EquivalenceClassMap.h index e006686d0..c1ce7d0bf 100644 --- a/include/phasar/Utils/EquivalenceClassMap.h +++ b/include/phasar/Utils/EquivalenceClassMap.h @@ -23,11 +23,12 @@ namespace psr { -/// EquivalenceClassMap is a special map type that splits the keys into -/// equivalence classes regarding their mapped values. Meaning, that all keys -/// that are equivalent are mapped to the same value. Two keys are treated as -/// equivalent and merged into a equivalence class when they refer to Values -/// that compare equal. +/// \brief EquivalenceClassMap is a special map type that splits the keys into +/// equivalence classes regarding their mapped values. +/// +/// This means that all keys that are equivalent are mapped to the same value. Two +/// keys are treated as equivalent and merged into an equivalence class when they +/// refer to Values that compare equal. template struct EquivalenceClassMap { template using SetType = std::set; using EquivalenceClassBucketT = std::pair, ValueT>; diff --git a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h index 0437f4951..75332cb30 100644 --- a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h +++ b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h @@ -13,8 +13,10 @@ #include namespace psr { -/// This class implements an AnalysisPrinterBase, which opens a file stream on -/// the fly to print analysis data. +/// This class implements the AnalysisPrinterBase that prints the analysis +/// results *while* the analysis is still running. +/// +/// Override doOnResult() to customize how the results are printed.
template class OnTheFlyAnalysisPrinter : public AnalysisPrinterBase { using n_t = typename AnalysisDomainTy::n_t; diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index f2c7504c7..d2eb99b6a 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -147,7 +147,7 @@ struct StringIDLess { bool operator()(const std::string &LHS, const std::string &RHS) const; }; -/// See "https://en.cppreference.com/w/cpp/experimental/scope_exit/scope_exit" +/// See <https://en.cppreference.com/w/cpp/experimental/scope_exit/scope_exit> template class scope_exit { // NOLINT public: template ()())> @@ -168,14 +168,14 @@ template class scope_exit { // NOLINT template scope_exit(Fn) -> scope_exit; -// Copied from "https://en.cppreference.com/w/cpp/utility/variant/visit" +// Copied from <https://en.cppreference.com/w/cpp/utility/variant/visit> template struct Overloaded : Ts... { using Ts::operator()...; }; // explicit deduction guide (not needed as of C++20) template Overloaded(Ts...) -> Overloaded; /// Based on the reference implementation of std::remove_if -/// "https://en.cppreference.com/w/cpp/algorithm/remove" and optimized for the +/// <https://en.cppreference.com/w/cpp/algorithm/remove> and optimized for the /// case that a sorted list of indices is given instead of an unary predicate /// specifying the elements to be removed. template template [[nodiscard]] constexpr auto &&forward_like(U &&X) noexcept { // NOLINT // NOLINTNEXTLINE diff --git a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp index c2a4f3e17..fbdb82242 100644 --- a/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp +++ b/lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp @@ -366,7 +366,7 @@ void LLVMTaintConfig::forAllLeakCandidatesAtImpl( } } - // Do not iterate over the actual Parameters of Inst as we did in + // Do not iterate over the actual parameters of Inst as we did in // forAllGeneratedValuesAt, because sink-values are not propagated in the // current taint analyses.
Handling sink-values should be done in the // SinkCallBack From 22c076bbb5071b5b26c953699c09916586abb50d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 12:24:44 +0200 Subject: [PATCH 08/17] Some more comments + update Doxyfile.in --- docs/Doxyfile.in | 38 +++++++++---------- .../DataFlow/IfdsIde/IFDSTabulationProblem.h | 16 ++++++++ .../phasar/DataFlow/IfdsIde/InitialSeeds.h | 3 ++ .../IfdsIde/Solver/GenericSolverResults.h | 2 +- .../phasar/DataFlow/IfdsIde/SolverResults.h | 18 +++++++++ .../phasar/PhasarLLVM/DB/LLVMProjectIRDB.h | 1 + 6 files changed, 58 insertions(+), 20 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index f3eaa58cc..2014d7364 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8 # title of most generated pages and in a few other places. # The default value is: My Project. -PROJECT_NAME = "PhASAR" +PROJECT_NAME = PhASAR # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version @@ -144,7 +144,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = +STRIP_FROM_PATH = @PHASAR_SRC_DIR@ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -153,8 +153,7 @@ STRIP_FROM_PATH = # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = - +STRIP_FROM_INC_PATH = @PHASAR_SRC_DIR@/include # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. @@ -398,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. 
# The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. @@ -743,7 +742,8 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ @CMAKE_CURRENT_SOURCE_DIR@/lib/ @CMAKE_CURRENT_SOURCE_DIR@/docs +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include/ \ + @CMAKE_CURRENT_SOURCE_DIR@/docs # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -905,7 +905,7 @@ INLINE_SOURCES = NO # Fortran comments will always remain visible. # The default value is: YES. -STRIP_CODE_COMMENTS = YES +STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. @@ -917,7 +917,7 @@ REFERENCED_BY_RELATION = YES # all documented entities called/used by that function will be listed. # The default value is: NO. -REFERENCES_RELATION = NO +REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and @@ -1407,7 +1407,7 @@ FORMULA_TRANSPARENT = YES # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: @@ -1521,7 +1521,7 @@ SEARCHDATA_FILE = searchdata.xml # projects and redirect the results back to the right project. # This tag requires that the tag SEARCHENGINE is set to YES. 
-EXTERNAL_SEARCH_ID = +EXTERNAL_SEARCH_ID = phasar # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are @@ -1540,7 +1540,7 @@ EXTRA_SEARCH_MAPPINGS = # If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. # The default value is: YES. -GENERATE_LATEX = YES +GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of @@ -1917,7 +1917,7 @@ SEARCH_INCLUDES = YES # preprocessor. # This tag requires that the tag SEARCH_INCLUDES is set to YES. -INCLUDE_PATH = +INCLUDE_PATH = @PHASAR_BINARY_DIR@/include # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the @@ -2040,7 +2040,7 @@ DIA_PATH = # and usage relations if the target is undocumented or is not a class. # The default value is: YES. -HIDE_UNDOC_RELATIONS = YES +HIDE_UNDOC_RELATIONS = NO # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: @@ -2100,7 +2100,7 @@ CLASS_GRAPH = YES # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -COLLABORATION_GRAPH = YES +COLLABORATION_GRAPH = NO # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for # groups, showing the direct groups dependencies. @@ -2136,7 +2136,7 @@ UML_LIMIT_NUM_FIELDS = 10 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -TEMPLATE_RELATIONS = NO +TEMPLATE_RELATIONS = YES # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to # YES then doxygen will generate a graph for each documented file showing the @@ -2145,7 +2145,7 @@ TEMPLATE_RELATIONS = NO # The default value is: YES. 
# This tag requires that the tag HAVE_DOT is set to YES. -INCLUDE_GRAPH = YES +INCLUDE_GRAPH = NO # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are # set to YES then doxygen will generate a graph for each documented file showing @@ -2154,7 +2154,7 @@ INCLUDE_GRAPH = YES # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. -INCLUDED_BY_GRAPH = YES +INCLUDED_BY_GRAPH = NO # If the CALL_GRAPH tag is set to YES then doxygen will generate a call # dependency graph for every global function or class method. @@ -2203,7 +2203,7 @@ DIRECTORY_GRAPH = YES # The default value is: png. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_IMAGE_FORMAT = png +DOT_IMAGE_FORMAT = svg # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. @@ -2276,7 +2276,7 @@ MAX_DOT_GRAPH_DEPTH = 0 # The default value is: NO. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_TRANSPARENT = NO +DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This diff --git a/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h index cbafa68c3..bf5263a2a 100644 --- a/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h @@ -19,6 +19,12 @@ namespace psr { +/// \brief The analysis problem interface for IFDS problems (solvable by the +/// IFDSSolver). Subclass this and override all pure-virtual functions to create +/// your own IFDS analysis. 
+/// +/// For more information on how to write an IFDS analysis, see [Writing an IFDS +/// Analysis](https://github.com/secure-software-engineering/phasar/wiki/Writing-an-IFDS-analysis) template > class IFDSTabulationProblem @@ -38,6 +44,16 @@ class IFDSTabulationProblem using typename Base::t_t; using typename Base::v_t; + /// Takes an IR data base (IRDB) and collects information from it to create a + /// tabulation problem. + /// @param[in] IRDB The project IR data base, that holds the code under + /// analysis + /// @param[in] EntryPoints The (mangled) names of all entry functions of the + /// project, given as a vector of strings. An example would simply be + /// `{"main"}`. To set every function as entry point, pass `"__ALL__"` + /// @param[in] ZeroValue Provides the special tautological zero value (aka. + /// Λ). + /// \endlink. explicit IFDSTabulationProblem(const ProjectIRDBBase *IRDB, std::vector EntryPoints, d_t ZeroValue) diff --git a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h index db7ea654e..1a0d18526 100644 --- a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h +++ b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h @@ -22,6 +22,9 @@ namespace psr { +/// \brief Represents the starting points of the analysis. +/// +/// The initial facts that should hold at the entry points. template class InitialSeeds { public: using GeneralizedSeeds = std::map>; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h index 744c8e7e3..20ff8b6c3 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/GenericSolverResults.h @@ -21,7 +21,7 @@ namespace psr { /// XXX (#734): When upgrading to C++20, create a concept checking valid /// SolverResults types -/// A type-erased version of the main functionality of SolverResults.
+/// \brief A type-erased version of the main functionality of SolverResults. /// Can be accepted by consumers that don't need deep access to the internals /// (so, the usual ones). As we have now two kinds of solver-results /// (SolverResults and IdBasedSolverResults), we need a common way of accessing diff --git a/include/phasar/DataFlow/IfdsIde/SolverResults.h b/include/phasar/DataFlow/IfdsIde/SolverResults.h index 664d0e111..83bb3d8b7 100644 --- a/include/phasar/DataFlow/IfdsIde/SolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/SolverResults.h @@ -46,11 +46,22 @@ class SolverResultsBase { using d_t = D; using l_t = L; + /// Returns the result that the IDE analysis computed for the fact Node right + /// after the statement Stmt. + /// + /// A default-constructed l_t, if no analysis result was computed at this + /// point. [[nodiscard]] ByConstRef resultAt(ByConstRef Stmt, ByConstRef Node) const { return self().Results.get(Stmt, Node); } + /// Returns the results that the IDE analysis computed right after the + /// statement Stmt. + /// + /// \param Stmt The statement, where the analysis results are requested + /// \param StripZero Whether the special zero value should be stripped from + /// the result. [[nodiscard]] std::unordered_map resultsAt(ByConstRef Stmt, bool StripZero) const { std::unordered_map Result = self().Results.row(Stmt); @@ -60,19 +71,26 @@ class SolverResultsBase { return Result; } + /// Returns the results that the IDE analysis computed right after the + /// statement Stmt. + /// + /// Does not strip the special zero value from the result. [[nodiscard]] const std::unordered_map & resultsAt(ByConstRef Stmt) const { return self().Results.row(Stmt); } + /// The internal representation of this SolverResults object. [[nodiscard]] const auto &rowMapView() const { return self().Results.rowMapView(); } + /// Whether the analysis has computed any results for the statement Stmt. 
[[nodiscard]] bool containsNode(ByConstRef Stmt) const { return self().Results.containsRow(Stmt); } + /// Similar to resultsAt(ByConstRef). [[nodiscard]] const auto &row(ByConstRef Stmt) const { return self().Results.row(Stmt); } diff --git a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h index 677b26639..0b970fcfe 100644 --- a/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h +++ b/include/phasar/PhasarLLVM/DB/LLVMProjectIRDB.h @@ -36,6 +36,7 @@ template <> struct ProjectIRDBTraits { using g_t = const llvm::GlobalVariable *; }; +/// \brief Project IR Database that manages a LLVM IR module. class LLVMProjectIRDB : public ProjectIRDBBase { friend ProjectIRDBBase; From df1fc48460e2f00064676012ee8d6b94f6330ea9 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 17:56:34 +0200 Subject: [PATCH 09/17] Add gh-pages deploy script --- .github/workflows/deploy-docs.yml | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/deploy-docs.yml diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 000000000..1e22794c8 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,37 @@ +name: Build and Deploy Doxagen Docs +on: + push: + branches: [ development ] + pull_request: # For testing only. TODO: Remove before merge! + branches: [ development ] + +jobs: + build-and-deploy: + runs-on: ubuntu-24.04 + strategy: + fail-fast: true + continue-on-error: false + permissions: + contents: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Install Phasar Dependencies + shell: bash + run: | + ./utils/InstallAptDependencies.sh --noninteractive tzdata clang-19 + + - name: Build Doxygen Docs + shell: bash + run: | + cmake -S . 
-B build -DPHASAR_BUILD_DOC=ON + cmake --build ./build --target doc_doxygen + + - name: Deploy Doxygen Docs on GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: build/docs/html From d9b194f4ab91a6ca940f57a2b981d2e8b1e8c614 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:02:29 +0200 Subject: [PATCH 10/17] pre-commit --- .github/workflows/deploy-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 1e22794c8..212ee4a95 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -1,4 +1,4 @@ -name: Build and Deploy Doxagen Docs +name: Build and Deploy Doxygen Docs on: push: branches: [ development ] From 0dd1d815a807ff0b6a7f04d4b85b346f8fdd188b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:08:27 +0200 Subject: [PATCH 11/17] Small compiler fix --- .github/workflows/deploy-docs.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 212ee4a95..aa8c080f6 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -23,10 +23,13 @@ jobs: - name: Install Phasar Dependencies shell: bash run: | - ./utils/InstallAptDependencies.sh --noninteractive tzdata clang-19 + ./utils/InstallAptDependencies.sh --noninteractive tzdata - name: Build Doxygen Docs shell: bash + env: + CXX: clang++-15 + CC: clang-15 run: | cmake -S . 
-B build -DPHASAR_BUILD_DOC=ON cmake --build ./build --target doc_doxygen From eb86390afdb6c46ea566c2506f09ed0be8ae642f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:14:52 +0200 Subject: [PATCH 12/17] pre-commit with newer clang version --- include/phasar/ControlFlow/ICFGBase.h | 7 ++++--- include/phasar/DataFlow/IfdsIde/EdgeFunction.h | 18 ++++++++++-------- .../phasar/DataFlow/IfdsIde/SolverResults.h | 4 ++-- .../phasar/DataFlow/IfdsIde/SpecialSummaries.h | 16 ++++++++-------- .../ControlFlow/LLVMBasedBackwardICFG.h | 2 +- include/phasar/Pointer/AliasInfo.h | 8 ++++---- include/phasar/Utils/DefaultAnalysisPrinter.h | 3 ++- include/phasar/Utils/OnTheFlyAnalysisPrinter.h | 5 +++-- include/phasar/Utils/Utilities.h | 4 +++- 9 files changed, 37 insertions(+), 30 deletions(-) diff --git a/include/phasar/ControlFlow/ICFGBase.h b/include/phasar/ControlFlow/ICFGBase.h index 251ac3082..ba6f9dc1f 100644 --- a/include/phasar/ControlFlow/ICFGBase.h +++ b/include/phasar/ControlFlow/ICFGBase.h @@ -124,9 +124,10 @@ template class ICFGBase { /// from the given analysis-Domain template // NOLINTNEXTLINE(readability-identifier-naming) -PSR_CONCEPT is_icfg_v = is_crtp_base_of_v - &&std::is_same_v - &&std::is_same_v; +PSR_CONCEPT is_icfg_v = + is_crtp_base_of_v && + std::is_same_v && + std::is_same_v; } // namespace psr diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h index 6a94c0f24..ca6c0ad70 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunction.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunction.h @@ -65,7 +65,7 @@ concept IsEdgeFunction = requires(const T &EF, const EdgeFunction std::same_as>; {T::join(CEF, TEEF)} -> std::same_as>; }; - // clang-format on +// clang-format on #endif @@ -78,10 +78,10 @@ enum class EdgeFunctionAllocationPolicy { class EdgeFunctionBase { public: template - static constexpr bool - IsSOOCandidate = sizeof(ConcreteEF) <= sizeof(void *) && // NOLINT - 
alignof(ConcreteEF) <= alignof(void *) && - std::is_trivially_copyable_v; + static constexpr bool IsSOOCandidate = + sizeof(ConcreteEF) <= sizeof(void *) && // NOLINT + alignof(ConcreteEF) <= alignof(void *) && + std::is_trivially_copyable_v; using AllocationPolicy = EdgeFunctionAllocationPolicy; @@ -89,7 +89,9 @@ class EdgeFunctionBase { struct RefCountedBase { mutable std::atomic_size_t Rc = 0; }; - template struct RefCounted : RefCountedBase { T Value; }; + template struct RefCounted : RefCountedBase { + T Value; + }; template struct CachedRefCounted : RefCounted { EdgeFunctionSingletonCache *Cache{}; @@ -265,8 +267,8 @@ class [[clang::trivial_abi]] EdgeFunction final : EdgeFunctionBase { explicit EdgeFunction( std::in_place_type_t /*unused*/, ArgTys &&...Args) noexcept(IsSOOCandidate> && - std::is_nothrow_constructible_v) + std::is_nothrow_constructible_v) : EdgeFunction( [](auto &&...Args) { if constexpr (IsSOOCandidate>) { diff --git a/include/phasar/DataFlow/IfdsIde/SolverResults.h b/include/phasar/DataFlow/IfdsIde/SolverResults.h index 83bb3d8b7..51b8b3ce9 100644 --- a/include/phasar/DataFlow/IfdsIde/SolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/SolverResults.h @@ -265,12 +265,12 @@ class OwningSolverResults D ZV) noexcept(std::is_nothrow_move_constructible_v) : Results(std::move(ResTab)), ZV(std::move(ZV)) {} - [[nodiscard]] SolverResults get() const &noexcept { + [[nodiscard]] SolverResults get() const & noexcept { return {Results, ZV}; } SolverResults get() && = delete; - [[nodiscard]] operator SolverResults() const &noexcept { + [[nodiscard]] operator SolverResults() const & noexcept { return get(); } diff --git a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h index 96675a4d3..f7cbd0cfc 100644 --- a/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h +++ b/include/phasar/DataFlow/IfdsIde/SpecialSummaries.h @@ -97,8 +97,8 @@ class [[deprecated("This ancient API is not maintained and should 
not be used " return SpecialFlowFunctions.count(Name); } - FlowFunctionPtrType getSpecialFlowFunctionSummary( - const llvm::Function *Func) { + FlowFunctionPtrType + getSpecialFlowFunctionSummary(const llvm::Function *Func) { return getSpecialFlowFunctionSummary(Func->getName()); } @@ -106,18 +106,18 @@ class [[deprecated("This ancient API is not maintained and should not be used " return SpecialFlowFunctions[Name]; } - std::shared_ptr> getSpecialEdgeFunctionSummary( - const llvm::Function *Func) { + std::shared_ptr> + getSpecialEdgeFunctionSummary(const llvm::Function *Func) { return getSpecialEdgeFunctionSummary(Func->getName()); } - std::shared_ptr> getSpecialEdgeFunctionSummary( - const std::string &Name) { + std::shared_ptr> + getSpecialEdgeFunctionSummary(const std::string &Name) { return SpecialEdgeFunctions[Name]; } - friend llvm::raw_ostream &operator<<( - llvm::raw_ostream &OS, const SpecialSummaries &SpecialSumms) { + friend llvm::raw_ostream & + operator<<(llvm::raw_ostream &OS, const SpecialSummaries &SpecialSumms) { OS << "SpecialSummaries:\n"; for (auto &Entry : SpecialSumms.SpecialFunctionNames) { OS << Entry.first << " "; diff --git a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h index 9368cc204..670a6a6c9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h +++ b/include/phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h @@ -37,7 +37,7 @@ class LLVMBasedBackwardICFG : public LLVMBasedBackwardCFG, public: LLVMBackwardRet(llvm::LLVMContext &Ctx) - : Instance(llvm::ReturnInst::Create(Ctx)){}; + : Instance(llvm::ReturnInst::Create(Ctx)) {}; [[nodiscard]] const llvm::ReturnInst *getInstance() const noexcept { return Instance; } diff --git a/include/phasar/Pointer/AliasInfo.h b/include/phasar/Pointer/AliasInfo.h index dce8fe5b1..a5f2f5f57 100644 --- a/include/phasar/Pointer/AliasInfo.h +++ b/include/phasar/Pointer/AliasInfo.h @@ -315,13 +315,13 @@ 
class [[clang::trivial_abi]] AliasInfo final : public AliasInfoRef { } } - [[nodiscard]] base_t asRef() &noexcept { return *this; } - [[nodiscard]] AliasInfoRef asRef() const &noexcept { return *this; } + [[nodiscard]] base_t asRef() & noexcept { return *this; } + [[nodiscard]] AliasInfoRef asRef() const & noexcept { return *this; } [[nodiscard]] AliasInfoRef asRef() && = delete; /// For better interoperability with unique_ptr - [[nodiscard]] base_t get() &noexcept { return asRef(); } - [[nodiscard]] AliasInfoRef get() const &noexcept { return asRef(); } + [[nodiscard]] base_t get() & noexcept { return asRef(); } + [[nodiscard]] AliasInfoRef get() const & noexcept { return asRef(); } [[nodiscard]] AliasInfoRef get() && = delete; }; diff --git a/include/phasar/Utils/DefaultAnalysisPrinter.h b/include/phasar/Utils/DefaultAnalysisPrinter.h index 7efa4183f..2eee3eac4 100644 --- a/include/phasar/Utils/DefaultAnalysisPrinter.h +++ b/include/phasar/Utils/DefaultAnalysisPrinter.h @@ -43,7 +43,8 @@ class DefaultAnalysisPrinter : public AnalysisPrinterBase { : OS(&OS) {} explicit DefaultAnalysisPrinter(const llvm::Twine &Filename) - : AnalysisPrinterBase(), OS(openFileStream(Filename)){}; + : AnalysisPrinterBase(), + OS(openFileStream(Filename)) {}; ~DefaultAnalysisPrinter() override = default; diff --git a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h index 75332cb30..bf574ceca 100644 --- a/include/phasar/Utils/OnTheFlyAnalysisPrinter.h +++ b/include/phasar/Utils/OnTheFlyAnalysisPrinter.h @@ -25,10 +25,11 @@ class OnTheFlyAnalysisPrinter : public AnalysisPrinterBase { public: explicit OnTheFlyAnalysisPrinter(llvm::raw_ostream &OS) - : AnalysisPrinterBase(), OS(&OS){}; + : AnalysisPrinterBase(), OS(&OS) {}; explicit OnTheFlyAnalysisPrinter(const llvm::Twine &Filename) - : AnalysisPrinterBase(), OS(openFileStream(Filename)){}; + : AnalysisPrinterBase(), + OS(openFileStream(Filename)) {}; OnTheFlyAnalysisPrinter() = default; 
~OnTheFlyAnalysisPrinter() = default; diff --git a/include/phasar/Utils/Utilities.h b/include/phasar/Utils/Utilities.h index d2eb99b6a..612732679 100644 --- a/include/phasar/Utils/Utilities.h +++ b/include/phasar/Utils/Utilities.h @@ -169,7 +169,9 @@ template class scope_exit { // NOLINT template scope_exit(Fn) -> scope_exit; // Copied from <https://en.cppreference.com/w/cpp/utility/variant/visit> -template struct Overloaded : Ts... { using Ts::operator()...; }; +template struct Overloaded : Ts... { + using Ts::operator()...; +}; // explicit deduction guide (not needed as of C++20) template Overloaded(Ts...) -> Overloaded; From 322ecc6ea7ef78015bfff9ecb0cf382163c62ed7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:17:02 +0200 Subject: [PATCH 13/17] install Doxygen in CI --- .github/workflows/deploy-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index aa8c080f6..4ad6017c0 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -23,7 +23,7 @@ jobs: - name: Install Phasar Dependencies shell: bash run: | - ./utils/InstallAptDependencies.sh --noninteractive tzdata + ./utils/InstallAptDependencies.sh --noninteractive tzdata doxygen - name: Build Doxygen Docs shell: bash From c8d626ac77bd1fe4976292fab33629025a376395 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:23:39 +0200 Subject: [PATCH 14/17] test --- .github/workflows/deploy-docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 4ad6017c0..c558d799c 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -4,15 +4,15 @@ on: branches: [ development ] pull_request: # For testing only. TODO: Remove before merge!
branches: [ development ] - +permissions: + contents: write jobs: build-and-deploy: runs-on: ubuntu-24.04 strategy: fail-fast: true continue-on-error: false - permissions: - contents: write + steps: - name: Checkout uses: actions/checkout@v4 From 50e0fb93ed933e46105120960f77a95332101f48 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:30:59 +0200 Subject: [PATCH 15/17] add token --- .github/workflows/deploy-docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index c558d799c..520a1ea83 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -38,3 +38,4 @@ jobs: uses: JamesIves/github-pages-deploy-action@v4 with: folder: build/docs/html + token: ${{ secrets.GITHUB_TOKEN }} From e698cf0d7c5ea9c43f3465b7488a09304f30cc5b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:51:54 +0200 Subject: [PATCH 16/17] Remove gh-pages deploy trigger on pull-request --- .github/workflows/deploy-docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 520a1ea83..8478cb3d3 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -2,8 +2,8 @@ name: Build and Deploy Doxygen Docs on: push: branches: [ development ] - pull_request: # For testing only. TODO: Remove before merge! - branches: [ development ] + # pull_request: # For testing only. Remove before merge! 
+ # branches: [ development ] permissions: contents: write jobs: From 1972d843ed23151d05b81587741ee5a9c9dbcaee Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 28 Apr 2025 18:57:18 +0200 Subject: [PATCH 17/17] install graphviz in CI to draw pretty class diagrams --- .github/workflows/deploy-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index 8478cb3d3..7588c25fd 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -23,7 +23,7 @@ jobs: - name: Install Phasar Dependencies shell: bash run: | - ./utils/InstallAptDependencies.sh --noninteractive tzdata doxygen + ./utils/InstallAptDependencies.sh --noninteractive tzdata doxygen graphviz - name: Build Doxygen Docs shell: bash