diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll index 001375b4dc54..fb3d7bf828f2 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll @@ -55,9 +55,10 @@ private module Cached { ) } - /** Gets a type tracker with no content and the call bit set to the given value. */ - cached - TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) } + pragma[nomagic] + private TypeTracker noContentTypeTracker(boolean hasCall) { + result = MkTypeTracker(hasCall, noContent()) + } /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ cached @@ -317,8 +318,6 @@ class StepSummary extends TStepSummary { /** Provides predicates for updating step summaries (`StepSummary`s). */ module StepSummary { - predicate append = Cached::append/2; - /** * Gets the summary that corresponds to having taken a forwards * inter-procedural step from `nodeFrom` to `nodeTo`. @@ -379,35 +378,6 @@ module StepSummary { } deprecated predicate localSourceStoreStep = flowsToStoreStep/3; - - /** Gets the step summary for a level step. */ - StepSummary levelStep() { result = LevelStep() } - - /** Gets the step summary for a call step. */ - StepSummary callStep() { result = CallStep() } - - /** Gets the step summary for a return step. */ - StepSummary returnStep() { result = ReturnStep() } - - /** Gets the step summary for storing into `content`. */ - StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) } - - /** Gets the step summary for loading from `content`. */ - StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) } - - /** Gets the step summary for loading from `load` and then storing into `store`. */ - StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) { - result = LoadStoreStep(load, store) - } - - /** Gets the step summary for a step that only permits contents matched by `filter`. */ - StepSummary withContent(ContentFilter filter) { result = WithContent(filter) } - - /** Gets the step summary for a step that blocks contents matched by `filter`. */ - StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) } - - /** Gets the step summary for a jump step. */ - StepSummary jumpStep() { result = JumpStep() } } /** @@ -570,13 +540,6 @@ module TypeTracker { * Gets a valid end point of type tracking. */ TypeTracker end() { result.end() } - - /** - * INTERNAL USE ONLY. - * - * Gets a valid end point of type tracking with the call bit set to the given value. - */ - predicate end = Cached::noContentTypeTracker/1; } pragma[nomagic] diff --git a/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md b/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md deleted file mode 100644 index 5aa79d5e2f31..000000000000 --- a/ruby/ql/lib/change-notes/2023-06-28-tracking-on-demand.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -category: majorAnalysis ---- -* The API graph library (`codeql.ruby.ApiGraphs`) has been significantly improved, with better support for inheritance, - and data-flow nodes can now be converted to API nodes by calling `.track()` or `.backtrack()` on the node. - API graphs allow for efficient modelling of how a given value is used by the code base, or how values produced by the code base - are consumed by a library. See the documentation for `API::Node` for details and examples. diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index 0d6e669b1dee..1cf4c4457817 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -1,13 +1,13 @@ /** - * Provides an implementation of _API graphs_, which allow efficient modelling of how a given - * value is used by the code base or how values produced by the code base are consumed by a library. + * Provides an implementation of _API graphs_, which are an abstract representation of the API + * surface used and/or defined by a code base. * - * See `API::Node` for more details. + * The nodes of the API graph represent definitions and uses of API components. The edges are + * directed and labeled; they specify how the components represented by nodes relate to each other. */ private import codeql.ruby.AST private import codeql.ruby.DataFlow -private import codeql.ruby.typetracking.ApiGraphShared private import codeql.ruby.typetracking.TypeTracker private import codeql.ruby.typetracking.TypeTrackerSpecific as TypeTrackerSpecific private import codeql.ruby.controlflow.CfgNodes @@ -19,140 +19,85 @@ private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatc */ module API { /** - * A node in the API graph, that is, a value that can be tracked interprocedurally. + * A node in the API graph, representing a value that has crossed the boundary between this + * codebase and an external library (or in general, any external codebase). * - * The API graph is a graph for tracking values of certain types in a way that accounts for inheritance - * and interprocedural data flow. + * ### Basic usage * * API graphs are typically used to identify "API calls", that is, calls to an external function * whose implementation is not necessarily part of the current codebase. * - * ### Basic usage - * * The most basic use of API graphs is typically as follows: * 1. Start with `API::getTopLevelMember` for the relevant library. * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function. - * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource`, `asSink`, or `asCall`. - * - * The following examples demonstrate how to identify the expression `x` in various basic cases: - * ```rb - * # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).asSink() - * Foo.bar(x) - * - * # API::getTopLevelMember("Foo").getMethod("bar").getKeywordArgument("foo").asSink() - * Foo.bar(foo: x) - * - * # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() - * Foo.new.bar(x) - * - * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource() - * end - * ``` + * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource` or `asSink`. * - * ### Data flow - * - * The members predicates on this class generally take inheritance and data flow into account. - * - * The following example demonstrates a case where data flow was used to find the sink `x`: - * ```ruby - * def doSomething f - * f.bar(x) # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() - * end - * doSomething Foo.new - * ``` - * The call `API::getTopLevelMember("Foo").getInstance()` identifies the `Foo.new` call, and `getMethod("bar")` - * then follows data flow from there to find calls to `bar` where that object flows to the receiver. - * This results in the `f.bar` call. - * - * ### Backward data flow - * - * When inspecting the arguments of a call, the data flow direction is backwards. - * The following example illustrates this when we match the `x` parameter of a block: - * ```ruby - * def doSomething &blk - * Foo.bar &blk - * end - * doSomething do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource() - * end + * For example, a simplified way to get arguments to `Foo.bar` would be + * ```ql + * API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink() * ``` - * When `getParameter(0)` is evaluated, the API graph backtracks the `&blk` argument to the block argument a few - * lines below. As a result, it eventually matches the `x` parameter of that block. * - * ### Inheritance + * The most commonly used accessors are `getMember`, `getMethod`, `getParameter`, and `getReturn`. * - * When a class or module object is tracked, inheritance is taken into account. + * ### API graph nodes * - * In the following example, a call to `Foo.bar` was found via a subclass of `Foo`, - * because classes inherit singleton methods from their base class: - * ```ruby - * class Subclass < Foo - * def self.doSomething - * bar(x) # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).asSink() - * end - * end - * ``` - * - * Similarly, instance methods can be found in subclasses, or ancestors of subclases in cases of multiple inheritance: - * ```rb - * module Mixin - * def doSomething - * bar(x) # API::getTopLevelMember("Foo").getInstance().getMethod("bar").getArgument(0).asSink() - * end - * end - * class Subclass < Foo - * include Mixin - * end - * ``` - * The value of `self` in `Mixin#doSomething` is seen as a potential instance of `Foo`, and is thus found by `getTopLevelMember("Foo").getInstance()`. - * This eventually results in finding the call `bar`, due to its implicit `self` receiver, and finally its argument `x` is found as the sink. + * There are two kinds of nodes in the API graphs, distinguished by who is "holding" the value: + * - **Use-nodes** represent values held by the current codebase, which came from an external library. + * (The current codebase is "using" a value that came from the library). + * - **Def-nodes** represent values held by the external library, which came from this codebase. + * (The current codebase "defines" the value seen by the library). * - * ### Backward data flow and classes + * API graph nodes are associated with data-flow nodes in the current codebase. + * (Since external libraries are not part of the database, there is no way to associate with concrete + * data-flow nodes from the external library). + * - **Use-nodes** are associated with data-flow nodes where a value enters the current codebase, + * such as the return value of a call to an external function. + * - **Def-nodes** are associated with data-flow nodes where a value leaves the current codebase, + * such as an argument passed in a call to an external function. * - * When inspecting the arguments of a call, and the value flowing into that argument is a user-defined class (or an instance thereof), - * uses of `getMethod` will find method definitions in that class (including inherited ones) rather than finding method calls. * - * This example illustrates how this can be used to model cases where the library calls a specific named method on a user-defined class: - * ```rb - * class MyClass - * def doSomething - * x # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("doSomething").getReturn().asSink() - * end - * end - * Foo.bar MyClass.new - * ``` + * ### Access paths and edge labels * - * When modeling an external library that is known to call a specific method on a parameter (in this case `doSomething`), this makes - * it possible to find the corresponding method definition in user code. + * Nodes in the API graph are associated with a set of access paths, describing a series of operations + * that may be performed to obtain that value. * - * ### Strict left-to-right evaluation + * For example, the access path `API::getTopLevelMember("Foo").getMethod("bar")` represents the action of + * reading the top-level constant `Foo` and then accessing the method `bar` on the resulting object. + * It would be associated with a call such as `Foo.bar()`. * - * Most member predicates on this class are intended to be chained, and are always evaluated from left to right, which means - * the caller should restrict the initial set of values. + * Each edge in the graph is labelled by such an "operation". For an edge `A->B`, the type of the `A` node + * determines who is performing the operation, and the type of the `B` node determines who ends up holding + * the result: + * - An edge starting from a use-node describes what the current codebase is doing to a value that + * came from a library. + * - An edge starting from a def-node describes what the external library might do to a value that + * came from the current codebase. + * - An edge ending in a use-node means the result ends up in the current codebase (at its associated data-flow node). + * - An edge ending in a def-node means the result ends up in external code (its associated data-flow node is + * the place where it was "last seen" in the current codebase before flowing out) * - * For example, in the following snippet, we always find the uses of `Foo` before finding calls to `bar`: - * ```ql - * API::getTopLevelMember("Foo").getMethod("bar") - * ``` - * In particular, the implementation will never look for calls to `bar` and work backward from there. + * Because the implementation of the external library is not visible, it is not known exactly what operations + * it will perform on values that flow there. Instead, the edges starting from a def-node are operations that would + * lead to an observable effect within the current codebase; without knowing for certain if the library will actually perform + * those operations. (When constructing these edges, we assume the library is somewhat well-behaved). * - * Beware of the footgun that is to use API graphs with an unrestricted receiver: - * ```ql - * API::Node barCall(API::Node base) { - * result = base.getMethod("bar") // Do not do this! - * } + * For example, given this snippet: + * ```ruby + * Foo.bar(->(x) { doSomething(x) }) * ``` - * The above predicate does not restrict the receiver, and will thus perform an interprocedural data flow - * search starting at every node in the graph, which is very expensive. + * A callback is passed to the external function `Foo.bar`. We can't know if `Foo.bar` will actually invoke this callback. + * But _if_ the library should decide to invoke the callback, then a value will flow into the current codebase via the `x` parameter. + * For that reason, an edge is generated representing the argument-passing operation that might be performed by `Foo.bar`. + * This edge is going from the def-node associated with the callback to the use-node associated with the parameter `x` of the lambda. */ class Node extends Impl::TApiNode { /** - * Gets a data-flow node where this value may flow interprocedurally. + * Gets a data-flow node where this value may flow after entering the current codebase. * * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow. * See `asSource()` for examples. */ - bindingset[this] - pragma[inline_late] + pragma[inline] DataFlow::Node getAValueReachableFromSource() { result = getAValueReachableFromSourceInline(this) } @@ -174,14 +119,16 @@ module API { * end * ``` */ - bindingset[this] - pragma[inline_late] - DataFlow::LocalSourceNode asSource() { result = asSourceInline(this) } + pragma[inline] + DataFlow::LocalSourceNode asSource() { + result = pragma[only_bind_out](this).(Node::Internal).asSourceInternal() + } /** - * Gets a data-flow node where this value potentially flows into an external library. + * Gets a data-flow node where this value leaves the current codebase and flows into an + * external library (or in general, any external codebase). * - * This is usually the argument of a call, but can also be the return value of a callback. + * Concretely, this corresponds to an argument passed to a call to external code. * * For example: * ```ruby @@ -196,395 +143,216 @@ module API { * }) * ``` */ - bindingset[this] - pragma[inline_late] - DataFlow::Node asSink() { result = asSinkInline(this) } - - /** - * Gets a callable that can reach this sink. - * - * For example: - * ```ruby - * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().asCallable() - * end - * - * class Baz - * def m # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("m").asCallable() - * end - * end - * Foo.bar(Baz.new) - * ``` - */ - bindingset[this] - pragma[inline_late] - DataFlow::CallableNode asCallable() { Impl::asCallable(this.getAnEpsilonSuccessor(), result) } + DataFlow::Node asSink() { Impl::def(this, result) } /** - * Get a data-flow node that transitively flows to this value, provided that this value corresponds - * to a sink. + * Get a data-flow node that transitively flows to an external library (or in general, any external codebase). * * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow. * See `asSink()` for examples. */ - bindingset[this] - pragma[inline_late] - DataFlow::Node getAValueReachingSink() { result = getAValueReachingSinkInline(this) } + DataFlow::Node getAValueReachingSink() { result = Impl::trackDefNode(this.asSink()) } - /** - * Gets a module or class referred to by this API node. - * - * For example: - * ```ruby - * module Foo - * class Bar # API::getTopLevelMember("Foo").getMember("Bar").asModule() - * end - * end - * ``` - */ - bindingset[this] - pragma[inline_late] - DataFlow::ModuleNode asModule() { this = Impl::MkModuleObjectDown(result) } + /** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */ + deprecated DataFlow::Node getAUse() { result = this.getAValueReachableFromSource() } - /** - * Gets the call referred to by this API node. - * - * For example: - * ```ruby - * # API::getTopLevelMember("Foo").getMethod("bar").asCall() - * Foo.bar - * - * class Bar < Foo - * def doSomething - * # API::getTopLevelMember("Foo").getInstance().getMethod("baz").asCall() - * baz - * end - * end - * ``` - */ - bindingset[this] - pragma[inline_late] - DataFlow::CallNode asCall() { this = Impl::MkMethodAccessNode(result) } + /** DEPRECATED. This predicate has been renamed to `asSource()`. */ + deprecated DataFlow::LocalSourceNode getAnImmediateUse() { result = this.asSource() } - /** - * DEPRECATED. Use `asCall()` instead. - */ - pragma[inline] - deprecated DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) } + /** DEPRECATED. This predicate has been renamed to `asSink()`. */ + deprecated DataFlow::Node getARhs() { result = this.asSink() } - /** - * Gets a module or class that descends from the module or class referenced by this API node. - */ - bindingset[this] - pragma[inline_late] - DataFlow::ModuleNode getADescendentModule() { result = this.getAnEpsilonSuccessor().asModule() } + /** DEPRECATED. This predicate has been renamed to `getAValueReachingSink()`. */ + deprecated DataFlow::Node getAValueReachingRhs() { result = this.getAValueReachingSink() } /** - * Gets a call to a method on the receiver represented by this API node. - * - * This is a shorthand for `getMethod(method).asCall()`, and thus returns a data-flow node - * rather than an API node. - * - * For example: - * ```ruby - * # API::getTopLevelMember("Foo").getAMethodCall("bar") - * Foo.bar - * ``` + * Gets a call to a method on the receiver represented by this API component. */ pragma[inline] - DataFlow::CallNode getAMethodCall(string method) { - // This predicate is currently not 'inline_late' because 'method' can be an input or output - result = this.getMethod(method).asCall() - } + DataFlow::CallNode getAMethodCall(string method) { result = this.getReturn(method).asSource() } /** - * Gets an access to the constant `m` with this value as the base of the access. + * Gets a node representing member `m` of this API component. * - * For example: - * ```ruby - * A::B # API::getATopLevelMember("A").getMember("B") + * For example, a member can be: * - * module A - * class B # API::getATopLevelMember("A").getMember("B") - * end - * end - * ``` + * - A submodule of a module + * - An attribute of an object */ pragma[inline] Node getMember(string m) { - // This predicate is currently not 'inline_late' because 'm' can be an input or output - Impl::memberEdge(this.getAnEpsilonSuccessor(), m, result) + result = pragma[only_bind_out](this).(Node::Internal).getMemberInternal(m) } /** - * Gets an access to a constant with this value as the base of the access. - * - * This is equivalent to `getMember(_)` but can be more efficient. + * Gets a node representing a member of this API component where the name of the member may + * or may not be known statically. */ - bindingset[this] - pragma[inline_late] - Node getAMember() { Impl::anyMemberEdge(this.getAnEpsilonSuccessor(), result) } + cached + Node getAMember() { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::member(_)) + } /** - * Gets a node that may refer to an instance of the module or class represented by this API node. - * - * This includes the following: - * - Calls to `new` on this module or class or a descendent thereof - * - References to `self` in instance methods declared in any ancestor of any descendent of this module or class - * - * For example: - * ```ruby - * A.new # API::getTopLevelMember("A").getInstance() - * - * class B < A - * def m - * self # API::getTopLevelMember("A").getInstance() - * end - * end + * Gets a node representing an instance of this API component, that is, an object whose + * constructor is the function represented by this node. * - * B.new # API::getTopLevelMember("A").getInstance() + * For example, if this node represents a use of some class `A`, then there might be a node + * representing instances of `A`, typically corresponding to expressions `A.new` at the + * source level. * - * class C < A - * include Mixin - * end - * module Mixin - * def m - * # Although 'Mixin' is not directly related to 'A', 'self' may refer to an instance of 'A' - * # due to its inclusion in a subclass of 'A'. - * self # API::getTopLevelMember("A").getInstance() - * end - * end - * ``` + * This predicate may have multiple results when there are multiple constructor calls invoking this API component. + * Consider using `getAnInstantiation()` if there is a need to distinguish between individual constructor calls. */ - bindingset[this] - pragma[inline_late] - Node getInstance() { Impl::instanceEdge(this.getAnEpsilonSuccessor(), result) } + pragma[inline] + Node getInstance() { result = this.getASubclass().getReturn("new") } /** - * Gets a call to `method` with this value as the receiver, or the definition of `method` on - * an object that can reach this sink. - * - * If the receiver represents a module or class object, this includes calls on descendents of that module or class. - * - * For example: - * ```ruby - * # API::getTopLevelMember("Foo").getMethod("bar") - * Foo.bar - * - * # API::getTopLevelMember("Foo").getInstance().getMethod("bar") - * Foo.new.bar - * - * class B < Foo - * end - * B.bar # API::getTopLevelMember("Foo").getMethod("bar") - * - * class C - * def m # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getMethod("m") - * end - * end - * Foo.bar(C.new) - * ``` + * Gets a node representing a call to `method` on the receiver represented by this node. */ pragma[inline] - Node getMethod(string method) { - // TODO: Consider 'getMethodTarget(method)' for looking up method definitions? - // This predicate is currently not 'inline_late' because 'method' can be an input or output - Impl::methodEdge(this.getAnEpsilonSuccessor(), method, result) + MethodAccessNode getMethod(string method) { + result = pragma[only_bind_out](this).(Node::Internal).getMethodInternal(method) } /** - * Gets the result of this call, or the return value of this callable. + * Gets a node representing the result of this call. */ - bindingset[this] - pragma[inline_late] - Node getReturn() { Impl::returnEdge(this.getAnEpsilonSuccessor(), result) } + pragma[inline] + Node getReturn() { result = pragma[only_bind_out](this).(Node::Internal).getReturnInternal() } /** - * Gets the result of a call to `method` with this value as the receiver, or the return value of `method` defined on - * an object that can reach this sink. - * - * This is a shorthand for `getMethod(method).getReturn()`. + * Gets a node representing the result of calling a method on the receiver represented by this node. */ pragma[inline] - Node getReturn(string method) { - // This predicate is currently not 'inline_late' because 'method' can be an input or output - result = this.getMethod(method).getReturn() + Node getReturn(string method) { result = this.getMethod(method).getReturn() } + + /** Gets an API node representing the `n`th positional parameter. */ + cached + Node getParameter(int n) { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::parameter(n)) } - /** - * Gets the `n`th positional argument to this call. - * - * For example, this would get `x` in the following snippet: - * ```ruby - * Foo.bar(x) # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0) - * ``` - */ - pragma[inline] - Node getArgument(int n) { - // This predicate is currently not 'inline_late' because 'n' can be an input or output - Impl::positionalArgumentEdge(this, n, result) + /** Gets an API node representing the given keyword parameter. */ + cached + Node getKeywordParameter(string name) { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::keywordParameter(name)) + } + + /** Gets an API node representing the block parameter. */ + cached + Node getBlock() { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::blockParameter()) } /** - * Gets the given keyword argument to this call. - * - * For example, this would get `x` in the following snippet: - * ```ruby - * Foo.bar(baz: x) # API::getTopLevelMember("Foo").getMethod("bar").getKeywordArgument("baz") - * ``` + * Gets a `new` call to the function represented by this API component. */ pragma[inline] - Node getKeywordArgument(string name) { - // This predicate is currently not 'inline_late' because 'name' can be an input or output - Impl::keywordArgumentEdge(this, name, result) - } + DataFlow::ExprNode getAnInstantiation() { result = this.getInstance().asSource() } /** - * Gets the block parameter of a callable that can reach this sink. - * - * For example, this would get the `&blk` in the following snippet: - * ```ruby - * # API::getTopLevelMember("Foo").getMethod("bar").getArgument(0).getBlockParameter() - * Foo.bar(->(&blk) {}) - * end + * Gets a node representing a (direct or indirect) subclass of the class represented by this node. + * ```rb + * class A; end + * class B < A; end + * class C < B; end * ``` + * In the example above, `getMember("A").getASubclass()` will return uses of `A`, `B` and `C`. */ - bindingset[this] - pragma[inline_late] - Node getBlockParameter() { Impl::blockParameterEdge(this.getAnEpsilonSuccessor(), result) } + Node getASubclass() { result = this.getAnImmediateSubclass*() } /** - * Gets the `n`th positional parameter of this callable, or the `n`th positional argument to this call. - * - * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. - * When referring to an argument, it is recommended to use `getArgument(n)` instead. + * Gets a node representing a direct subclass of the class represented by this node. + * ```rb + * class A; end + * class B < A; end + * class C < B; end + * ``` + * In the example above, `getMember("A").getAnImmediateSubclass()` will return uses of `B` only. */ - pragma[inline] - Node getParameter(int n) { - // This predicate is currently not 'inline_late' because 'n' can be an input or output - Impl::positionalParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), n, result) + cached + Node getAnImmediateSubclass() { + Impl::forceCachingInSameStage() and result = this.getASuccessor(Label::subclass()) } /** - * Gets the given keyword parameter of this callable, or keyword argument to this call. - * - * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. - * When referring to an argument, it is recommended to use `getKeywordArgument(n)` instead. + * Gets a node representing the `content` stored on the base object. */ - pragma[inline] - Node getKeywordParameter(string name) { - // This predicate is currently not 'inline_late' because 'name' can be an input or output - Impl::keywordParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), name, result) + cached + Node getContent(DataFlow::Content content) { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::content(content)) } /** - * Gets the block argument to this call, or the block parameter of this callable. - * - * Note: this predicate may refer to either an argument or a parameter. When referring to a block parameter, - * it is recommended to use `getBlockParameter()` instead. - * - * For example: - * ```ruby - * Foo.bar do |x| # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0) - * end - * ``` - */ - bindingset[this] - pragma[inline_late] - Node getBlock() { Impl::blockParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), result) } - - /** - * Gets the argument passed in argument position `pos` at this call. + * Gets a node representing the `contents` stored on the base object. */ pragma[inline] - Node getArgumentAtPosition(DataFlowDispatch::ArgumentPosition pos) { - // This predicate is currently not 'inline_late' because 'pos' can be an input or output - Impl::argumentEdge(pragma[only_bind_out](this), pos, result) // note: no need for epsilon step since 'this' must be a call + Node getContents(DataFlow::ContentSet contents) { + // We always use getAStoreContent when generating the graph, and we always use getAReadContent when querying the graph. + result = this.getContent(contents.getAReadContent()) } - /** - * Gets the parameter at position `pos` of this callable. - */ - pragma[inline] - Node getParameterAtPosition(DataFlowDispatch::ParameterPosition pos) { - // This predicate is currently not 'inline_late' because 'pos' can be an input or output - Impl::parameterEdge(this.getAnEpsilonSuccessor(), pos, result) + /** Gets a node representing the instance field of the given `name`, which must include the `@` character. */ + cached + Node getField(string name) { + Impl::forceCachingInSameStage() and + result = this.getContent(DataFlowPrivate::TFieldContent(name)) + } + + /** Gets a node representing an element of this collection (known or unknown). */ + cached + Node getAnElement() { + Impl::forceCachingInSameStage() and + result = this.getContents(any(DataFlow::ContentSet set | set.isAnyElement())) } /** - * Gets a `new` call with this value as the receiver. + * Gets a string representation of the lexicographically least among all shortest access paths + * from the root to this node. */ - bindingset[this] - pragma[inline_late] - DataFlow::ExprNode getAnInstantiation() { result = this.getReturn("new").asSource() } + string getPath() { + result = min(string p | p = this.getAPath(Impl::distanceFromRoot(this)) | p) + } /** - * Gets a representative for the `content` of this value. - * - * When possible, it is preferrable to use one of the specialized variants of this predicate, such as `getAnElement`. - * - * Concretely, this gets sources where `content` is read from this value, and as well as sinks where - * `content` is stored onto this value or onto an object that can reach this sink. + * Gets a node such that there is an edge in the API graph between this node and the other + * one, and that edge is labeled with `lbl`. */ - pragma[inline] - Node getContent(DataFlow::Content content) { - // This predicate is currently not 'inline_late' because 'content' can be an input or output - Impl::contentEdge(this.getAnEpsilonSuccessor(), content, result) - } + Node getASuccessor(Label::ApiLabel lbl) { Impl::edge(this, lbl, result) } /** - * Gets a representative for the `contents` of this value. - * - * See `getContent()` for more details. + * Gets a node such that there is an edge in the API graph between that other node and + * this one, and that edge is labeled with `lbl` */ - bindingset[this, contents] - pragma[inline_late] - Node getContents(DataFlow::ContentSet contents) { - // We always use getAStoreContent when generating content edges, and we always use getAReadContent when querying the graph. - result = this.getContent(contents.getAReadContent()) - } + Node getAPredecessor(Label::ApiLabel lbl) { this = result.getASuccessor(lbl) } /** - * Gets a representative for the instance field of the given `name`, which must include the `@` character. - * - * This can be used to find cases where a class accesses the fields used by a base class. - * - * ```ruby - * class A < B - * def m - * @foo # API::getTopLevelMember("B").getInstance().getField("@foo") - * end - * end - * ``` + * Gets a node such that there is an edge in the API graph between this node and the other + * one. */ - pragma[inline] - Node getField(string name) { - // This predicate is currently not 'inline_late' because 'name' can be an input or output - Impl::fieldEdge(this.getAnEpsilonSuccessor(), name, result) - } + Node getAPredecessor() { result = this.getAPredecessor(_) } /** - * Gets a representative for an arbitrary element of this collection. - * - * For example: - * ```ruby - * Foo.bar.each do |x| # API::getTopLevelMember("Foo").getMethod("bar").getReturn().getAnElement() - * end - * - * Foo.bar[0] # API::getTopLevelMember("Foo").getMethod("bar").getReturn().getAnElement() - * ``` + * Gets a node such that there is an edge in the API graph between that other node and + * this one. */ - bindingset[this] - pragma[inline_late] - Node getAnElement() { Impl::elementEdge(this.getAnEpsilonSuccessor(), result) } + Node getASuccessor() { result = this.getASuccessor(_) } /** * Gets the data-flow node that gives rise to this node, if any. */ DataFlow::Node getInducingNode() { - this = Impl::MkMethodAccessNode(result) or - this = Impl::MkBackwardNode(result, _) or - this = Impl::MkForwardNode(result, _) or - this = Impl::MkSinkNode(result) + this = Impl::MkUse(result) + or + this = Impl::MkDef(result) + or + this = Impl::MkMethodAccessNode(result) } /** Gets the location of this node. */ @@ -592,14 +360,13 @@ module API { result = this.getInducingNode().getLocation() or exists(DataFlow::ModuleNode mod | - this = Impl::MkModuleObjectDown(mod) - or - this = Impl::MkModuleInstanceUp(mod) - | + this = Impl::MkModuleObject(mod) and result = mod.getLocation() ) or - this instanceof RootNode and + // For nodes that do not have a meaningful location, `path` is the empty string and all other + // parameters are zero. + not exists(this.getInducingNode()) and result instanceof EmptyLocation } @@ -609,181 +376,116 @@ module API { string toString() { none() } /** - * Gets a node representing a (direct or indirect) subclass of the class represented by this node. - * ```rb - * class A; end - * class B < A; end - * class C < B; end - * ``` - * In the example above, `getMember("A").getASubclass()` will return uses of `A`, `B` and `C`. - */ - pragma[inline] - deprecated Node getASubclass() { result = this } - - /** - * Gets a node representing a direct subclass of the class represented by this node. - * ```rb - * class A; end - * class B < A; end - * class C < B; end - * ``` - * In the example above, `getMember("A").getAnImmediateSubclass()` will return uses of `B` only. + * Gets a path of the given `length` from the root to this node. */ - pragma[inline] - deprecated Node getAnImmediateSubclass() { - result = this.asModule().getAnImmediateDescendent().trackModule() + private string getAPath(int length) { + this instanceof Impl::MkRoot and + length = 0 and + result = "" + or + exists(Node pred, Label::ApiLabel lbl, string predpath | + Impl::edge(pred, lbl, this) and + predpath = pred.getAPath(length - 1) and + exists(string dot | if length = 1 then dot = "" else dot = "." | + result = predpath + dot + lbl and + // avoid producing strings longer than 1MB + result.length() < 1000 * 1000 + ) + ) and + length in [1 .. Impl::distanceFromRoot(this)] } - /** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */ - deprecated DataFlow::Node getAUse() { result = this.getAValueReachableFromSource() } - - /** DEPRECATED. This predicate has been renamed to `asSource()`. */ - deprecated DataFlow::LocalSourceNode getAnImmediateUse() { result = this.asSource() } - - /** DEPRECATED. This predicate has been renamed to `asSink()`. */ - deprecated DataFlow::Node getARhs() { result = this.asSink() } - - /** DEPRECATED. This predicate has been renamed to `getAValueReachingSink()`. */ - deprecated DataFlow::Node getAValueReachingRhs() { result = this.getAValueReachingSink() } + /** Gets the shortest distance from the root to this node in the API graph. */ + int getDepth() { result = Impl::distanceFromRoot(this) } + } + /** Companion module to the `Node` class. */ + module Node { /** - * DEPRECATED. API graph nodes are no longer associated with specific paths. + * INTERNAL USE ONLY. * - * Gets a string representation of the lexicographically least among all shortest access paths - * from the root to this node. + * An API node, with some internal predicates exposed. */ - deprecated string getPath() { none() } + class Internal extends Node { + /** + * INTERNAL USE ONLY. + * + * Same as `asSource()` but without join-order hints. + */ + cached + DataFlow::LocalSourceNode asSourceInternal() { + Impl::forceCachingInSameStage() and + Impl::use(this, result) + } - /** - * DEPRECATED. Use label-specific predicates in this class, such as `getMember`, instead of using `getASuccessor`. - * - * Gets a node such that there is an edge in the API graph between this node and the other - * one, and that edge is labeled with `lbl`. - */ - pragma[inline] - deprecated Node getASuccessor(Label::ApiLabel lbl) { - labelledEdge(this.getAnEpsilonSuccessor(), lbl, result) - } - - /** - * DEPRECATED. API graphs no longer support backward traversal of edges. If possible use `.backtrack()` to get - * a node intended for backtracking. - * - * Gets a node such that there is an edge in the API graph between that other node and - * this one, and that edge is labeled with `lbl` - */ - deprecated Node getAPredecessor(Label::ApiLabel lbl) { this = result.getASuccessor(lbl) } - - /** - * DEPRECATED. API graphs no longer support backward traversal of edges. If possible use `.backtrack()` to get - * a node intended for backtracking. - * - * Gets a node such that there is an edge in the API graph between this node and the other - * one. - */ - deprecated Node getAPredecessor() { result = this.getAPredecessor(_) } - - /** - * Gets a node such that there is an edge in the API graph between that other node and - * this one. - */ - pragma[inline] - deprecated Node getASuccessor() { result = this.getASuccessor(_) } - - /** DEPRECATED. API graphs are no longer associated with a depth. */ - deprecated int getDepth() { none() } - - pragma[inline] - private Node getAnEpsilonSuccessor() { result = getAnEpsilonSuccessorInline(this) } - } - - /** DEPRECATED. Use `API::root()` to access the root node. */ - deprecated class Root = RootNode; - - /** DEPRECATED. A node corresponding to the use of an API component. */ - deprecated class Use = ForwardNode; - - /** DEPRECATED. A node corresponding to a value escaping into an API component. */ - deprecated class Def = SinkNode; - - /** The root node of an API graph. */ - private class RootNode extends Node, Impl::MkRoot { - override string toString() { result = "Root()" } - } - - /** A node representing a given type-tracking state when tracking forwards. */ - private class ForwardNode extends Node, Impl::MkForwardNode { - private DataFlow::LocalSourceNode node; - private TypeTracker tracker; + /** + * Same as `getMember` but without join-order hints. + */ + cached + Node getMemberInternal(string m) { + Impl::forceCachingInSameStage() and + result = this.getASuccessor(Label::member(m)) + } - ForwardNode() { this = Impl::MkForwardNode(node, tracker) } + /** + * Same as `getMethod` but without join-order hints. + */ + cached + MethodAccessNode getMethodInternal(string method) { + Impl::forceCachingInSameStage() and + result = this.getASubclass().getASuccessor(Label::method(method)) + } - override string toString() { - if tracker.start() - then result = "ForwardNode(" + node + ")" - else result = "ForwardNode(" + node + ", " + tracker + ")" + /** + * INTERNAL USE ONLY. + * + * Same as `getReturn()` but without join-order hints. + */ + cached + Node getReturnInternal() { + Impl::forceCachingInSameStage() and result = this.getASuccessor(Label::return()) + } } } - /** A node representing a given type-tracking state when tracking backwards. */ - private class BackwardNode extends Node, Impl::MkBackwardNode { - private DataFlow::LocalSourceNode node; - private TypeTracker tracker; - - BackwardNode() { this = Impl::MkBackwardNode(node, tracker) } - - override string toString() { - if tracker.start() - then result = "BackwardNode(" + node + ")" - else result = "BackwardNode(" + node + ", " + tracker + ")" - } + bindingset[node] + pragma[inline_late] + private DataFlow::Node getAValueReachableFromSourceInline(Node node) { + exists(DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode dst | + Impl::use(node, pragma[only_bind_into](src)) and + pragma[only_bind_into](dst) = Impl::trackUseNode(src) and + dst.flowsTo(result) + ) } - /** A node representing a module/class object with epsilon edges to its descendents. */ - private class ModuleObjectDownNode extends Node, Impl::MkModuleObjectDown { - /** Gets the module represented by this API node. */ - DataFlow::ModuleNode getModule() { this = Impl::MkModuleObjectDown(result) } - - override string toString() { result = "ModuleObjectDown(" + this.getModule() + ")" } + /** The root node of an API graph. */ + class Root extends Node, Impl::MkRoot { + override string toString() { result = "root" } } - /** A node representing a module/class object with epsilon edges to its ancestors. */ - private class ModuleObjectUpNode extends Node, Impl::MkModuleObjectUp { - /** Gets the module represented by this API node. */ - DataFlow::ModuleNode getModule() { this = Impl::MkModuleObjectUp(result) } - - override string toString() { result = "ModuleObjectUp(" + this.getModule() + ")" } + private string tryGetPath(Node node) { + result = node.getPath() + or + not exists(node.getPath()) and + result = "with no path" } - /** A node representing instances of a module/class with epsilon edges to its ancestors. */ - private class ModuleInstanceUpNode extends Node, Impl::MkModuleInstanceUp { - /** Gets the module whose instances are represented by this API node. */ - DataFlow::ModuleNode getModule() { this = Impl::MkModuleInstanceUp(result) } - - override string toString() { result = "ModuleInstanceUp(" + this.getModule() + ")" } + /** A node corresponding to the use of an API component. */ + class Use extends Node, Impl::MkUse { + override string toString() { result = "Use " + tryGetPath(this) } } - /** A node representing instances of a module/class with epsilon edges to its descendents. */ - private class ModuleInstanceDownNode extends Node, Impl::MkModuleInstanceDown { - /** Gets the module whose instances are represented by this API node. */ - DataFlow::ModuleNode getModule() { this = Impl::MkModuleInstanceDown(result) } - - override string toString() { result = "ModuleInstanceDown(" + this.getModule() + ")" } + /** A node corresponding to a value escaping into an API component. */ + class Def extends Node, Impl::MkDef { + override string toString() { result = "Def " + tryGetPath(this) } } /** A node corresponding to the method being invoked at a method call. */ class MethodAccessNode extends Node, Impl::MkMethodAccessNode { - override string toString() { result = "MethodAccessNode(" + this.asCall() + ")" } - } + override string toString() { result = "MethodAccessNode " + tryGetPath(this) } - /** - * A node corresponding to an argument, right-hand side of a store, or return value from a callable. - * - * Such a node may serve as the starting-point of backtracking, and has epsilon edges going to - * the backward nodes corresponding to `getALocalSource`. - */ - private class SinkNode extends Node, Impl::MkSinkNode { - override string toString() { result = "SinkNode(" + this.getInducingNode() + ")" } + /** Gets the call node corresponding to this method access. */ + DataFlow::CallNode getCallNode() { this = Impl::MkMethodAccessNode(result) } } /** @@ -797,8 +499,6 @@ module API { * additional entry points may be added by extending this class. */ abstract class EntryPoint extends string { - // Note: this class can be deprecated in Ruby, but is still referenced by shared code in ApiGraphModels.qll, - // where it can't be removed since other languages are still dependent on the EntryPoint class. bindingset[this] EntryPoint() { any() } @@ -818,7 +518,7 @@ module API { DataFlow::CallNode getACall() { none() } /** Gets an API-node for this entry point. */ - API::Node getANode() { Impl::entryPointEdge(this, result) } + API::Node getANode() { result = root().getASuccessor(Label::entryPoint(this)) } } // Ensure all entry points are imported from ApiGraphs.qll @@ -827,590 +527,389 @@ module API { } /** Gets the root node. */ - Node root() { result instanceof RootNode } + Root root() { any() } /** - * Gets an access to the top-level constant `name`. + * Gets a node corresponding to a top-level member `m` (typically a module). + * + * This is equivalent to `root().getAMember("m")`. * - * To access nested constants, use `getMember()` on the resulting node. For example, for nodes corresponding to the class `Gem::Version`, + * Note: You should only use this predicate for top level modules or classes. If you want nodes corresponding to a nested module or class, + * you should use `.getMember` on the parent module/class. For example, for nodes corresponding to the class `Gem::Version`, * use `getTopLevelMember("Gem").getMember("Version")`. */ - pragma[inline] - Node getTopLevelMember(string name) { Impl::topLevelMember(name, result) } + cached + Node getTopLevelMember(string m) { + Impl::forceCachingInSameStage() and result = root().(Node::Internal).getMemberInternal(m) + } /** - * Gets an unqualified call at the top-level with the given method name. + * Provides the actual implementation of API graphs, cached for performance. + * + * Ideally, we'd like nodes to correspond to (global) access paths, with edge labels + * corresponding to extending the access path by one element. We also want to be able to map + * nodes to their definitions and uses in the data-flow graph, and this should happen modulo + * (inter-procedural) data flow. + * + * This, however, is not easy to implement, since access paths can have unbounded length + * and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing + * a condition like "this node hasn't been involved in constructing any predecessor of + * this node in the API graph" without negative recursion is tricky. + * + * So instead most nodes are directly associated with a data-flow node, representing + * either a use or a definition of an API component. This ensures that we only have a finite + * number of nodes. However, we can now have multiple nodes with the same access + * path, which are essentially indistinguishable for a client of the API. + * + * On the other hand, a single node can have multiple access paths (which is, of + * course, unavoidable). We pick as canonical the alphabetically least access path with + * shortest length. */ - pragma[inline] - MethodAccessNode getTopLevelCall(string name) { Impl::toplevelCall(name, result) } - - pragma[nomagic] - private predicate isReachable(DataFlow::LocalSourceNode node, TypeTracker t) { - t.start() and exists(node) - or - exists(DataFlow::LocalSourceNode prev, TypeTracker t2 | - isReachable(prev, t2) and - node = prev.track(t2, t) and - notSelfParameter(node) - ) - } - - bindingset[node] - pragma[inline_late] - private predicate notSelfParameter(DataFlow::Node node) { - not node instanceof DataFlow::SelfParameterNode - } - - private module SharedArg implements ApiGraphSharedSig { - class ApiNode = Node; + cached + private module Impl { + cached + predicate forceCachingInSameStage() { any() } - ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { - result = Impl::MkForwardNode(node, t) + cached + predicate forceCachingBackref() { + 1 = 1 + or + exists(getTopLevelMember(_)) + or + exists( + any(Node n) + .(Node::Internal) + .getMemberInternal("foo") + .getAMember() + .(Node::Internal) + .getMethodInternal("foo") + .(Node::Internal) + .getReturnInternal() + .getParameter(0) + .getKeywordParameter("foo") + .getBlock() + .getAnImmediateSubclass() + .getContent(_) + .getField(_) + .getAnElement() + .(Node::Internal) + .asSourceInternal() + ) } - ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { - result = Impl::MkBackwardNode(node, t) - } + cached + newtype TApiNode = + /** The root of the API graph. */ + MkRoot() or + /** The method accessed at `call`, synthetically treated as a separate object. */ + MkMethodAccessNode(DataFlow::CallNode call) { isUse(call) } or + /** A use of an API member at the node `nd`. */ + MkUse(DataFlow::Node nd) { isUse(nd) } or + /** A value that escapes into an external library at the node `nd` */ + MkDef(DataFlow::Node nd) { isDef(nd) } or + /** A module object seen as a use node. */ + MkModuleObject(DataFlow::ModuleNode mod) - ApiNode getSinkNode(DataFlow::Node node) { result = Impl::MkSinkNode(node) } + private string resolveTopLevel(ConstantReadAccess read) { + result = read.getModule().getQualifiedName() and + not result.matches("%::%") + } + /** + * Holds if `ref` is a use of a node that should have an incoming edge from the root + * node labeled `lbl` in the API graph (not including those from API::EntryPoint). + */ pragma[nomagic] - predicate specificEpsilonEdge(ApiNode pred, ApiNode succ) { - exists(DataFlow::ModuleNode mod | - moduleReferenceEdge(mod, pred, succ) - or - moduleInheritanceEdge(mod, pred, succ) + private predicate useRoot(Label::ApiLabel lbl, DataFlow::Node ref) { + exists(string name, ConstantReadAccess read | + read = ref.asExpr().getExpr() and + lbl = Label::member(read.getName()) + | + name = resolveTopLevel(read) or - pred = getForwardEndNode(getSuperClassNode(mod)) and - succ = Impl::MkModuleObjectDown(mod) + name = read.getName() and + not exists(resolveTopLevel(read)) and + not exists(read.getScopeExpr()) ) - or - implicitCallEdge(pred, succ) - or - exists(DataFlow::HashLiteralNode splat | hashSplatEdge(splat, pred, succ)) } /** - * Holds if the epsilon edge `pred -> succ` should be generated, to handle inheritance relations of `mod`. + * Holds if `ref` is a use of a node that should have an incoming edge labeled `lbl`, + * from a use node that flows to `node`. */ - pragma[inline] - private predicate moduleInheritanceEdge(DataFlow::ModuleNode mod, ApiNode pred, ApiNode succ) { - pred = Impl::MkModuleObjectDown(mod) and - succ = Impl::MkModuleObjectDown(mod.getAnImmediateDescendent()) - or - pred = Impl::MkModuleInstanceDown(mod) and - succ = Impl::MkModuleInstanceDown(mod.getAnImmediateDescendent()) - or - exists(DataFlow::ModuleNode ancestor | - ancestor = mod.getAnImmediateAncestor() and - // Restrict flow back to Object to avoid spurious flow for methods that happen - // to exist on Object, such as top-level methods. - not ancestor.getQualifiedName() = "Object" - | - pred = Impl::MkModuleInstanceUp(mod) and - succ = Impl::MkModuleInstanceUp(ancestor) - or - pred = Impl::MkModuleObjectUp(mod) and - succ = Impl::MkModuleObjectUp(ancestor) + private predicate useStep(Label::ApiLabel lbl, DataFlow::Node node, DataFlow::Node ref) { + // // Referring to an attribute on a node that is a use of `base`: + // pred = `Rails` part of `Rails::Whatever` + // lbl = `Whatever` + // ref = `Rails::Whatever` + exists(ExprNodes::ConstantAccessCfgNode c, ConstantReadAccess read | + not exists(resolveTopLevel(read)) and + node.asExpr() = c.getScopeExpr() and + lbl = Label::member(read.getName()) and + ref.asExpr() = c and + read = c.getExpr() ) or - // Due to multiple inheritance, allow upwards traversal after downward traversal, - // so we can detect calls sideways in the hierarchy. - // Note that a similar case does not exist for ModuleObject since singleton methods are only inherited - // from the superclass, and there can only be one superclass. - pred = Impl::MkModuleInstanceDown(mod) and - succ = Impl::MkModuleInstanceUp(mod) + exists(TypeTrackerSpecific::TypeTrackerContent c | + TypeTrackerSpecific::basicLoadStep(node, ref, c) and + lbl = Label::content(c.getAStoreContent()) and + not c.isSingleton(any(DataFlow::Content::AttributeNameContent k)) + ) + // note: method calls are not handled here as there is no DataFlow::Node for the intermediate MkMethodAccessNode API node } /** - * Holds if the epsilon `pred -> succ` should be generated, to associate `mod` with its references in the codebase. + * Holds if `rhs` is a definition of a node that should have an incoming edge labeled `lbl`, + * from a def node that is reachable from `node`. */ - bindingset[mod] - pragma[inline_late] - private predicate moduleReferenceEdge(DataFlow::ModuleNode mod, ApiNode pred, ApiNode succ) { - pred = Impl::MkModuleObjectDown(mod) and - succ = getForwardStartNode(getAModuleReference(mod)) + private predicate defStep(Label::ApiLabel lbl, DataFlow::Node node, DataFlow::Node rhs) { + exists(TypeTrackerSpecific::TypeTrackerContent c | + TypeTrackerSpecific::basicStoreStep(rhs, node, c) and + lbl = Label::content(c.getAStoreContent()) + ) + } + + pragma[nomagic] + private predicate isUse(DataFlow::Node nd) { + useRoot(_, nd) or - pred = getBackwardEndNode(getAModuleReference(mod)) and - ( - succ = Impl::MkModuleObjectUp(mod) - or - succ = Impl::MkModuleObjectDown(mod) + exists(DataFlow::Node node | + useCandFwd().flowsTo(node) and + useStep(_, node, nd) ) or - pred = Impl::MkModuleInstanceUp(mod) and - succ = getAModuleInstanceUseNode(mod) + useCandFwd().flowsTo(nd.(DataFlow::CallNode).getReceiver()) or - pred = getAModuleInstanceDefNode(mod) and - succ = Impl::MkModuleInstanceUp(mod) + parameterStep(_, defCand(), nd) or - pred = getAModuleDescendentInstanceDefNode(mod) and - succ = Impl::MkModuleInstanceDown(mod) + nd = any(EntryPoint entry).getASource() + or + nd = any(EntryPoint entry).getACall() } /** - * Holds if the epsilon step `pred -> succ` should be generated to account for the fact that `getMethod("call")` - * may be omitted when dealing with blocks, lambda, or procs. - * - * For example, a block may be invoked by a `yield`, or can be converted to a proc and then invoked via `.call`. - * To simplify this, the implicit proc conversion is seen as a no-op and the `.call` is omitted. + * Holds if `ref` is a use of node `nd`. */ - pragma[nomagic] - private predicate implicitCallEdge(ApiNode pred, ApiNode succ) { - // Step from &block parameter to yield call without needing `getMethod("call")`. - exists(DataFlow::MethodNode method | - pred = getForwardEndNode(method.getBlockParameter()) and - succ = Impl::MkMethodAccessNode(method.getABlockCall()) - ) - or - // Step from x -> x.call (the call itself, not its return value), without needing `getMethod("call")`. - exists(DataFlow::CallNode call | - call.getMethodName() = "call" and - pred = getForwardEndNode(getALocalSourceStrict(call.getReceiver())) and - succ = Impl::MkMethodAccessNode(call) - ) + cached + predicate use(TApiNode nd, DataFlow::Node ref) { + nd = MkUse(ref) or exists(DataFlow::ModuleNode mod | - // Step from module/class to its own `call` method without needing `getMethod("call")`. - (pred = Impl::MkModuleObjectDown(mod) or pred = Impl::MkModuleObjectUp(mod)) and - succ = getBackwardEndNode(mod.getOwnSingletonMethod("call")) - or - pred = Impl::MkModuleInstanceUp(mod) and - succ = getBackwardEndNode(mod.getOwnInstanceMethod("call")) + nd = MkModuleObject(mod) and + ref = mod.getAnImmediateReference() ) } - pragma[nomagic] - private DataFlow::Node getHashSplatArgument(DataFlow::HashLiteralNode literal) { - result = DataFlowPrivate::TSynthHashSplatArgumentNode(literal.asExpr()) - } - /** - * Holds if the epsilon edge `pred -> succ` should be generated to account for the members of a hash literal. - * - * This currently exists because hash literals are desugared to `Hash.[]` calls, whose summary relies on `WithContent`. - * However, `contentEdge` does not currently generate edges for `WithContent` steps. + * Holds if `rhs` is a RHS of node `nd`. */ - bindingset[literal] - pragma[inline_late] - private predicate hashSplatEdge(DataFlow::HashLiteralNode literal, ApiNode pred, ApiNode succ) { - exists(TypeTracker t | - pred = Impl::MkForwardNode(getALocalSourceStrict(getHashSplatArgument(literal)), t) and - succ = Impl::MkForwardNode(pragma[only_bind_out](literal), pragma[only_bind_out](t)) - or - succ = Impl::MkBackwardNode(getALocalSourceStrict(getHashSplatArgument(literal)), t) and - pred = Impl::MkBackwardNode(pragma[only_bind_out](literal), pragma[only_bind_out](t)) - ) + cached + predicate def(TApiNode nd, DataFlow::Node rhs) { nd = MkDef(rhs) } + + /** Gets a node reachable from a use-node. */ + private DataFlow::LocalSourceNode useCandFwd(TypeTracker t) { + t.start() and + isUse(result) + or + exists(TypeTracker t2 | result = useCandFwd(t2).track(t2, t)) } - pragma[nomagic] - private DataFlow::LocalSourceNode getAModuleReference(DataFlow::ModuleNode mod) { - result = mod.getAnImmediateReference() + /** Gets a node reachable from a use-node. */ + private DataFlow::LocalSourceNode useCandFwd() { result = useCandFwd(TypeTracker::end()) } + + private predicate isDef(DataFlow::Node rhs) { + // If a call node is relevant as a use-node, treat its arguments as def-nodes + argumentStep(_, useCandFwd(), rhs) + or + defStep(_, defCand(), rhs) + or + rhs = any(EntryPoint entry).getASink() + } + + /** Gets a data flow node that flows to the RHS of a def-node. */ + private DataFlow::LocalSourceNode defCand(TypeBackTracker t) { + t.start() and + exists(DataFlow::Node rhs | + isDef(rhs) and + result = rhs.getALocalSource() + ) or - mod.getAnAncestor().getAnOwnInstanceSelf() = getANodeReachingClassCall(result) + exists(TypeBackTracker t2 | result = defCand(t2).backtrack(t2, t)) } + /** Gets a data flow node that flows to the RHS of a def-node. */ + private DataFlow::LocalSourceNode defCand() { result = defCand(TypeBackTracker::end()) } + /** - * Gets an API node that may refer to an instance of `mod`. + * Holds if there should be a `lbl`-edge from the given call to an argument. */ - bindingset[mod] - pragma[inline_late] - private ApiNode getAModuleInstanceUseNode(DataFlow::ModuleNode mod) { - result = getForwardStartNode(mod.getAnOwnInstanceSelf()) + pragma[nomagic] + private predicate argumentStep( + Label::ApiLabel lbl, DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument + ) { + exists(DataFlowDispatch::ArgumentPosition argPos | + argument.sourceArgumentOf(call.asExpr(), argPos) and + lbl = Label::getLabelFromArgumentPosition(argPos) + ) } /** - * Gets a node that can be backtracked to an instance of `mod`. + * Holds if there should be a `lbl`-edge from the given callable to a parameter. */ - bindingset[mod] - pragma[inline_late] - private ApiNode getAModuleInstanceDefNode(DataFlow::ModuleNode mod) { - result = getBackwardEndNode(mod.getAnImmediateReference().getAMethodCall("new")) + pragma[nomagic] + private predicate parameterStep( + Label::ApiLabel lbl, DataFlow::Node callable, DataFlowPrivate::ParameterNodeImpl paramNode + ) { + exists(DataFlowDispatch::ParameterPosition paramPos | + paramNode.isSourceParameterOf(callable.asExpr().getExpr(), paramPos) and + lbl = Label::getLabelFromParameterPosition(paramPos) + ) } /** - * Gets a node that can be backtracked to an instance of `mod` or any of its descendents. + * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. + * + * The flow from `src` to the returned node may be inter-procedural. */ - bindingset[mod] - pragma[inline_late] - private ApiNode getAModuleDescendentInstanceDefNode(DataFlow::ModuleNode mod) { - result = getBackwardEndNode(mod.getAnOwnInstanceSelf()) + private DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src, TypeTracker t) { + result = src and + isUse(src) and + t.start() + or + exists(TypeTracker t2 | + result = trackUseNode(src, t2).track(t2, t) and + not result instanceof DataFlow::SelfParameterNode + ) } /** - * Holds if `superclass` is the superclass of `mod`. + * Gets a data-flow node to which `src`, which is a use of an API-graph node, flows. + * + * The flow from `src` to the returned node may be inter-procedural. */ - pragma[nomagic] - private DataFlow::LocalSourceNode getSuperClassNode(DataFlow::ModuleNode mod) { - result.getALocalUse().asExpr().getExpr() = - mod.getADeclaration().(ClassDeclaration).getSuperclassExpr() + cached + DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) { + result = trackUseNode(src, TypeTracker::end()) } - /** Gets a node that can reach the receiver of the given `.class` call. */ - private DataFlow::LocalSourceNode getANodeReachingClassCall( - DataFlow::CallNode call, TypeBackTracker t - ) { + /** Gets a data flow node reaching the RHS of the given def node. */ + private DataFlow::LocalSourceNode trackDefNode(DataFlow::Node rhs, TypeBackTracker t) { t.start() and - call.getMethodName() = "class" and - result = getALocalSourceStrict(call.getReceiver()) + isDef(rhs) and + result = rhs.getALocalSource() or - exists(DataFlow::LocalSourceNode prev, TypeBackTracker t2 | - prev = getANodeReachingClassCall(call, t2) and - result = prev.backtrack(t2, t) and - notSelfParameter(prev) + exists(TypeBackTracker t2, DataFlow::LocalSourceNode mid | + mid = trackDefNode(rhs, t2) and + not mid instanceof DataFlow::SelfParameterNode and + result = mid.backtrack(t2, t) ) } - /** Gets a node that can reach the receiver of the given `.class` call. */ - private DataFlow::LocalSourceNode getANodeReachingClassCall(DataFlow::CallNode call) { - result = getANodeReachingClassCall(call, TypeBackTracker::end()) - } - } - - /** INTERNAL USE ONLY. */ - module Internal { - private module Shared = ApiGraphShared; - - import Shared - - /** Gets the API node corresponding to the module/class object for `mod`. */ - bindingset[mod] - pragma[inline_late] - Node getModuleNode(DataFlow::ModuleNode mod) { result = Impl::MkModuleObjectDown(mod) } - - /** Gets the API node corresponding to instances of `mod`. */ - bindingset[mod] - pragma[inline_late] - Node getModuleInstance(DataFlow::ModuleNode mod) { result = getModuleNode(mod).getInstance() } - } - - private import Internal - import Internal::Public - - cached - private module Impl { + /** Gets a data flow node reaching the RHS of the given def node. */ cached - newtype TApiNode = - /** The root of the API graph. */ - MkRoot() or - /** The method accessed at `call`, synthetically treated as a separate object. */ - MkMethodAccessNode(DataFlow::CallNode call) or - /** The module object `mod` with epsilon edges to its ancestors. */ - MkModuleObjectUp(DataFlow::ModuleNode mod) or - /** The module object `mod` with epsilon edges to its descendents. */ - MkModuleObjectDown(DataFlow::ModuleNode mod) or - /** Instances of `mod` with epsilon edges to its ancestors. */ - MkModuleInstanceUp(DataFlow::ModuleNode mod) or - /** Instances of `mod` with epsilon edges to its descendents, and to its upward node. */ - MkModuleInstanceDown(DataFlow::ModuleNode mod) or - /** Intermediate node for following forward data flow. */ - MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or - /** Intermediate node for following backward data flow. */ - MkBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or - MkSinkNode(DataFlow::Node node) { needsSinkNode(node) } - - private predicate needsSinkNode(DataFlow::Node node) { - node instanceof DataFlowPrivate::ArgumentNode - or - TypeTrackerSpecific::basicStoreStep(node, _, _) - or - node = any(DataFlow::CallableNode callable).getAReturnNode() - or - node = any(EntryPoint e).getASink() + DataFlow::LocalSourceNode trackDefNode(DataFlow::Node rhs) { + result = trackDefNode(rhs, TypeBackTracker::end()) } - bindingset[e] - pragma[inline_late] - private DataFlow::Node getNodeFromExpr(Expr e) { result.asExpr().getExpr() = e } - - private string resolveTopLevel(ConstantReadAccess read) { - result = read.getModule().getQualifiedName() and - not result.matches("%::%") + pragma[nomagic] + private predicate useNodeReachesReceiver(DataFlow::Node use, DataFlow::CallNode call) { + trackUseNode(use).flowsTo(call.getReceiver()) } /** - * Holds `pred` should have a member edge to `mod`. + * Holds if `superclass` is the superclass of `mod`. */ pragma[nomagic] - private predicate moduleScope(DataFlow::ModuleNode mod, Node pred, string name) { - exists(Namespace namespace | - name = namespace.getName() and - namespace = mod.getADeclaration() - | - exists(DataFlow::Node scopeNode | - scopeNode.asExpr().getExpr() = namespace.getScopeExpr() and - pred = getForwardEndNode(getALocalSourceStrict(scopeNode)) - ) - or - not exists(namespace.getScopeExpr()) and - if namespace.hasGlobalScope() or namespace.getEnclosingModule() instanceof Toplevel - then pred = MkRoot() - else pred = MkModuleObjectDown(namespace.getEnclosingModule().getModule()) - ) + private predicate superclassNode(DataFlow::ModuleNode mod, DataFlow::Node superclass) { + superclass.asExpr().getExpr() = mod.getADeclaration().(ClassDeclaration).getSuperclassExpr() } + /** + * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. + */ cached - predicate memberEdge(Node pred, string name, Node succ) { - exists(ConstantReadAccess read | succ = getForwardStartNode(getNodeFromExpr(read)) | - name = resolveTopLevel(read) and - pred = MkRoot() + predicate edge(TApiNode pred, Label::ApiLabel lbl, TApiNode succ) { + /* Every node that is a use of an API component is itself added to the API graph. */ + exists(DataFlow::LocalSourceNode ref | succ = MkUse(ref) | + pred = MkRoot() and + useRoot(lbl, ref) or - not exists(resolveTopLevel(read)) and - not exists(read.getScopeExpr()) and - name = read.getName() and - pred = MkRoot() + exists(DataFlow::Node node, DataFlow::Node src | + use(pred, src) and + trackUseNode(src).flowsTo(node) and + useStep(lbl, node, ref) + ) or - pred = getForwardEndNode(getALocalSourceStrict(getNodeFromExpr(read.getScopeExpr()))) and - name = read.getName() + exists(DataFlow::Node callback | + def(pred, callback) and + parameterStep(lbl, trackDefNode(callback), ref) + ) ) or - exists(DataFlow::ModuleNode mod | - moduleScope(mod, pred, name) and - (succ = MkModuleObjectDown(mod) or succ = MkModuleObjectUp(mod)) - ) - } - - cached - predicate topLevelMember(string name, Node node) { memberEdge(root(), name, node) } - - cached - predicate toplevelCall(string name, Node node) { - exists(DataFlow::CallNode call | - call.asExpr().getExpr().getCfgScope() instanceof Toplevel and - call.getMethodName() = name and - node = MkMethodAccessNode(call) + exists(DataFlow::Node predNode, DataFlow::Node succNode | + def(pred, predNode) and + succ = MkDef(succNode) and + defStep(lbl, trackDefNode(predNode), succNode) ) - } - - cached - predicate anyMemberEdge(Node pred, Node succ) { memberEdge(pred, _, succ) } - - cached - predicate methodEdge(Node pred, string name, Node succ) { - exists(DataFlow::ModuleNode mod, DataFlow::CallNode call | - // Treat super calls as if they were calls to the module object/instance. - succ = MkMethodAccessNode(call) and - name = call.getMethodName() - | - pred = MkModuleObjectDown(mod) and - call = mod.getAnOwnSingletonMethod().getASuperCall() - or - pred = MkModuleInstanceUp(mod) and - call = mod.getAnOwnInstanceMethod().getASuperCall() + or + exists(DataFlow::Node predNode, DataFlow::Node superclassNode, DataFlow::ModuleNode mod | + use(pred, predNode) and + trackUseNode(predNode).flowsTo(superclassNode) and + superclassNode(mod, superclassNode) and + succ = MkModuleObject(mod) and + lbl = Label::subclass() ) or exists(DataFlow::CallNode call | // from receiver to method call node - pred = getForwardEndNode(getALocalSourceStrict(call.getReceiver())) and - succ = MkMethodAccessNode(call) and - name = call.getMethodName() - ) - or - exists(DataFlow::ModuleNode mod | - (pred = MkModuleObjectDown(mod) or pred = MkModuleObjectUp(mod)) and - succ = getBackwardStartNode(mod.getOwnSingletonMethod(name)) - or - pred = MkModuleInstanceUp(mod) and - succ = getBackwardStartNode(mod.getOwnInstanceMethod(name)) - ) - } - - cached - predicate asCallable(Node apiNode, DataFlow::CallableNode callable) { - apiNode = getBackwardStartNode(callable) - } - - cached - predicate contentEdge(Node pred, DataFlow::Content content, Node succ) { - exists( - DataFlow::Node object, DataFlow::Node value, TypeTrackerSpecific::TypeTrackerContent c - | - TypeTrackerSpecific::basicLoadStep(object, value, c) and - content = c.getAStoreContent() and - not c.isSingleton(any(DataFlow::Content::AttributeNameContent k)) and - // `x -> x.foo` with content "foo" - pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and - succ = getForwardStartNode(value) + exists(DataFlow::Node receiver | + use(pred, receiver) and + useNodeReachesReceiver(receiver, call) and + lbl = Label::method(call.getMethodName()) and + succ = MkMethodAccessNode(call) + ) or - // Based on `object.c = value` generate `object -> value` with content `c` - TypeTrackerSpecific::basicStoreStep(value, object, c) and - content = c.getAStoreContent() and - pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and - succ = MkSinkNode(value) - ) - } - - cached - predicate fieldEdge(Node pred, string name, Node succ) { - Impl::contentEdge(pred, DataFlowPrivate::TFieldContent(name), succ) - } - - cached - predicate elementEdge(Node pred, Node succ) { - contentEdge(pred, any(DataFlow::ContentSet set | set.isAnyElement()).getAReadContent(), succ) - } - - cached - predicate parameterEdge(Node pred, DataFlowDispatch::ParameterPosition paramPos, Node succ) { - exists(DataFlowPrivate::ParameterNodeImpl parameter, DataFlow::CallableNode callable | - parameter.isSourceParameterOf(callable.asExpr().getExpr(), paramPos) and - pred = getBackwardEndNode(callable) and - succ = getForwardStartNode(parameter) - ) - } - - cached - predicate argumentEdge(Node pred, DataFlowDispatch::ArgumentPosition argPos, Node succ) { - exists(DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument | - argument.sourceArgumentOf(call.asExpr(), argPos) and + // from method call node to return and arguments pred = MkMethodAccessNode(call) and - succ = MkSinkNode(argument) - ) - } - - cached - predicate positionalArgumentEdge(Node pred, int n, Node succ) { - argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isPositional(n)), succ) - } - - cached - predicate keywordArgumentEdge(Node pred, string name, Node succ) { - argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name)), succ) - } - - private predicate blockArgumentEdge(Node pred, Node succ) { - argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isBlock()), succ) - } - - private predicate positionalParameterEdge(Node pred, int n, Node succ) { - parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isPositional(n)), succ) - } - - private predicate keywordParameterEdge(Node pred, string name, Node succ) { - parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isKeyword(name)), succ) - } - - cached - predicate blockParameterEdge(Node pred, Node succ) { - parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isBlock()), succ) - } - - cached - predicate positionalParameterOrArgumentEdge(Node pred, int n, Node succ) { - positionalArgumentEdge(pred, n, succ) - or - positionalParameterEdge(pred, n, succ) - } - - cached - predicate keywordParameterOrArgumentEdge(Node pred, string name, Node succ) { - keywordArgumentEdge(pred, name, succ) - or - keywordParameterEdge(pred, name, succ) - } - - cached - predicate blockParameterOrArgumentEdge(Node pred, Node succ) { - blockArgumentEdge(pred, succ) - or - blockParameterEdge(pred, succ) - } - - pragma[nomagic] - private predicate newCall(DataFlow::LocalSourceNode receiver, DataFlow::CallNode call) { - call = receiver.getAMethodCall("new") - } - - cached - predicate instanceEdge(Node pred, Node succ) { - exists(DataFlow::ModuleNode mod | - pred = MkModuleObjectDown(mod) and - succ = MkModuleInstanceUp(mod) + ( + lbl = Label::return() and + succ = MkUse(call) + or + exists(DataFlow::Node rhs | + argumentStep(lbl, call, rhs) and + succ = MkDef(rhs) + ) + ) ) or - exists(DataFlow::LocalSourceNode receiver, DataFlow::CallNode call | - newCall(receiver, call) and - pred = getForwardEndNode(receiver) and - succ = getForwardStartNode(call) + exists(EntryPoint entry | + pred = root() and + lbl = Label::entryPoint(entry) + | + succ = MkDef(entry.getASink()) + or + succ = MkUse(entry.getASource()) + or + succ = MkMethodAccessNode(entry.getACall()) ) } - cached - predicate returnEdge(Node pred, Node succ) { - exists(DataFlow::CallNode call | - pred = MkMethodAccessNode(call) and - succ = getForwardStartNode(call) - ) - or - exists(DataFlow::CallableNode callable | - pred = getBackwardEndNode(callable) and - succ = MkSinkNode(callable.getAReturnNode()) - ) - } + /** + * Holds if there is an edge from `pred` to `succ` in the API graph. + */ + private predicate edge(TApiNode pred, TApiNode succ) { edge(pred, _, succ) } + /** Gets the shortest distance from the root to `nd` in the API graph. */ cached - predicate entryPointEdge(EntryPoint entry, Node node) { - node = MkSinkNode(entry.getASink()) or - node = getForwardStartNode(entry.getASource()) or - node = MkMethodAccessNode(entry.getACall()) - } - } - - /** - * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`. - */ - pragma[nomagic] - deprecated private predicate labelledEdge(Node pred, Label::ApiLabel lbl, Node succ) { - exists(string name | - Impl::memberEdge(pred, name, succ) and - lbl = Label::member(name) - ) - or - exists(string name | - Impl::methodEdge(pred, name, succ) and - lbl = Label::method(name) - ) - or - exists(DataFlow::Content content | - Impl::contentEdge(pred, content, succ) and - lbl = Label::content(content) - ) - or - exists(DataFlowDispatch::ParameterPosition pos | - Impl::parameterEdge(pred, pos, succ) and - lbl = Label::getLabelFromParameterPosition(pos) - ) - or - exists(DataFlowDispatch::ArgumentPosition pos | - Impl::argumentEdge(pred, pos, succ) and - lbl = Label::getLabelFromArgumentPosition(pos) - ) - or - Impl::instanceEdge(pred, succ) and - lbl = Label::instance() - or - Impl::returnEdge(pred, succ) and - lbl = Label::return() - or - exists(EntryPoint entry | - Impl::entryPointEdge(entry, succ) and - pred = root() and - lbl = Label::entryPoint(entry) - ) - } + int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result) - /** - * DEPRECATED. Treating the API graph as an explicit labelled graph is deprecated - instead use the methods on `API:Node` directly. - * - * Provides classes modeling the various edges (labels) in the API graph. - */ - deprecated module Label { /** All the possible labels in the API graph. */ - private newtype TLabel = + cached + newtype TLabel = MkLabelMember(string member) { member = any(ConstantReadAccess a).getName() } or MkLabelMethod(string m) { m = any(DataFlow::CallNode c).getMethodName() } or MkLabelReturn() or - MkLabelInstance() or + MkLabelSubclass() or MkLabelKeywordParameter(string name) { any(DataFlowDispatch::ArgumentPosition arg).isKeyword(name) or @@ -1424,9 +923,12 @@ module API { MkLabelBlockParameter() or MkLabelEntryPoint(EntryPoint name) or MkLabelContent(DataFlow::Content content) + } + /** Provides classes modeling the various edges (labels) in the API graph. */ + module Label { /** A label in the API-graph */ - class ApiLabel extends TLabel { + class ApiLabel extends Impl::TLabel { /** Gets a string representation of this label. */ string toString() { result = "???" } } @@ -1465,9 +967,9 @@ module API { override string toString() { result = "getReturn()" } } - /** A label for getting instances of a module/class. */ - class LabelInstance extends ApiLabel, MkLabelInstance { - override string toString() { result = "getInstance()" } + /** A label for the subclass relationship. */ + class LabelSubclass extends ApiLabel, MkLabelSubclass { + override string toString() { result = "getASubclass()" } } /** A label for a keyword parameter. */ @@ -1535,8 +1037,8 @@ module API { /** Gets the `return` edge label. */ LabelReturn return() { any() } - /** Gets the `instance` edge label. */ - LabelInstance instance() { any() } + /** Gets the `subclass` edge label. */ + LabelSubclass subclass() { any() } /** Gets the label representing the given keyword argument/parameter. */ LabelKeywordParameter keywordParameter(string name) { result.getName() = name } diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll index 7772e5235d76..a98238e85d9c 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll @@ -6,17 +6,12 @@ private import codeql.ruby.typetracking.TypeTracker private import codeql.ruby.dataflow.SSA private import FlowSummaryImpl as FlowSummaryImpl private import SsaImpl as SsaImpl -private import codeql.ruby.ApiGraphs /** * An element, viewed as a node in a data flow graph. Either an expression * (`ExprNode`) or a parameter (`ParameterNode`). */ class Node extends TNode { - /** Starts backtracking from this node using API graphs. */ - pragma[inline] - API::Node backtrack() { result = API::Internal::getNodeForBacktracking(this) } - /** Gets the expression corresponding to this node, if any. */ CfgNodes::ExprCfgNode asExpr() { result = this.(ExprNode).getExprNode() } @@ -81,11 +76,6 @@ class Node extends TNode { result.asCallableAstNode() = c.asCallable() ) } - - /** Gets the enclosing method, if any. */ - MethodNode getEnclosingMethod() { - result.asCallableAstNode() = this.asExpr().getExpr().getEnclosingMethod() - } } /** A data-flow node corresponding to a call in the control-flow graph. */ @@ -154,18 +144,6 @@ class CallNode extends LocalSourceNode, ExprNode { result.asExpr() = pair.getValue() ) } - - /** - * Gets a potential target of this call, if any. - */ - final CallableNode getATarget() { - result.asCallableAstNode() = this.asExpr().getExpr().(Call).getATarget() - } - - /** - * Holds if this is a `super` call. - */ - final predicate isSuperCall() { this.asExpr().getExpr() instanceof SuperCall } } /** @@ -239,10 +217,6 @@ class SelfParameterNode extends ParameterNode instanceof SelfParameterNodeImpl { class LocalSourceNode extends Node { LocalSourceNode() { isLocalSourceNode(this) } - /** Starts tracking this node forward using API graphs. */ - pragma[inline] - API::Node track() { result = API::Internal::getNodeForForwardTracking(this) } - /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ pragma[inline] predicate flowsTo(Node nodeTo) { hasLocalSource(nodeTo, this) } @@ -385,11 +359,6 @@ private module Cached { ) } - cached - predicate methodHasSuperCall(MethodNode method, CallNode call) { - call.isSuperCall() and method = call.getEnclosingMethod() - } - /** * A place in which a named constant can be looked up during constant lookup. */ @@ -418,39 +387,6 @@ private module Cached { result.asExpr().getExpr() = access } - /** - * Gets a module for which `constRef` is the reference to an ancestor module. - * - * For example, `M` is the ancestry target of `C` in the following examples: - * ```rb - * class M < C {} - * - * module M - * include C - * end - * - * module M - * prepend C - * end - * ``` - */ - private ModuleNode getAncestryTarget(ConstRef constRef) { result.getAnAncestorExpr() = constRef } - - /** - * Gets a scope in which a constant lookup may access the contents of the module referenced by `constRef`. - */ - cached - TConstLookupScope getATargetScope(ConstRef constRef) { - result = MkAncestorLookup(getAncestryTarget(constRef).getAnImmediateDescendent*()) - or - constRef.asConstantAccess() = any(ConstantAccess ac).getScopeExpr() and - result = MkQualifiedLookup(constRef.asConstantAccess()) - or - result = MkNestedLookup(getAncestryTarget(constRef)) - or - result = MkExactLookup(constRef.asConstantAccess().(Namespace).getModule()) - } - cached predicate forceCachingInSameStage() { any() } @@ -1092,33 +1028,6 @@ class ModuleNode instanceof Module { * this predicate. */ ModuleNode getNestedModule(string name) { result = super.getNestedModule(name) } - - /** - * Starts tracking the module object using API graphs. - * - * Concretely, this tracks forward from the following starting points: - * - A constant access that resolves to this module. - * - `self` in the module scope or in a singleton method of the module. - * - A call to `self.class` in an instance method of this module or an ancestor module. - */ - bindingset[this] - pragma[inline] - API::Node trackModule() { result = API::Internal::getModuleNode(this) } - - /** - * Starts tracking instances of this module forward using API graphs. - * - * Concretely, this tracks forward from the following starting points: - * - `self` in instance methods of this module and ancestor modules - * - Calls to `new` on the module object - * - * Note that this includes references to `self` in ancestor modules, but not in descendent modules. - * This is usually the desired behavior, particularly if this module was itself found using - * a call to `getADescendentModule()`. - */ - bindingset[this] - pragma[inline] - API::Node trackInstance() { result = API::Internal::getModuleInstance(this) } } /** @@ -1307,9 +1216,6 @@ class MethodNode extends CallableNode { /** Holds if this method is protected. */ predicate isProtected() { this.asCallableAstNode().isProtected() } - - /** Gets a `super` call in this method. */ - CallNode getASuperCall() { methodHasSuperCall(this, result) } } /** @@ -1428,6 +1334,24 @@ class ConstRef extends LocalSourceNode { not exists(access.getScopeExpr()) } + /** + * Gets a module for which this constant is the reference to an ancestor module. + * + * For example, `M` is the ancestry target of `C` in the following examples: + * ```rb + * class M < C {} + * + * module M + * include C + * end + * + * module M + * prepend C + * end + * ``` + */ + private ModuleNode getAncestryTarget() { result.getAnAncestorExpr() = this } + /** * Gets the known target module. * @@ -1435,6 +1359,22 @@ class ConstRef extends LocalSourceNode { */ private Module getExactTarget() { result.getAnImmediateReference() = access } + /** + * Gets a scope in which a constant lookup may access the contents of the module referenced by this constant. + */ + cached + private TConstLookupScope getATargetScope() { + forceCachingInSameStage() and + result = MkAncestorLookup(this.getAncestryTarget().getAnImmediateDescendent*()) + or + access = any(ConstantAccess ac).getScopeExpr() and + result = MkQualifiedLookup(access) + or + result = MkNestedLookup(this.getAncestryTarget()) + or + result = MkExactLookup(access.(Namespace).getModule()) + } + /** * Gets the scope expression, or the immediately enclosing `Namespace` (skipping over singleton classes). * @@ -1496,7 +1436,7 @@ class ConstRef extends LocalSourceNode { pragma[inline] ConstRef getConstant(string name) { exists(TConstLookupScope scope | - pragma[only_bind_into](scope) = getATargetScope(pragma[only_bind_out](this)) and + pragma[only_bind_into](scope) = pragma[only_bind_out](this).getATargetScope() and result.accesses(pragma[only_bind_out](scope), name) ) } @@ -1518,9 +1458,7 @@ class ConstRef extends LocalSourceNode { * end * ``` */ - bindingset[this] - pragma[inline_late] - ModuleNode getADescendentModule() { MkAncestorLookup(result) = getATargetScope(this) } + ModuleNode getADescendentModule() { MkAncestorLookup(result) = this.getATargetScope() } } /** diff --git a/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll b/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll index 449e05ad9e86..5c24978c4c38 100644 --- a/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll +++ b/ruby/ql/lib/codeql/ruby/experimental/UnicodeBypassValidationQuery.qll @@ -91,19 +91,19 @@ class Configuration extends TaintTracking::Configuration { // unicode_utils exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("UnicodeUtils").getMethod(["nfkd", "nfc", "nfd", "nfkc"]) and - sink = mac.getArgument(0).asSink() + sink = mac.getParameter(0).asSink() ) or // eprun exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("Eprun").getMethod("normalize") and - sink = mac.getArgument(0).asSink() + sink = mac.getParameter(0).asSink() ) or // unf exists(API::MethodAccessNode mac | mac = API::getTopLevelMember("UNF").getMember("Normalizer").getMethod("normalize") and - sink = mac.getArgument(0).asSink() + sink = mac.getParameter(0).asSink() ) or // ActiveSupport::Multibyte::Chars @@ -113,7 +113,7 @@ class Configuration extends TaintTracking::Configuration { .getMember("Multibyte") .getMember("Chars") .getMethod("new") - .asCall() and + .getCallNode() and n = cn.getAMethodCall("normalize") and sink = cn.getArgument(0) ) diff --git a/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll b/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll index 656ceedbe2b6..e94cabb414c4 100644 --- a/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll +++ b/ruby/ql/lib/codeql/ruby/experimental/ZipSlipCustomizations.qll @@ -89,7 +89,7 @@ module ZipSlip { // If argument refers to a string object, then it's a hardcoded path and // this file is safe. not zipOpen - .asCall() + .getCallNode() .getArgument(0) .getALocalSource() .getConstantValue() diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll index a687837f8fdf..31bdc42e3507 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionController.qll @@ -83,8 +83,8 @@ class ActionControllerClass extends DataFlow::ClassNode { } } -private API::Node actionControllerInstance() { - result = any(ActionControllerClass cls).getSelf().track() +private DataFlow::LocalSourceNode actionControllerInstance() { + result = any(ActionControllerClass cls).getSelf() } /** @@ -222,19 +222,19 @@ private class ActionControllerRenderToCall extends RenderToCallImpl { } } -pragma[nomagic] -private DataFlow::CallNode renderCall() { - // ActionController#render is an alias for ActionController::Renderer#render - result = - [ - any(ActionControllerClass c).trackModule().getAMethodCall("render"), - any(ActionControllerClass c).trackModule().getReturn("renderer").getAMethodCall("render") - ] -} - /** A call to `ActionController::Renderer#render`. */ private class RendererRenderCall extends RenderCallImpl { - RendererRenderCall() { this = renderCall().asExpr().getExpr() } + RendererRenderCall() { + this = + [ + // ActionController#render is an alias for ActionController::Renderer#render + any(ActionControllerClass c).getAnImmediateReference().getAMethodCall("render"), + any(ActionControllerClass c) + .getAnImmediateReference() + .getAMethodCall("renderer") + .getAMethodCall("render") + ].asExpr().getExpr() + } } /** A call to `html_escape` from within a controller. */ @@ -260,7 +260,6 @@ class RedirectToCall extends MethodCall { this = controller .getSelf() - .track() .getAMethodCall(["redirect_to", "redirect_back", "redirect_back_or_to"]) .asExpr() .getExpr() @@ -601,7 +600,9 @@ private module ParamsSummaries { * response. */ private module Response { - API::Node response() { result = actionControllerInstance().getReturn("response") } + DataFlow::LocalSourceNode response() { + result = actionControllerInstance().getAMethodCall("response") + } class BodyWrite extends DataFlow::CallNode, Http::Server::HttpResponse::Range { BodyWrite() { this = response().getAMethodCall("body=") } @@ -627,7 +628,7 @@ private module Response { HeaderWrite() { // response.header[key] = val // response.headers[key] = val - this = response().getReturn(["header", "headers"]).getAMethodCall("[]=") + this = response().getAMethodCall(["header", "headers"]).getAMethodCall("[]=") or // response.set_header(key) = val // response[header] = val @@ -672,12 +673,18 @@ private module Response { } } +private class ActionControllerLoggerInstance extends DataFlow::Node { + ActionControllerLoggerInstance() { + this = actionControllerInstance().getAMethodCall("logger") + or + any(ActionControllerLoggerInstance i).(DataFlow::LocalSourceNode).flowsTo(this) + } +} + private class ActionControllerLoggingCall extends DataFlow::CallNode, Logging::Range { ActionControllerLoggingCall() { - this = - actionControllerInstance() - .getReturn("logger") - .getAMethodCall(["debug", "error", "fatal", "info", "unknown", "warn"]) + this.getReceiver() instanceof ActionControllerLoggerInstance and + this.getMethodName() = ["debug", "error", "fatal", "info", "unknown", "warn"] } // Note: this is identical to the definition `stdlib.Logger.LoggerInfoStyleCall`. diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll index f237e42a9a9d..13607f67926a 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailbox.qll @@ -27,8 +27,8 @@ module ActionMailbox { Mail() { this = [ - controller().trackInstance().getReturn("inbound_email").getAMethodCall("mail"), - controller().trackInstance().getAMethodCall("mail") + controller().getAnInstanceSelf().getAMethodCall("inbound_email").getAMethodCall("mail"), + controller().getAnInstanceSelf().getAMethodCall("mail") ] } } @@ -40,7 +40,7 @@ module ActionMailbox { RemoteContent() { this = any(Mail m) - .track() + .(DataFlow::LocalSourceNode) .getAMethodCall([ "body", "to", "from", "raw_source", "subject", "from_address", "recipients_addresses", "cc_addresses", "bcc_addresses", "in_reply_to", diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll index 4884f46aac35..af183333d3d1 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActionMailer.qll @@ -4,26 +4,12 @@ private import codeql.ruby.AST private import codeql.ruby.ApiGraphs -private import codeql.ruby.DataFlow private import codeql.ruby.frameworks.internal.Rails /** * Provides modeling for the `ActionMailer` library. */ module ActionMailer { - private DataFlow::ClassNode actionMailerClass() { - result = - [ - DataFlow::getConstant("ActionMailer").getConstant("Base"), - // In Rails applications `ApplicationMailer` typically extends - // `ActionMailer::Base`, but we treat it separately in case the - // `ApplicationMailer` definition is not in the database. - DataFlow::getConstant("ApplicationMailer") - ].getADescendentModule() - } - - private API::Node actionMailerInstance() { result = actionMailerClass().trackInstance() } - /** * A `ClassDeclaration` for a class that extends `ActionMailer::Base`. * For example, @@ -35,11 +21,33 @@ module ActionMailer { * ``` */ class MailerClass extends ClassDeclaration { - MailerClass() { this = actionMailerClass().getADeclaration() } + MailerClass() { + this.getSuperclassExpr() = + [ + API::getTopLevelMember("ActionMailer").getMember("Base"), + // In Rails applications `ApplicationMailer` typically extends + // `ActionMailer::Base`, but we treat it separately in case the + // `ApplicationMailer` definition is not in the database. + API::getTopLevelMember("ApplicationMailer") + ].getASubclass().getAValueReachableFromSource().asExpr().getExpr() + } + } + + /** A method call with a `self` receiver from within a mailer class */ + private class ContextCall extends MethodCall { + private MailerClass mailerClass; + + ContextCall() { + this.getReceiver() instanceof SelfVariableAccess and + this.getEnclosingModule() = mailerClass + } + + /** Gets the mailer class containing this method. */ + MailerClass getMailerClass() { result = mailerClass } } /** A call to `params` from within a mailer. */ - class ParamsCall extends ParamsCallImpl { - ParamsCall() { this = actionMailerInstance().getAMethodCall("params").asExpr().getExpr() } + class ParamsCall extends ContextCall, ParamsCallImpl { + ParamsCall() { this.getMethodName() = "params" } } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index 4a5d342eeba6..fcca078f933f 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -30,8 +30,10 @@ private predicate isBuiltInMethodForActiveRecordModelInstance(string methodName) methodName = objectInstanceMethodName() } -private API::Node activeRecordBaseClass() { +private API::Node activeRecordClassApiNode() { result = + // class Foo < ActiveRecord::Base + // class Bar < Foo [ API::getTopLevelMember("ActiveRecord").getMember("Base"), // In Rails applications `ApplicationRecord` typically extends `ActiveRecord::Base`, but we @@ -40,46 +42,6 @@ private API::Node activeRecordBaseClass() { ] } -/** - * Gets an object with methods from the ActiveRecord query interface. - */ -private API::Node activeRecordQueryBuilder() { - result = activeRecordBaseClass() - or - result = activeRecordBaseClass().getInstance() - or - // Assume any method call might return an ActiveRecord::Relation - // These are dynamically generated - result = activeRecordQueryBuilderMethodAccess(_).getReturn() -} - -/** Gets a call targeting the ActiveRecord query interface. */ -private API::MethodAccessNode activeRecordQueryBuilderMethodAccess(string name) { - result = activeRecordQueryBuilder().getMethod(name) and - // Due to the heuristic tracking of query builder objects, add a restriction for methods with a known call target - not isUnlikelyExternalCall(result) -} - -/** Gets a call targeting the ActiveRecord query interface. */ -private DataFlow::CallNode activeRecordQueryBuilderCall(string name) { - result = activeRecordQueryBuilderMethodAccess(name).asCall() -} - -/** - * Holds if `call` is unlikely to call into an external library, since it has a possible - * call target in its enclosing module. - */ -private predicate isUnlikelyExternalCall(API::MethodAccessNode node) { - exists(DataFlow::ModuleNode mod, DataFlow::CallNode call | call = node.asCall() | - call.getATarget() = [mod.getAnOwnSingletonMethod(), mod.getAnOwnInstanceMethod()] and - call.getEnclosingMethod() = [mod.getAnOwnSingletonMethod(), mod.getAnOwnInstanceMethod()] - ) -} - -private API::Node activeRecordConnectionInstance() { - result = activeRecordBaseClass().getReturn("connection") -} - /** * A `ClassDeclaration` for a class that inherits from `ActiveRecord::Base`. For example, * @@ -93,19 +55,20 @@ private API::Node activeRecordConnectionInstance() { * ``` */ class ActiveRecordModelClass extends ClassDeclaration { - private DataFlow::ClassNode cls; - ActiveRecordModelClass() { - cls = activeRecordBaseClass().getADescendentModule() and this = cls.getADeclaration() + this.getSuperclassExpr() = + activeRecordClassApiNode().getASubclass().getAValueReachableFromSource().asExpr().getExpr() } // Gets the class declaration for this class and all of its super classes - private ModuleBase getAllClassDeclarations() { result = cls.getAnAncestor().getADeclaration() } + private ModuleBase getAllClassDeclarations() { + result = this.getModule().getSuperClass*().getADeclaration() + } /** * Gets methods defined in this class that may access a field from the database. */ - deprecated Method getAPotentialFieldAccessMethod() { + Method getAPotentialFieldAccessMethod() { // It's a method on this class or one of its super classes result = this.getAllClassDeclarations().getAMethod() and // There is a value that can be returned by this method which may include field data @@ -127,84 +90,58 @@ class ActiveRecordModelClass extends ClassDeclaration { ) ) } - - /** Gets the class as a `DataFlow::ClassNode`. */ - DataFlow::ClassNode getClassNode() { result = cls } } -/** - * Gets a potential reference to an ActiveRecord class object. - */ -deprecated private API::Node getAnActiveRecordModelClassRef() { - result = any(ActiveRecordModelClass cls).getClassNode().trackModule() - or - // For methods with an unknown call target, assume this might be a database field, thus returning another ActiveRecord object. - // In this case we do not know which class it belongs to, which is why this predicate can't associate the reference with a specific class. - result = getAnUnknownActiveRecordModelClassCall().getReturn() -} +/** A class method call whose receiver is an `ActiveRecordModelClass`. */ +class ActiveRecordModelClassMethodCall extends MethodCall { + private ActiveRecordModelClass recvCls; -/** - * Gets a call performed on an ActiveRecord class object, without a known call target in the codebase. - */ -deprecated private API::MethodAccessNode getAnUnknownActiveRecordModelClassCall() { - result = getAnActiveRecordModelClassRef().getMethod(_) and - result.asCall().asExpr().getExpr() instanceof UnknownMethodCall -} - -/** - * DEPRECATED. Use `ActiveRecordModelClass.getClassNode().trackModule().getMethod()` instead. - * - * A class method call whose receiver is an `ActiveRecordModelClass`. - */ -deprecated class ActiveRecordModelClassMethodCall extends MethodCall { ActiveRecordModelClassMethodCall() { - this = getAnUnknownActiveRecordModelClassCall().asCall().asExpr().getExpr() - } - - /** Gets the `ActiveRecordModelClass` of the receiver of this method, if it can be determined. */ - ActiveRecordModelClass getReceiverClass() { - this = result.getClassNode().trackModule().getMethod(_).asCall().asExpr().getExpr() + // e.g. Foo.where(...) + recvCls.getModule() = this.getReceiver().(ConstantReadAccess).getModule() + or + // e.g. Foo.joins(:bars).where(...) + recvCls = this.getReceiver().(ActiveRecordModelClassMethodCall).getReceiverClass() + or + // e.g. self.where(...) within an ActiveRecordModelClass + this.getReceiver() instanceof SelfVariableAccess and + this.getEnclosingModule() = recvCls } -} -private predicate sqlFragmentArgumentInner(DataFlow::CallNode call, DataFlow::Node sink) { - call = - activeRecordQueryBuilderCall([ - "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!", - "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from", - "group", "having", "joins", "lock", "not", "order", "reorder", "pluck", "where", "rewhere", - "select", "reselect", "update_all" - ]) and - sink = call.getArgument(0) - or - call = activeRecordQueryBuilderCall("calculate") and - sink = call.getArgument(1) - or - call = - activeRecordQueryBuilderCall(["average", "count", "maximum", "minimum", "sum", "count_by_sql"]) and - sink = call.getArgument(0) - or - // This format was supported until Rails 2.3.8 - call = activeRecordQueryBuilderCall(["all", "find", "first", "last"]) and - sink = call.getKeywordArgument("conditions") - or - call = activeRecordQueryBuilderCall("reload") and - sink = call.getKeywordArgument("lock") - or - // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to - // SQLi if user supplied input is passed in as an argument. - call = activeRecordQueryBuilderCall("annotate") and - sink = call.getArgument(_) - or - call = activeRecordConnectionInstance().getAMethodCall("execute") and - sink = call.getArgument(0) + /** The `ActiveRecordModelClass` of the receiver of this method. */ + ActiveRecordModelClass getReceiverClass() { result = recvCls } } -private predicate sqlFragmentArgument(DataFlow::CallNode call, DataFlow::Node sink) { - exists(DataFlow::Node arg | - sqlFragmentArgumentInner(call, arg) and - sink = [arg, arg.(DataFlow::ArrayLiteralNode).getElement(0)] and - unsafeSqlExpr(sink.asExpr().getExpr()) +private Expr sqlFragmentArgument(MethodCall call) { + exists(string methodName | + methodName = call.getMethodName() and + ( + methodName = + [ + "delete_all", "delete_by", "destroy_all", "destroy_by", "exists?", "find_by", "find_by!", + "find_or_create_by", "find_or_create_by!", "find_or_initialize_by", "find_by_sql", "from", + "group", "having", "joins", "lock", "not", "order", "reorder", "pluck", "where", + "rewhere", "select", "reselect", "update_all" + ] and + result = call.getArgument(0) + or + methodName = "calculate" and result = call.getArgument(1) + or + methodName in ["average", "count", "maximum", "minimum", "sum", "count_by_sql"] and + result = call.getArgument(0) + or + // This format was supported until Rails 2.3.8 + methodName = ["all", "find", "first", "last"] and + result = call.getKeywordArgument("conditions") + or + methodName = "reload" and + result = call.getKeywordArgument("lock") + or + // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to + // SQLi if user supplied input is passed in as an argument. + methodName = "annotate" and + result = call.getArgument(_) + ) ) } @@ -225,8 +162,6 @@ private predicate unsafeSqlExpr(Expr sqlFragmentExpr) { } /** - * DEPRECATED. Use the `SqlExecution` concept or `ActiveRecordSqlExecutionRange`. - * * A method call that may result in executing unintended user-controlled SQL * queries if the `getSqlFragmentSinkArgument()` expression is tainted by * unsanitized user-controlled input. For example, supposing that `User` is an @@ -240,32 +175,55 @@ private predicate unsafeSqlExpr(Expr sqlFragmentExpr) { * as `"') OR 1=1 --"` could result in the application looking up all users * rather than just one with a matching name. */ -deprecated class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall { - private DataFlow::CallNode call; +class PotentiallyUnsafeSqlExecutingMethodCall extends ActiveRecordModelClassMethodCall { + // The SQL fragment argument itself + private Expr sqlFragmentExpr; PotentiallyUnsafeSqlExecutingMethodCall() { - call.asExpr().getExpr() = this and sqlFragmentArgument(call, _) + exists(Expr arg | + arg = sqlFragmentArgument(this) and + unsafeSqlExpr(sqlFragmentExpr) and + ( + sqlFragmentExpr = arg + or + sqlFragmentExpr = arg.(ArrayLiteral).getElement(0) + ) and + // Check that method has not been overridden + not exists(SingletonMethod m | + m.getName() = this.getMethodName() and + m.getOuterScope() = this.getReceiverClass() + ) + ) } /** * Gets the SQL fragment argument of this method call. */ - Expr getSqlFragmentSinkArgument() { - exists(DataFlow::Node sink | - sqlFragmentArgument(call, sink) and result = sink.asExpr().getExpr() - ) - } + Expr getSqlFragmentSinkArgument() { result = sqlFragmentExpr } } /** - * A SQL execution arising from a call to the ActiveRecord library. + * An `SqlExecution::Range` for an argument to a + * `PotentiallyUnsafeSqlExecutingMethodCall` that may be vulnerable to being + * controlled by user input. */ class ActiveRecordSqlExecutionRange extends SqlExecution::Range { - ActiveRecordSqlExecutionRange() { sqlFragmentArgument(_, this) } + ActiveRecordSqlExecutionRange() { + exists(PotentiallyUnsafeSqlExecutingMethodCall mc | + this.asExpr().getNode() = mc.getSqlFragmentSinkArgument() + ) + or + this = activeRecordConnectionInstance().getAMethodCall("execute").getArgument(0) and + unsafeSqlExpr(this.asExpr().getExpr()) + } override DataFlow::Node getSql() { result = this } } +private API::Node activeRecordConnectionInstance() { + result = activeRecordClassApiNode().getReturn("connection") +} + // TODO: model `ActiveRecord` sanitizers // https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html /** @@ -283,8 +241,15 @@ abstract class ActiveRecordModelInstantiation extends OrmInstantiation::Range, override predicate methodCallMayAccessField(string methodName) { // The method is not a built-in, and... not isBuiltInMethodForActiveRecordModelInstance(methodName) and - // ...There is no matching method definition in the class - not exists(this.getClass().getMethod(methodName)) + ( + // ...There is no matching method definition in the class, or... + not exists(this.getClass().getMethod(methodName)) + or + // ...the called method can access a field. + exists(Method m | m = this.getClass().getAPotentialFieldAccessMethod() | + m.getName() = methodName + ) + ) } } @@ -352,10 +317,21 @@ private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation } // A `self` reference that may resolve to an active record model object -private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation { +private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInstantiation, + DataFlow::SelfParameterNode +{ private ActiveRecordModelClass cls; - ActiveRecordModelClassSelfReference() { this = cls.getClassNode().getAnOwnInstanceSelf() } + ActiveRecordModelClassSelfReference() { + exists(MethodBase m | + m = this.getCallable() and + m.getEnclosingModule() = cls and + m = cls.getAMethod() + ) and + // In a singleton method, `self` refers to the class itself rather than an + // instance of that class + not this.getSelfVariable().getDeclaringScope() instanceof SingletonMethod + } final override ActiveRecordModelClass getClass() { result = cls } } @@ -366,7 +342,7 @@ private class ActiveRecordModelClassSelfReference extends ActiveRecordModelInsta class ActiveRecordInstance extends DataFlow::Node { private ActiveRecordModelInstantiation instantiation; - ActiveRecordInstance() { this = instantiation.track().getAValueReachableFromSource() } + ActiveRecordInstance() { this = instantiation or instantiation.flowsTo(this) } /** Gets the `ActiveRecordModelClass` that this is an instance of. */ ActiveRecordModelClass getClass() { result = instantiation.getClass() } @@ -404,12 +380,12 @@ private module Persistence { /** A call to e.g. `User.create(name: "foo")` */ private class CreateLikeCall extends DataFlow::CallNode, PersistentWriteAccess::Range { CreateLikeCall() { - this = - activeRecordBaseClass() - .getAMethodCall([ - "create", "create!", "create_or_find_by", "create_or_find_by!", "find_or_create_by", - "find_or_create_by!", "insert", "insert!" - ]) + exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and + this.getMethodName() = + [ + "create", "create!", "create_or_find_by", "create_or_find_by!", "find_or_create_by", + "find_or_create_by!", "insert", "insert!" + ] } override DataFlow::Node getValue() { @@ -426,7 +402,8 @@ private module Persistence { /** A call to e.g. `User.update(1, name: "foo")` */ private class UpdateLikeClassMethodCall extends DataFlow::CallNode, PersistentWriteAccess::Range { UpdateLikeClassMethodCall() { - this = activeRecordBaseClass().getAMethodCall(["update", "update!", "upsert"]) + exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and + this.getMethodName() = ["update", "update!", "upsert"] } override DataFlow::Node getValue() { @@ -471,7 +448,10 @@ private module Persistence { * ``` */ private class TouchAllCall extends DataFlow::CallNode, PersistentWriteAccess::Range { - TouchAllCall() { this = activeRecordQueryBuilderCall("touch_all") } + TouchAllCall() { + exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and + this.getMethodName() = "touch_all" + } override DataFlow::Node getValue() { result = this.getKeywordArgument("time") } } @@ -481,7 +461,8 @@ private module Persistence { private ExprNodes::ArrayLiteralCfgNode arr; InsertAllLikeCall() { - this = activeRecordBaseClass().getAMethodCall(["insert_all", "insert_all!", "upsert_all"]) and + exists(this.asExpr().getExpr().(ActiveRecordModelClassMethodCall).getReceiverClass()) and + this.getMethodName() = ["insert_all", "insert_all!", "upsert_all"] and arr = this.getArgument(0).asExpr() } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll index 9f0e0f4b8598..962199157707 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveResource.qll @@ -18,12 +18,8 @@ module ActiveResource { * An ActiveResource model class. This is any (transitive) subclass of ActiveResource. */ pragma[nomagic] - private API::Node activeResourceBaseClass() { - result = API::getTopLevelMember("ActiveResource").getMember("Base") - } - - private DataFlow::ClassNode activeResourceClass() { - result = activeResourceBaseClass().getADescendentModule() + private API::Node modelApiNode() { + result = API::getTopLevelMember("ActiveResource").getMember("Base").getASubclass() } /** @@ -34,56 +30,24 @@ module ActiveResource { * end * ``` */ - class ModelClassNode extends DataFlow::ClassNode { - ModelClassNode() { this = activeResourceClass() } - - /** Gets a call to `site=`, which sets the base URL for this model. */ - SiteAssignCall getASiteAssignment() { result.getModelClass() = this } + class ModelClass extends ClassDeclaration { + API::Node model; - /** Holds if `c` sets a base URL which does not use HTTPS. */ - predicate disablesCertificateValidation(SiteAssignCall c) { - c = this.getASiteAssignment() and - c.disablesCertificateValidation() + ModelClass() { + model = modelApiNode() and + this.getSuperclassExpr() = model.getAValueReachableFromSource().asExpr().getExpr() } - /** Gets a method call on this class that returns an instance of the class. */ - private DataFlow::CallNode getAChainedCall() { - result.(FindCall).getModelClass() = this - or - result.(CreateCall).getModelClass() = this - or - result.(CustomHttpCall).getModelClass() = this - or - result.(CollectionCall).getCollection().getModelClass() = this and - result.getMethodName() = ["first", "last"] - } - - /** Gets an API node referring to an instance of this class. */ - API::Node getAnInstanceReference() { - result = this.trackInstance() - or - result = this.getAChainedCall().track() - } - } - - /** DEPRECATED. Use `ModelClassNode` instead. */ - deprecated class ModelClass extends ClassDeclaration { - private ModelClassNode cls; - - ModelClass() { this = cls.getADeclaration() } - - /** Gets the class for which this is a declaration. */ - ModelClassNode getClassNode() { result = cls } - - /** Gets the API node for this class object. */ - deprecated API::Node getModelApiNode() { result = cls.trackModule() } + /** Gets the API node for this model */ + API::Node getModelApiNode() { result = model } /** Gets a call to `site=`, which sets the base URL for this model. */ - SiteAssignCall getASiteAssignment() { result = cls.getASiteAssignment() } + SiteAssignCall getASiteAssignment() { result.getModelClass() = this } /** Holds if `c` sets a base URL which does not use HTTPS. */ predicate disablesCertificateValidation(SiteAssignCall c) { - cls.disablesCertificateValidation(c) + c = this.getASiteAssignment() and + c.disablesCertificateValidation() } } @@ -98,20 +62,25 @@ module ActiveResource { * ``` */ class ModelClassMethodCall extends DataFlow::CallNode { - private ModelClassNode cls; + API::Node model; - ModelClassMethodCall() { this = cls.trackModule().getAMethodCall(_) } + ModelClassMethodCall() { + model = modelApiNode() and + this = classMethodCall(model, _) + } /** Gets the model class for this call. */ - ModelClassNode getModelClass() { result = cls } + ModelClass getModelClass() { result.getModelApiNode() = model } } /** * A call to `site=` on an ActiveResource model class. * This sets the base URL for all HTTP requests made by this class. */ - private class SiteAssignCall extends ModelClassMethodCall { - SiteAssignCall() { this.getMethodName() = "site=" } + private class SiteAssignCall extends DataFlow::CallNode { + API::Node model; + + SiteAssignCall() { model = modelApiNode() and this = classMethodCall(model, "site=") } /** * Gets a node that contributes to the URLs used for HTTP requests by the parent @@ -119,10 +88,12 @@ module ActiveResource { */ DataFlow::Node getAUrlPart() { result = this.getArgument(0) } + /** Gets the model class for this call. */ + ModelClass getModelClass() { result.getModelApiNode() = model } + /** Holds if this site value specifies HTTP rather than HTTPS. */ predicate disablesCertificateValidation() { this.getAUrlPart() - // TODO: We should not need all this just to get the string value .asExpr() .(ExprNodes::AssignExprCfgNode) .getRhs() @@ -170,70 +141,87 @@ module ActiveResource { } /** - * DEPRECATED. Use `ModelClassNode.getAnInstanceReference()` instead. - * * An ActiveResource model object. */ - deprecated class ModelInstance extends DataFlow::Node { - private ModelClassNode cls; - - ModelInstance() { this = cls.getAnInstanceReference().getAValueReachableFromSource() } + class ModelInstance extends DataFlow::Node { + ModelClass cls; + + ModelInstance() { + exists(API::Node model | model = modelApiNode() | + this = model.getInstance().getAValueReachableFromSource() and + cls.getModelApiNode() = model + ) + or + exists(FindCall call | call.flowsTo(this) | cls = call.getModelClass()) + or + exists(CreateCall call | call.flowsTo(this) | cls = call.getModelClass()) + or + exists(CustomHttpCall call | call.flowsTo(this) | cls = call.getModelClass()) + or + exists(CollectionCall call | + call.getMethodName() = ["first", "last"] and + call.flowsTo(this) + | + cls = call.getCollection().getModelClass() + ) + } /** Gets the model class for this instance. */ - ModelClassNode getModelClass() { result = cls } + ModelClass getModelClass() { result = cls } } /** * A call to a method on an ActiveResource model object. */ class ModelInstanceMethodCall extends DataFlow::CallNode { - private ModelClassNode cls; + ModelInstance i; - ModelInstanceMethodCall() { this = cls.getAnInstanceReference().getAMethodCall(_) } + ModelInstanceMethodCall() { this.getReceiver() = i } /** Gets the model instance for this call. */ - deprecated ModelInstance getInstance() { result = this.getReceiver() } + ModelInstance getInstance() { result = i } /** Gets the model class for this call. */ - ModelClassNode getModelClass() { result = cls } + ModelClass getModelClass() { result = i.getModelClass() } } /** - * DEPRECATED. Use `CollectionSource` instead. - * - * A data flow node that may refer to a collection of ActiveResource model objects. + * A collection of ActiveResource model objects. */ - deprecated class Collection extends DataFlow::Node { - Collection() { this = any(CollectionSource src).track().getAValueReachableFromSource() } - } - - /** - * A call that returns a collection of ActiveResource model objects. - */ - class CollectionSource extends ModelClassMethodCall { - CollectionSource() { - this.getMethodName() = "all" - or - this.getArgument(0).asExpr().getConstantValue().isStringlikeValue("all") + class Collection extends DataFlow::Node { + ModelClassMethodCall classMethodCall; + + Collection() { + classMethodCall.flowsTo(this) and + ( + classMethodCall.getMethodName() = "all" + or + classMethodCall.getMethodName() = "find" and + classMethodCall.getArgument(0).asExpr().getConstantValue().isStringlikeValue("all") + ) } + + /** Gets the model class for this collection. */ + ModelClass getModelClass() { result = classMethodCall.getModelClass() } } /** * A method call on a collection. */ class CollectionCall extends DataFlow::CallNode { - private CollectionSource collection; - - CollectionCall() { this = collection.track().getAMethodCall(_) } + CollectionCall() { this.getReceiver() instanceof Collection } /** Gets the collection for this call. */ - CollectionSource getCollection() { result = collection } + Collection getCollection() { result = this.getReceiver() } } private class ModelClassMethodCallAsHttpRequest extends Http::Client::Request::Range, ModelClassMethodCall { + ModelClass cls; + ModelClassMethodCallAsHttpRequest() { + this.getModelClass() = cls and this.getMethodName() = ["all", "build", "create", "create!", "find", "first", "last"] } @@ -242,14 +230,12 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - this.getModelClass().disablesCertificateValidation(disablingNode) and + cls.disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } - override DataFlow::Node getAUrlPart() { - result = this.getModelClass().getASiteAssignment().getAUrlPart() - } + override DataFlow::Node getAUrlPart() { result = cls.getASiteAssignment().getAUrlPart() } override DataFlow::Node getResponseBody() { result = this } } @@ -257,7 +243,10 @@ module ActiveResource { private class ModelInstanceMethodCallAsHttpRequest extends Http::Client::Request::Range, ModelInstanceMethodCall { + ModelClass cls; + ModelInstanceMethodCallAsHttpRequest() { + this.getModelClass() = cls and this.getMethodName() = [ "exists?", "reload", "save", "save!", "destroy", "delete", "get", "patch", "post", "put", @@ -270,15 +259,42 @@ module ActiveResource { override predicate disablesCertificateValidation( DataFlow::Node disablingNode, DataFlow::Node argumentOrigin ) { - this.getModelClass().disablesCertificateValidation(disablingNode) and + cls.disablesCertificateValidation(disablingNode) and // TODO: highlight real argument origin argumentOrigin = disablingNode } - override DataFlow::Node getAUrlPart() { - result = this.getModelClass().getASiteAssignment().getAUrlPart() - } + override DataFlow::Node getAUrlPart() { result = cls.getASiteAssignment().getAUrlPart() } override DataFlow::Node getResponseBody() { result = this } } + + /** + * A call to a class method. + * + * TODO: is this general enough to be useful elsewhere? + * + * Examples: + * ```rb + * class A + * def self.m; end + * + * m # call + * end + * + * A.m # call + * ``` + */ + private DataFlow::CallNode classMethodCall(API::Node classNode, string methodName) { + // A.m + result = classNode.getAMethodCall(methodName) + or + // class A + // A.m + // end + result.getReceiver().asExpr() instanceof ExprNodes::SelfVariableAccessCfgNode and + result.asExpr().getExpr().getEnclosingModule().(ClassDeclaration).getSuperclassExpr() = + classNode.getAValueReachableFromSource().asExpr().getExpr() and + result.getMethodName() = methodName + } } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll index 98fbe241404e..8e673c4255d9 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/GraphQL.qll @@ -39,8 +39,13 @@ private API::Node graphQlSchema() { result = API::getTopLevelMember("GraphQL").g */ private class GraphqlRelayClassicMutationClass extends ClassDeclaration { GraphqlRelayClassicMutationClass() { - this = - graphQlSchema().getMember("RelayClassicMutation").getADescendentModule().getADeclaration() + this.getSuperclassExpr() = + graphQlSchema() + .getMember("RelayClassicMutation") + .getASubclass() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } } @@ -69,7 +74,13 @@ private class GraphqlRelayClassicMutationClass extends ClassDeclaration { */ private class GraphqlSchemaResolverClass extends ClassDeclaration { GraphqlSchemaResolverClass() { - this = graphQlSchema().getMember("Resolver").getADescendentModule().getADeclaration() + this.getSuperclassExpr() = + graphQlSchema() + .getMember("Resolver") + .getASubclass() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } } @@ -92,7 +103,13 @@ private string getASupportedHttpMethod() { result = ["get", "post"] } */ class GraphqlSchemaObjectClass extends ClassDeclaration { GraphqlSchemaObjectClass() { - this = graphQlSchema().getMember("Object").getADescendentModule().getADeclaration() + this.getSuperclassExpr() = + graphQlSchema() + .getMember("Object") + .getASubclass() + .getAValueReachableFromSource() + .asExpr() + .getExpr() } /** Gets a `GraphqlFieldDefinitionMethodCall` called in this class. */ diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll index dfb93a0e6e52..70744d6fcc80 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Sqlite3.qll @@ -15,20 +15,21 @@ private import codeql.ruby.Concepts * https://github.com/sparklemotion/sqlite3-ruby */ module Sqlite3 { - private API::Node databaseConst() { - result = API::getTopLevelMember("SQLite3").getMember("Database") - } - - private API::Node dbInstance() { - result = databaseConst().getInstance() - or - // e.g. SQLite3::Database.new("foo.db") |db| { db.some_method } - result = databaseConst().getMethod("new").getBlock().getParameter(0) - } - /** Gets a method call with a receiver that is a database instance. */ private DataFlow::CallNode getADatabaseMethodCall(string methodName) { - result = dbInstance().getAMethodCall(methodName) + exists(API::Node dbInstance | + dbInstance = API::getTopLevelMember("SQLite3").getMember("Database").getInstance() and + ( + result = dbInstance.getAMethodCall(methodName) + or + // e.g. SQLite3::Database.new("foo.db") |db| { db.some_method } + exists(DataFlow::BlockNode block | + result.getMethodName() = methodName and + block = dbInstance.getAValueReachableFromSource().(DataFlow::CallNode).getBlock() and + block.getParameter(0).flowsTo(result.getReceiver()) + ) + ) + ) } /** A prepared but unexecuted SQL statement. */ diff --git a/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll b/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll index 7b8648bd2b17..73ef87f5fd5c 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/Twirp.qll @@ -16,28 +16,50 @@ module Twirp { /** * A Twirp service instantiation */ - deprecated class ServiceInstantiation extends DataFlow::CallNode { + class ServiceInstantiation extends DataFlow::CallNode { ServiceInstantiation() { this = API::getTopLevelMember("Twirp").getMember("Service").getAnInstantiation() } /** - * Gets a handler's method. + * Gets a local source node for the Service instantiation argument (the service handler). + */ + private DataFlow::LocalSourceNode getHandlerSource() { + result = this.getArgument(0).getALocalSource() + } + + /** + * Gets the API::Node for the service handler's class. + */ + private API::Node getAHandlerClassApiNode() { + result.getAnInstantiation() = this.getHandlerSource() + } + + /** + * Gets the AST module for the service handler's class. */ - DataFlow::MethodNode getAHandlerMethodNode() { - result = this.getArgument(0).backtrack().getMethod(_).asCallable() + private Ast::Module getAHandlerClassAstNode() { + result = + this.getAHandlerClassApiNode() + .asSource() + .asExpr() + .(CfgNodes::ExprNodes::ConstantReadAccessCfgNode) + .getExpr() + .getModule() } /** - * Gets a handler's method as an AST node. + * Gets a handler's method. */ - Ast::Method getAHandlerMethod() { result = this.getAHandlerMethodNode().asCallableAstNode() } + Ast::Method getAHandlerMethod() { + result = this.getAHandlerClassAstNode().getAnInstanceMethod() + } } /** * A Twirp client */ - deprecated class ClientInstantiation extends DataFlow::CallNode { + class ClientInstantiation extends DataFlow::CallNode { ClientInstantiation() { this = API::getTopLevelMember("Twirp").getMember("Client").getAnInstantiation() } @@ -45,10 +67,7 @@ module Twirp { /** The URL of a Twirp service, considered as a sink. */ class ServiceUrlAsSsrfSink extends ServerSideRequestForgery::Sink { - ServiceUrlAsSsrfSink() { - this = - API::getTopLevelMember("Twirp").getMember("Client").getMethod("new").getArgument(0).asSink() - } + ServiceUrlAsSsrfSink() { exists(ClientInstantiation c | c.getArgument(0) = this) } } /** A parameter that will receive parts of the url when handling an incoming request. */ @@ -56,14 +75,7 @@ module Twirp { DataFlow::ParameterNode { UnmarshaledParameter() { - this = - API::getTopLevelMember("Twirp") - .getMember("Service") - .getMethod("new") - .getArgument(0) - .getMethod(_) - .getParameter(0) - .asSource() + exists(ServiceInstantiation i | i.getAHandlerMethod().getParameter(0) = this.asParameter()) } override string getSourceType() { result = "Twirp Unmarhaled Parameter" } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll index f7345b22ed10..4095beb10af8 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Gem.qll @@ -24,7 +24,7 @@ module Gem { GemSpec() { this.getExtension() = "gemspec" and - specCall = API::getTopLevelMember("Gem").getMember("Specification").getMethod("new") and + specCall = API::root().getMember("Gem").getMember("Specification").getMethod("new") and specCall.getLocation().getFile() = this } @@ -42,7 +42,7 @@ module Gem { .getBlock() .getParameter(0) .getMethod(name + "=") - .getArgument(0) + .getParameter(0) .asSink() .asExpr() .getExpr() diff --git a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll index ad87ee37ecd7..b445521adb86 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/core/Kernel.qll @@ -19,8 +19,7 @@ module Kernel { */ class KernelMethodCall extends DataFlow::CallNode { KernelMethodCall() { - // Match Kernel calls using local flow, to avoid finding singleton calls on subclasses - this = DataFlow::getConstant("Kernel").getAMethodCall(_) + this = API::getTopLevelMember("Kernel").getAMethodCall(_) or this.asExpr().getExpr() instanceof UnknownMethodCall and ( diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll index 21bc5f69dcb3..2e3fa21a45b4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll @@ -44,7 +44,7 @@ private class SummarizedCallableFromModel extends SummarizedCallable { override Call getACall() { exists(API::MethodAccessNode base | ModelOutput::resolvedSummaryBase(type, path, base) and - result = base.asCall().asExpr().getExpr() + result = base.getCallNode().asExpr().getExpr() ) } diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll index ed7a331c4523..4c03522a9c55 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -99,10 +99,9 @@ API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { // A row of form `any;Method[foo]` should match any method named `foo`. type = "any" and n = 1 and - exists(string methodName, DataFlow::CallNode call | - methodMatchedByName(path, methodName) and - call.getMethodName() = methodName and - result.(API::MethodAccessNode).asCall() = call + exists(EntryPointFromAnyType entry | + methodMatchedByName(path, entry.getName()) and + result = entry.getANode() ) } @@ -113,10 +112,20 @@ API::Node getExtraNodeFromType(string type) { constRef = getConstantFromConstPath(consts) | suffix = "!" and - result = constRef.track() + ( + result.(API::Node::Internal).asSourceInternal() = constRef + or + result.(API::Node::Internal).asSourceInternal() = + constRef.getADescendentModule().getAnOwnModuleSelf() + ) or suffix = "" and - result = constRef.track().getInstance() + ( + result.(API::Node::Internal).asSourceInternal() = constRef.getAMethodCall("new") + or + result.(API::Node::Internal).asSourceInternal() = + constRef.getADescendentModule().getAnInstanceSelf() + ) ) or type = "" and @@ -136,6 +145,21 @@ private predicate methodMatchedByName(AccessPath path, string methodName) { ) } +/** + * An API graph entry point corresponding to a method name such as `foo` in `;any;Method[foo]`. + * + * This ensures that the API graph rooted in that method call is materialized. + */ +private class EntryPointFromAnyType extends API::EntryPoint { + string name; + + EntryPointFromAnyType() { this = "AnyMethod[" + name + "]" and methodMatchedByName(_, name) } + + override DataFlow::CallNode getACall() { result.getMethodName() = name } + + string getName() { result = name } +} + /** * Gets a Ruby-specific API graph successor of `node` reachable by resolving `token`. */ @@ -151,11 +175,9 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { result = node.getInstance() or token.getName() = "Parameter" and - exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | - argPos = FlowSummaryImplSpecific::parseParamBody(token.getAnArgument()) and - DataFlowDispatch::parameterMatch(paramPos, argPos) and - result = node.getParameterAtPosition(paramPos) - ) + result = + node.getASuccessor(API::Label::getLabelFromParameterPosition(FlowSummaryImplSpecific::parseArgBody(token + .getAnArgument()))) or exists(DataFlow::ContentSet contents | SummaryComponent::content(contents) = FlowSummaryImplSpecific::interpretComponentSpecific(token) and @@ -169,11 +191,9 @@ API::Node getExtraSuccessorFromNode(API::Node node, AccessPathToken token) { bindingset[token] API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathToken token) { token.getName() = "Argument" and - exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | - paramPos = FlowSummaryImplSpecific::parseArgBody(token.getAnArgument()) and - DataFlowDispatch::parameterMatch(paramPos, argPos) and - result = node.getArgumentAtPosition(argPos) - ) + result = + node.getASuccessor(API::Label::getLabelFromArgumentPosition(FlowSummaryImplSpecific::parseParamBody(token + .getAnArgument()))) } /** @@ -191,7 +211,7 @@ predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathToke /** An API graph node representing a method call. */ class InvokeNode extends API::MethodAccessNode { /** Gets the number of arguments to the call. */ - int getNumArgument() { result = this.asCall().getNumberOfArguments() } + int getNumArgument() { result = this.getCallNode().getNumberOfArguments() } } /** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */ diff --git a/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll b/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll index 3653fb23cee6..31c9055d7cc0 100644 --- a/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/KernelOpenQuery.qll @@ -55,8 +55,7 @@ class AmbiguousPathCall extends DataFlow::CallNode { } private predicate methodCallOnlyOnIO(DataFlow::CallNode node, string methodName) { - // Use local flow to find calls to 'IO' without subclasses - node = DataFlow::getConstant("IO").getAMethodCall(methodName) and + node = API::getTopLevelMember("IO").getAMethodCall(methodName) and not node = API::getTopLevelMember("File").getAMethodCall(methodName) // needed in e.g. opal/opal, where some calls have both paths (opal implements an own corelib) } diff --git a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll index 637f7dab04aa..261a7af462ff 100644 --- a/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll +++ b/ruby/ql/lib/codeql/ruby/security/OpenSSL.qll @@ -597,7 +597,7 @@ private module Digest { call = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("new") | this = call.getReturn().getAMethodCall(["digest", "update", "<<"]) and - algo.matchesName(call.asCall() + algo.matchesName(call.getCallNode() .getArgument(0) .asExpr() .getExpr() @@ -619,7 +619,7 @@ private module Digest { Cryptography::HashingAlgorithm algo; DigestCallDirect() { - this = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("digest").asCall() and + this = API::getTopLevelMember("OpenSSL").getMember("Digest").getMethod("digest").getCallNode() and algo.matchesName(this.getArgument(0).asExpr().getExpr().getConstantValue().getString()) } diff --git a/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll b/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll deleted file mode 100644 index 07b10cb13037..000000000000 --- a/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll +++ /dev/null @@ -1,328 +0,0 @@ -/** - * Parts of API graphs that can be shared with other dynamic languages. - * - * Depends on TypeTrackerSpecific for the corresponding language. - */ - -private import codeql.Locations -private import codeql.ruby.typetracking.TypeTracker -private import TypeTrackerSpecific - -/** - * The signature to use when instantiating `ApiGraphShared`. - * - * The implementor should define a newtype with at least three branches as follows: - * ```ql - * newtype TApiNode = - * MkForwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or - * MkBackwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or - * MkSinkNode(Node node) { ... } or - * ... - * ``` - * - * The three branches should be exposed through `getForwardNode`, `getBackwardNode`, and `getSinkNode`, respectively. - */ -signature module ApiGraphSharedSig { - /** A node in the API graph. */ - class ApiNode { - /** Gets a string representation of this API node. */ - string toString(); - - /** Gets the location associated with this API node, if any. */ - Location getLocation(); - } - - /** - * Gets the forward node with the given type-tracking state. - * - * This node will have outgoing epsilon edges to its type-tracking successors. - */ - ApiNode getForwardNode(TypeTrackingNode node, TypeTracker t); - - /** - * Gets the backward node with the given type-tracking state. - * - * This node will have outgoing epsilon edges to its type-tracking predecessors. - */ - ApiNode getBackwardNode(TypeTrackingNode node, TypeTracker t); - - /** - * Gets the sink node corresponding to `node`. - * - * Since sinks are not generally `LocalSourceNode`s, such nodes are materialised separately in order for - * the API graph to include representatives for sinks. Note that there is no corresponding case for "source" - * nodes as these are represented as forward nodes with initial-state type-trackers. - * - * Sink nodes have outgoing epsilon edges to the backward nodes corresponding to their local sources. - */ - ApiNode getSinkNode(Node node); - - /** - * Holds if a language-specific epsilon edge `pred -> succ` should be generated. - */ - predicate specificEpsilonEdge(ApiNode pred, ApiNode succ); -} - -/** - * Parts of API graphs that can be shared between language implementations. - */ -module ApiGraphShared { - private import S - - /** Gets a local source of `node`. */ - bindingset[node] - pragma[inline_late] - TypeTrackingNode getALocalSourceStrict(Node node) { result = node.getALocalSource() } - - cached - private module Cached { - /** - * Holds if there is an epsilon edge `pred -> succ`. - * - * That relation is reflexive, so `fastTC` produces the equivalent of a reflexive, transitive closure. - */ - pragma[noopt] - cached - predicate epsilonEdge(ApiNode pred, ApiNode succ) { - exists( - StepSummary summary, TypeTrackingNode predNode, TypeTracker predState, - TypeTrackingNode succNode, TypeTracker succState - | - StepSummary::stepCall(predNode, succNode, summary) - or - StepSummary::stepNoCall(predNode, succNode, summary) - | - pred = getForwardNode(predNode, predState) and - succState = StepSummary::append(predState, summary) and - succ = getForwardNode(succNode, succState) - or - succ = getBackwardNode(predNode, predState) and // swap order for backward flow - succState = StepSummary::append(predState, summary) and - pred = getBackwardNode(succNode, succState) // swap order for backward flow - ) - or - exists(Node sink, TypeTrackingNode localSource | - pred = getSinkNode(sink) and - localSource = getALocalSourceStrict(sink) and - succ = getBackwardStartNode(localSource) - ) - or - specificEpsilonEdge(pred, succ) - or - succ instanceof ApiNode and - succ = pred - } - - /** - * Holds if `pred` can reach `succ` by zero or more epsilon edges. - */ - cached - predicate epsilonStar(ApiNode pred, ApiNode succ) = fastTC(epsilonEdge/2)(pred, succ) - - /** Gets the API node to use when starting forward flow from `source` */ - cached - ApiNode forwardStartNode(TypeTrackingNode source) { - result = getForwardNode(source, TypeTracker::end(false)) - } - - /** Gets the API node to use when starting backward flow from `sink` */ - cached - ApiNode backwardStartNode(TypeTrackingNode sink) { - // There is backward flow A->B iff there is forward flow B->A. - // The starting point of backward flow corresponds to the end of a forward flow, and vice versa. - result = getBackwardNode(sink, TypeTracker::end(_)) - } - - /** Gets `node` as a data flow source. */ - cached - TypeTrackingNode asSourceCached(ApiNode node) { node = forwardEndNode(result) } - - /** Gets `node` as a data flow sink. */ - cached - Node asSinkCached(ApiNode node) { node = getSinkNode(result) } - } - - private import Cached - - /** Gets an API node corresponding to the end of forward-tracking to `localSource`. */ - pragma[nomagic] - private ApiNode forwardEndNode(TypeTrackingNode localSource) { - result = getForwardNode(localSource, TypeTracker::end(_)) - } - - /** Gets an API node corresponding to the end of backtracking to `localSource`. */ - pragma[nomagic] - private ApiNode backwardEndNode(TypeTrackingNode localSource) { - result = getBackwardNode(localSource, TypeTracker::end(false)) - } - - /** Gets a node reachable from `node` by zero or more epsilon edges, including `node` itself. */ - bindingset[node] - pragma[inline_late] - ApiNode getAnEpsilonSuccessorInline(ApiNode node) { epsilonStar(node, result) } - - /** Gets `node` as a data flow sink. */ - bindingset[node] - pragma[inline_late] - Node asSinkInline(ApiNode node) { result = asSinkCached(node) } - - /** Gets `node` as a data flow source. */ - bindingset[node] - pragma[inline_late] - TypeTrackingNode asSourceInline(ApiNode node) { result = asSourceCached(node) } - - /** Gets a value reachable from `source`. */ - bindingset[source] - pragma[inline_late] - Node getAValueReachableFromSourceInline(ApiNode source) { - exists(TypeTrackingNode src | - src = asSourceInline(getAnEpsilonSuccessorInline(source)) and - src.flowsTo(pragma[only_bind_into](result)) - ) - } - - /** Gets a value that can reach `sink`. */ - bindingset[sink] - pragma[inline_late] - Node getAValueReachingSinkInline(ApiNode sink) { - result = asSinkInline(getAnEpsilonSuccessorInline(sink)) - } - - /** - * Gets the starting point for forward-tracking at `node`. - * - * Should be used to obtain the successor of an edge when constructing labelled edges. - */ - bindingset[node] - pragma[inline_late] - ApiNode getForwardStartNode(Node node) { result = forwardStartNode(node) } - - /** - * Gets the starting point of backtracking from `node`. - * - * Should be used to obtain the successor of an edge when constructing labelled edges. - */ - bindingset[node] - pragma[inline_late] - ApiNode getBackwardStartNode(Node node) { result = backwardStartNode(node) } - - /** - * Gets a possible ending point of forward-tracking at `node`. - * - * Should be used to obtain the predecessor of an edge when constructing labelled edges. - * - * This is not backed by a `cached` predicate, and should only be used for materialising `cached` - * predicates in the API graph implementation - it should not be called in later stages. - */ - bindingset[node] - pragma[inline_late] - ApiNode getForwardEndNode(Node node) { result = forwardEndNode(node) } - - /** - * Gets a possible ending point backtracking to `node`. - * - * Should be used to obtain the predecessor of an edge when constructing labelled edges. - * - * This is not backed by a `cached` predicate, and should only be used for materialising `cached` - * predicates in the API graph implementation - it should not be called in later stages. - */ - bindingset[node] - pragma[inline_late] - ApiNode getBackwardEndNode(Node node) { result = backwardEndNode(node) } - - /** - * Gets a possible eding point of forward or backward tracking at `node`. - * - * Should be used to obtain the predecessor of an edge generated from store or load edges. - */ - bindingset[node] - pragma[inline_late] - ApiNode getForwardOrBackwardEndNode(Node node) { - result = getForwardEndNode(node) or result = getBackwardEndNode(node) - } - - /** Gets an API node for tracking forward starting at `node`. This is the implementation of `DataFlow::LocalSourceNode.track()` */ - bindingset[node] - pragma[inline_late] - ApiNode getNodeForForwardTracking(Node node) { result = forwardStartNode(node) } - - /** Gets an API node for backtracking starting at `node`. The implementation of `DataFlow::Node.backtrack()`. */ - bindingset[node] - pragma[inline_late] - ApiNode getNodeForBacktracking(Node node) { - result = getBackwardStartNode(getALocalSourceStrict(node)) - } - - /** Parts of the shared module to be re-exported by the user-facing `API` module. */ - module Public { - /** - * The signature to use when instantiating the `ExplainFlow` module. - */ - signature module ExplainFlowSig { - /** Holds if `node` should be a source. */ - predicate isSource(ApiNode node); - - /** Holds if `node` should be a sink. */ - default predicate isSink(ApiNode node) { any() } - - /** Holds if `node` should be skipped in the generated paths. */ - default predicate isHidden(ApiNode node) { none() } - } - - /** - * Module to help debug and visualize the data flows underlying API graphs. - * - * This module exports the query predicates for a path-problem query, and should be imported - * into the top-level of such a query. - * - * The module argument should specify source and sink API nodes, and the resulting query - * will show paths of epsilon edges that go from a source to a sink. Only epsilon edges are visualized. - * - * To condense the output a bit, paths in which the source and sink are the same node are omitted. - */ - module ExplainFlow { - private import T - - private ApiNode relevantNode() { - isSink(result) and - result = getAnEpsilonSuccessorInline(any(ApiNode node | isSource(node))) - or - epsilonEdge(result, relevantNode()) - } - - /** Holds if `node` is part of the graph to visualize. */ - query predicate nodes(ApiNode node) { node = relevantNode() and not isHidden(node) } - - private predicate edgeToHiddenNode(ApiNode pred, ApiNode succ) { - epsilonEdge(pred, succ) and - isHidden(succ) and - pred = relevantNode() and - succ = relevantNode() - } - - /** Holds if `pred -> succ` is an edge in the graph to visualize. */ - query predicate edges(ApiNode pred, ApiNode succ) { - nodes(pred) and - nodes(succ) and - exists(ApiNode mid | - edgeToHiddenNode*(pred, mid) and - epsilonEdge(mid, succ) - ) - } - - /** Holds for each source/sink pair to visualize in the graph. */ - query predicate problems( - ApiNode location, ApiNode sourceNode, ApiNode sinkNode, string message - ) { - nodes(sourceNode) and - nodes(sinkNode) and - isSource(sourceNode) and - isSink(sinkNode) and - sinkNode = getAnEpsilonSuccessorInline(sourceNode) and - sourceNode != sinkNode and - location = sinkNode and - message = "Node flows here" - } - } - } -} diff --git a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll index 001375b4dc54..fb3d7bf828f2 100644 --- a/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll @@ -55,9 +55,10 @@ private module Cached { ) } - /** Gets a type tracker with no content and the call bit set to the given value. */ - cached - TypeTracker noContentTypeTracker(boolean hasCall) { result = MkTypeTracker(hasCall, noContent()) } + pragma[nomagic] + private TypeTracker noContentTypeTracker(boolean hasCall) { + result = MkTypeTracker(hasCall, noContent()) + } /** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */ cached @@ -317,8 +318,6 @@ class StepSummary extends TStepSummary { /** Provides predicates for updating step summaries (`StepSummary`s). */ module StepSummary { - predicate append = Cached::append/2; - /** * Gets the summary that corresponds to having taken a forwards * inter-procedural step from `nodeFrom` to `nodeTo`. @@ -379,35 +378,6 @@ module StepSummary { } deprecated predicate localSourceStoreStep = flowsToStoreStep/3; - - /** Gets the step summary for a level step. */ - StepSummary levelStep() { result = LevelStep() } - - /** Gets the step summary for a call step. */ - StepSummary callStep() { result = CallStep() } - - /** Gets the step summary for a return step. */ - StepSummary returnStep() { result = ReturnStep() } - - /** Gets the step summary for storing into `content`. */ - StepSummary storeStep(TypeTrackerContent content) { result = StoreStep(content) } - - /** Gets the step summary for loading from `content`. */ - StepSummary loadStep(TypeTrackerContent content) { result = LoadStep(content) } - - /** Gets the step summary for loading from `load` and then storing into `store`. */ - StepSummary loadStoreStep(TypeTrackerContent load, TypeTrackerContent store) { - result = LoadStoreStep(load, store) - } - - /** Gets the step summary for a step that only permits contents matched by `filter`. */ - StepSummary withContent(ContentFilter filter) { result = WithContent(filter) } - - /** Gets the step summary for a step that blocks contents matched by `filter`. */ - StepSummary withoutContent(ContentFilter filter) { result = WithoutContent(filter) } - - /** Gets the step summary for a jump step. */ - StepSummary jumpStep() { result = JumpStep() } } /** @@ -570,13 +540,6 @@ module TypeTracker { * Gets a valid end point of type tracking. */ TypeTracker end() { result.end() } - - /** - * INTERNAL USE ONLY. - * - * Gets a valid end point of type tracking with the call bit set to the given value. - */ - predicate end = Cached::noContentTypeTracker/1; } pragma[nomagic] diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected b/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected index 5677b16fedbe..43b6490b0522 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/ApiGraphs.expected @@ -1,8 +1,8 @@ classMethodCalls -| test1.rb:58:1:58:8 | ForwardNode(call to m) | -| test1.rb:59:1:59:8 | ForwardNode(call to m) | +| test1.rb:58:1:58:8 | Use getMember("M1").getMember("C1").getMethod("m").getReturn() | +| test1.rb:59:1:59:8 | Use getMember("M2").getMember("C3").getMethod("m").getReturn() | instanceMethodCalls -| test1.rb:61:1:61:12 | ForwardNode(call to m) | -| test1.rb:62:1:62:12 | ForwardNode(call to m) | +| test1.rb:61:1:61:12 | Use getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn() | +| test1.rb:62:1:62:12 | Use getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn() | flowThroughArray | test1.rb:73:1:73:10 | call to m | diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql deleted file mode 100644 index 93b5aaf745ef..000000000000 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.ql +++ /dev/null @@ -1,77 +0,0 @@ -import ruby -import codeql.ruby.ast.internal.TreeSitter -import codeql.ruby.dataflow.internal.AccessPathSyntax -import codeql.ruby.frameworks.data.internal.ApiGraphModels -import codeql.ruby.ApiGraphs -import TestUtilities.InlineExpectationsTest - -class AccessPathFromExpectation extends AccessPath::Range { - AccessPathFromExpectation() { hasExpectationWithValue(_, this) } -} - -API::Node evaluatePath(AccessPath path, int n) { - path instanceof AccessPathFromExpectation and - n = 1 and - exists(AccessPathToken token | token = path.getToken(0) | - token.getName() = "Member" and - result = API::getTopLevelMember(token.getAnArgument()) - or - token.getName() = "Method" and - result = API::getTopLevelCall(token.getAnArgument()) - or - token.getName() = "EntryPoint" and - result = token.getAnArgument().(API::EntryPoint).getANode() - ) - or - result = getSuccessorFromNode(evaluatePath(path, n - 1), path.getToken(n - 1)) - or - result = getSuccessorFromInvoke(evaluatePath(path, n - 1), path.getToken(n - 1)) - or - // TODO this is a workaround, support parsing of Method['[]'] instead - path.getToken(n - 1).getName() = "MethodBracket" and - result = evaluatePath(path, n - 1).getMethod("[]") -} - -API::Node evaluatePath(AccessPath path) { result = evaluatePath(path, path.getNumToken()) } - -module ApiUseTest implements TestSig { - string getARelevantTag() { result = ["source", "sink", "call", "reachableFromSource"] } - - predicate hasActualResult(Location location, string element, string tag, string value) { - // All results are considered optional - none() - } - - predicate hasOptionalResult(Location location, string element, string tag, string value) { - exists(API::Node apiNode, DataFlow::Node dataflowNode | - apiNode = evaluatePath(value) and - ( - tag = "source" and dataflowNode = apiNode.asSource() - or - tag = "reachableFromSource" and dataflowNode = apiNode.getAValueReachableFromSource() - or - tag = "sink" and dataflowNode = apiNode.asSink() - or - tag = "call" and dataflowNode = apiNode.asCall() - ) and - location = dataflowNode.getLocation() and - element = dataflowNode.toString() - ) - } -} - -import MakeTest - -class CustomEntryPointCall extends API::EntryPoint { - CustomEntryPointCall() { this = "CustomEntryPointCall" } - - override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" } -} - -class CustomEntryPointUse extends API::EntryPoint { - CustomEntryPointUse() { this = "CustomEntryPointUse" } - - override DataFlow::LocalSourceNode getASource() { - result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse" - } -} diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb index 35cf4471b483..34c4d17d212a 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/callbacks.rb @@ -1,39 +1,39 @@ -Something.foo.withCallback do |a, b| #$ source=Member[Something].Method[foo].ReturnValue - a.something #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[0].Method[something].ReturnValue - b.somethingElse #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].Argument[block].Argument[1].Method[somethingElse].ReturnValue -end #$ source=Member[Something].Method[foo].ReturnValue.Method[withCallback].ReturnValue +Something.foo.withCallback do |a, b| #$ use=getMember("Something").getMethod("foo").getReturn() + a.something #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(0).getMethod("something").getReturn() + b.somethingElse #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getBlock().getParameter(1).getMethod("somethingElse").getReturn() +end #$ use=getMember("Something").getMethod("foo").getReturn().getMethod("withCallback").getReturn() -Something.withNamedArg do |a:, b: nil| #$ source=Member[Something] - a.something #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[a:].Method[something].ReturnValue - b.somethingElse #$ source=Member[Something].Method[withNamedArg].Argument[block].Parameter[b:].Method[somethingElse].ReturnValue -end #$ source=Member[Something].Method[withNamedArg].ReturnValue +Something.withNamedArg do |a:, b: nil| #$ use=getMember("Something") + a.something #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("a").getMethod("something").getReturn() + b.somethingElse #$ use=getMember("Something").getMethod("withNamedArg").getBlock().getKeywordParameter("b").getMethod("somethingElse").getReturn() +end #$ use=getMember("Something").getMethod("withNamedArg").getReturn() -Something.withLambda ->(a, b) { #$ source=Member[Something] - a.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[0].Method[something].ReturnValue - b.something #$ source=Member[Something].Method[withLambda].Argument[0].Parameter[1].Method[something].ReturnValue -} #$ source=Member[Something].Method[withLambda].ReturnValue +Something.withLambda ->(a, b) { #$ use=getMember("Something") + a.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(0).getMethod("something").getReturn() + b.something #$ use=getMember("Something").getMethod("withLambda").getParameter(0).getParameter(1).getMethod("something").getReturn() +} #$ use=getMember("Something").getMethod("withLambda").getReturn() -Something.namedCallback( #$ source=Member[Something] +Something.namedCallback( #$ use=getMember("Something") onEvent: ->(a, b) { - a.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[0].Method[something].ReturnValue - b.something #$ source=Member[Something].Method[namedCallback].Argument[onEvent:].Parameter[1].Method[something].ReturnValue + a.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(0).getMethod("something").getReturn() + b.something #$ use=getMember("Something").getMethod("namedCallback").getKeywordParameter("onEvent").getParameter(1).getMethod("something").getReturn() } -) #$ source=Member[Something].Method[namedCallback].ReturnValue +) #$ use=getMember("Something").getMethod("namedCallback").getReturn() -Something.nestedCall1 do |a| #$ source=Member[Something] - a.nestedCall2 do |b:| #$ reachableFromSource=Member[Something].Method[nestedCall1].Argument[block].Parameter[0] - b.something #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].Argument[block].Parameter[b:].Method[something].ReturnValue - end #$ source=Member[Something].Method[nestedCall1].Argument[block].Parameter[0].Method[nestedCall2].ReturnValue -end #$ source=Member[Something].Method[nestedCall1].ReturnValue +Something.nestedCall1 do |a| #$ use=getMember("Something") + a.nestedCall2 do |b:| #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0) + b.something #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getBlock().getKeywordParameter("b").getMethod("something").getReturn() + end #$ use=getMember("Something").getMethod("nestedCall1").getBlock().getParameter(0).getMethod("nestedCall2").getReturn() +end #$ use=getMember("Something").getMethod("nestedCall1").getReturn() def getCallback() ->(x) { - x.something #$ source=Member[Something].Method[indirectCallback].Argument[0].Parameter[0].Method[something].ReturnValue + x.something #$ use=getMember("Something").getMethod("indirectCallback").getParameter(0).getParameter(0).getMethod("something").getReturn() } end -Something.indirectCallback(getCallback()) #$ source=Member[Something].Method[indirectCallback].ReturnValue +Something.indirectCallback(getCallback()) #$ use=getMember("Something").getMethod("indirectCallback").getReturn() -Something.withMixed do |a, *args, b| #$ source=Member[Something] - a.something #$ source=Member[Something].Method[withMixed].Argument[block].Parameter[0].Method[something].ReturnValue +Something.withMixed do |a, *args, b| #$ use=getMember("Something") + a.something #$ use=getMember("Something").getMethod("withMixed").getBlock().getParameter(0).getMethod("something").getReturn() # b.something # not currently handled correctly -end #$ source=Member[Something].Method[withMixed].ReturnValue +end #$ use=getMember("Something").getMethod("withMixed").getReturn() diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb deleted file mode 100644 index b0e4f07e7010..000000000000 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/chained-access.rb +++ /dev/null @@ -1,31 +0,0 @@ -def chained_access1 - Something.foo [[[ - 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] - ]]] -end - -def chained_access2 - array = [] - array[0] = [[ - 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] - ]] - Something.foo array -end - -def chained_access3 - array = [[]] - array[0][0] = [ - 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[0].Element[0].Element[0] - ] - Something.foo array -end - -def chained_access4 - Something.foo { - :one => { - :two => { - :three => 'sink' # $ sink=Member[Something].Method[foo].Argument[0].Element[:one].Element[:two].Element[:three] - } - } - } -end diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb deleted file mode 100644 index 63a4b4c3dcea..000000000000 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/method-callbacks.rb +++ /dev/null @@ -1,64 +0,0 @@ -class BaseClass - def inheritedInstanceMethod - yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[inheritedInstanceMethod].Parameter[block].Argument[0] - end - - def self.inheritedSingletonMethod - yield "taint" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[inheritedSingletonMethod].Parameter[block].Argument[0] - end -end - -class ClassWithCallbacks < BaseClass - def instanceMethod - yield "taint" # $ sink=Member[Something].Method[foo].Argument[block].ReturnValue.Method[instanceMethod].Parameter[block].Argument[0] - end - - def self.singletonMethod - yield "bar" # $ sink=Member[Something].Method[bar].Argument[block].ReturnValue.Method[singletonMethod].Parameter[block].Argument[0] - end - - def escapeSelf - Something.baz { self } - end - - def self.escapeSingletonSelf - Something.baz { self } - end - - def self.foo x - x # $ reachableFromSource=Member[BaseClass].Method[foo].Parameter[0] - x # $ reachableFromSource=Member[ClassWithCallbacks].Method[foo].Parameter[0] - x # $ reachableFromSource=Member[Subclass].Method[foo].Parameter[0] - end - - def bar x - x # $ reachableFromSource=Member[BaseClass].Instance.Method[bar].Parameter[0] - x # $ reachableFromSource=Member[ClassWithCallbacks].Instance.Method[bar].Parameter[0] - x # $ reachableFromSource=Member[Subclass].Instance.Method[bar].Parameter[0] - end -end - -class Subclass < ClassWithCallbacks - def instanceMethodInSubclass - yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[instanceMethodInSubclass].Parameter[block].Argument[0] - end - - def self.singletonMethodInSubclass - yield "bar" # $ sink=Member[Something].Method[baz].Argument[block].ReturnValue.Method[singletonMethodInSubclass].Parameter[block].Argument[0] - end -end - -Something.foo { ClassWithCallbacks.new } -Something.bar { ClassWithCallbacks } - -class ClassWithCallMethod - def call x - x # $ reachableFromSource=Method[topLevelMethod].Argument[0].Parameter[0] - "bar" # $ sink=Method[topLevelMethod].Argument[0].ReturnValue - end -end - -topLevelMethod ClassWithCallMethod.new - -blah = topLevelMethod -blah # $ reachableFromSource=Method[topLevelMethod].ReturnValue diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb deleted file mode 100644 index 178cacbe2c07..000000000000 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/self-dot-class.rb +++ /dev/null @@ -1,10 +0,0 @@ -module SelfDotClass - module Mixin - def foo - self.class.bar # $ call=Member[Foo].Method[bar] - end - end - class Subclass < Foo - include Mixin - end -end diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb b/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb index 3af793dd4f79..86b8bce9587b 100644 --- a/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/test1.rb @@ -1,34 +1,34 @@ -MyModule #$ source=Member[MyModule] -print MyModule.foo #$ source=Member[MyModule].Method[foo].ReturnValue -Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue sink=Member[Kernel].Method[print].Argument[0] -Object::Kernel #$ source=Member[Kernel] -Object::Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue +MyModule #$ use=getMember("MyModule") +print MyModule.foo #$ use=getMember("MyModule").getMethod("foo").getReturn() +Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() def=getMember("Kernel").getMethod("print").getParameter(0) +Object::Kernel #$ use=getMember("Kernel") +Object::Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() begin - print MyModule.bar #$ source=Member[MyModule].Method[bar].ReturnValue - raise AttributeError #$ source=Member[AttributeError] -rescue AttributeError => e #$ source=Member[AttributeError] - Kernel.print(e) #$ source=Member[Kernel].Method[print].ReturnValue + print MyModule.bar #$ use=getMember("MyModule").getMethod("bar").getReturn() + raise AttributeError #$ use=getMember("AttributeError") +rescue AttributeError => e #$ use=getMember("AttributeError") + Kernel.print(e) #$ use=getMember("Kernel").getMethod("print").getReturn() end -Unknown.new.run #$ source=Member[Unknown].Method[new].ReturnValue.Method[run].ReturnValue -Foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] +Unknown.new.run #$ use=getMember("Unknown").getMethod("new").getReturn().getMethod("run").getReturn() +Foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") -Const = [1, 2, 3] #$ source=Member[Array].MethodBracket.ReturnValue -Const.each do |c| #$ source=Member[Const] - puts c #$ reachableFromSource=Member[Const].Method[each].Argument[block].Parameter[0] reachableFromSource=Member[Const].Element[any] -end #$ source=Member[Const].Method[each].ReturnValue sink=Member[Const].Method[each].Argument[block] +Const = [1, 2, 3] #$ use=getMember("Array").getMethod("[]").getReturn() +Const.each do |c| #$ use=getMember("Const") + puts c #$ use=getMember("Const").getMethod("each").getBlock().getParameter(0) use=getMember("Const").getContent(element) +end #$ use=getMember("Const").getMethod("each").getReturn() def=getMember("Const").getMethod("each").getBlock() -foo = Foo #$ source=Member[Foo] -foo::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] +foo = Foo #$ use=getMember("Foo") +foo::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") -FooAlias = Foo #$ source=Member[Foo] -FooAlias::Bar::Baz #$ source=Member[Foo].Member[Bar].Member[Baz] source=Member[FooAlias].Member[Bar].Member[Baz] +FooAlias = Foo #$ use=getMember("Foo") +FooAlias::Bar::Baz #$ use=getMember("Foo").getMember("Bar").getMember("Baz") module Outer module Inner end end -Outer::Inner.foo #$ source=Member[Outer].Member[Inner].Method[foo].ReturnValue +Outer::Inner.foo #$ use=getMember("Outer").getMember("Inner").getMethod("foo").getReturn() module M1 class C1 @@ -40,36 +40,36 @@ def m end end -class C2 < M1::C1 #$ source=Member[M1].Member[C1] +class C2 < M1::C1 #$ use=getMember("M1").getMember("C1") end module M2 - class C3 < M1::C1 #$ source=Member[M1].Member[C1] + class C3 < M1::C1 #$ use=getMember("M1").getMember("C1") end - class C4 < C2 #$ source=Member[C2] + class C4 < C2 #$ use=getMember("C2") end end -C2 #$ source=Member[C2] reachableFromSource=Member[M1].Member[C1] -M2::C3 #$ source=Member[M2].Member[C3] reachableFromSource=Member[M1].Member[C1] -M2::C4 #$ source=Member[M2].Member[C4] reachableFromSource=Member[C2] reachableFromSource=Member[M1].Member[C1] +C2 #$ use=getMember("C2") use=getMember("M1").getMember("C1").getASubclass() +M2::C3 #$ use=getMember("M2").getMember("C3") use=getMember("M1").getMember("C1").getASubclass() +M2::C4 #$ use=getMember("M2").getMember("C4") use=getMember("C2").getASubclass() use=getMember("M1").getMember("C1").getASubclass().getASubclass() -M1::C1.m #$ source=Member[M1].Member[C1].Method[m].ReturnValue -M2::C3.m #$ source=Member[M2].Member[C3].Method[m].ReturnValue source=Member[M1].Member[C1].Method[m].ReturnValue +M1::C1.m #$ use=getMember("M1").getMember("C1").getMethod("m").getReturn() +M2::C3.m #$ use=getMember("M2").getMember("C3").getMethod("m").getReturn() use=getMember("M1").getMember("C1").getASubclass().getMethod("m").getReturn() -M1::C1.new.m #$ source=Member[M1].Member[C1].Method[new].ReturnValue.Method[m].ReturnValue -M2::C3.new.m #$ source=Member[M2].Member[C3].Method[new].ReturnValue.Method[m].ReturnValue +M1::C1.new.m #$ use=getMember("M1").getMember("C1").getMethod("new").getReturn().getMethod("m").getReturn() +M2::C3.new.m #$ use=getMember("M2").getMember("C3").getMethod("new").getReturn().getMethod("m").getReturn() -Foo.foo(a,b:c) #$ source=Member[Foo].Method[foo].ReturnValue sink=Member[Foo].Method[foo].Argument[0] sink=Member[Foo].Method[foo].Argument[b:] +Foo.foo(a,b:c) #$ use=getMember("Foo").getMethod("foo").getReturn() def=getMember("Foo").getMethod("foo").getParameter(0) def=getMember("Foo").getMethod("foo").getKeywordParameter("b") def userDefinedFunction(x, y) x.noApiGraph(y) - x.customEntryPointCall(y) #$ call=EntryPoint[CustomEntryPointCall] source=EntryPoint[CustomEntryPointCall].ReturnValue sink=EntryPoint[CustomEntryPointCall].Parameter[0] - x.customEntryPointUse(y) #$ source=EntryPoint[CustomEntryPointUse] + x.customEntryPointCall(y) #$ call=entryPoint("CustomEntryPointCall") use=entryPoint("CustomEntryPointCall").getReturn() rhs=entryPoint("CustomEntryPointCall").getParameter(0) + x.customEntryPointUse(y) #$ use=entryPoint("CustomEntryPointUse") end -array = [A::B::C] #$ source=Member[Array].MethodBracket.ReturnValue -array[0].m #$ source=Member[A].Member[B].Member[C].Method[m].ReturnValue source=Member[Array].MethodBracket.ReturnValue.Element[0].Method[m].ReturnValue +array = [A::B::C] #$ use=getMember("Array").getMethod("[]").getReturn() +array[0].m #$ use=getMember("A").getMember("B").getMember("C").getMethod("m").getReturn() -A::B::C[0] #$ source=Member[A].Member[B].Member[C].Element[0] +A::B::C[0] #$ use=getMember("A").getMember("B").getMember("C").getContent(element_0) diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.expected b/ruby/ql/test/library-tests/dataflow/api-graphs/use.expected similarity index 100% rename from ruby/ql/test/library-tests/dataflow/api-graphs/VerifyApiGraphExpectations.expected rename to ruby/ql/test/library-tests/dataflow/api-graphs/use.expected diff --git a/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql new file mode 100644 index 000000000000..a0c11640ce09 --- /dev/null +++ b/ruby/ql/test/library-tests/dataflow/api-graphs/use.ql @@ -0,0 +1,88 @@ +import codeql.ruby.AST +import codeql.ruby.DataFlow +import TestUtilities.InlineExpectationsTest +import codeql.ruby.ApiGraphs + +class CustomEntryPointCall extends API::EntryPoint { + CustomEntryPointCall() { this = "CustomEntryPointCall" } + + override DataFlow::CallNode getACall() { result.getMethodName() = "customEntryPointCall" } +} + +class CustomEntryPointUse extends API::EntryPoint { + CustomEntryPointUse() { this = "CustomEntryPointUse" } + + override DataFlow::LocalSourceNode getASource() { + result.(DataFlow::CallNode).getMethodName() = "customEntryPointUse" + } +} + +module ApiUseTest implements TestSig { + string getARelevantTag() { result = ["use", "def", "call"] } + + private predicate relevantNode(API::Node a, DataFlow::Node n, Location l, string tag) { + l = n.getLocation() and + ( + tag = "use" and + n = a.getAValueReachableFromSource() + or + tag = "def" and + n = a.asSink() + or + tag = "call" and + n = a.(API::MethodAccessNode).getCallNode() + ) + } + + predicate hasActualResult(Location location, string element, string tag, string value) { + tag = "use" and // def tags are always optional + exists(DataFlow::Node n | relevantNode(_, n, location, tag) | + // Only report the longest path on this line: + value = + max(API::Node a2, Location l2, DataFlow::Node n2 | + relevantNode(a2, n2, l2, tag) and + l2.getFile() = location.getFile() and + l2.getEndLine() = location.getEndLine() + | + a2.getPath() + order by + size(n2.asExpr().getExpr()), a2.getPath().length() desc, a2.getPath() desc + ) and + element = n.toString() + ) + } + + // We also permit optional annotations for any other path on the line. + // This is used to test subclass paths, which typically have a shorter canonical path. + predicate hasOptionalResult(Location location, string element, string tag, string value) { + exists(API::Node a, DataFlow::Node n | relevantNode(a, n, location, tag) | + element = n.toString() and + value = getAPath(a, _) + ) + } +} + +import MakeTest + +private int size(AstNode n) { not n instanceof StmtSequence and result = count(n.getAChild*()) } + +/** + * Gets a path of the given `length` from the root to the given node. + * This is a copy of `API::getAPath()` without the restriction on path length, + * which would otherwise rule out paths involving `getASubclass()`. + */ +string getAPath(API::Node node, int length) { + node instanceof API::Root and + length = 0 and + result = "" + or + exists(API::Node pred, API::Label::ApiLabel lbl, string predpath | + pred.getASuccessor(lbl) = node and + predpath = getAPath(pred, length - 1) and + exists(string dot | if length = 1 then dot = "" else dot = "." | + result = predpath + dot + lbl and + // avoid producing strings longer than 1MB + result.length() < 1000 * 1000 + ) + ) +} diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected index 4f1b0c309203..66da43ab78b9 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected +++ b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.expected @@ -1,8 +1,6 @@ sourceTest | hello_world_server.rb:8:13:8:15 | req | -| hello_world_server.rb:32:18:32:20 | req | ssrfSinkTest | hello_world_client.rb:6:47:6:75 | "http://localhost:8080/twirp" | serviceInstantiationTest | hello_world_server.rb:24:11:24:61 | call to new | -| hello_world_server.rb:38:1:38:57 | call to new | diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql index fee49cbb48c2..4c0494f9100c 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql +++ b/ruby/ql/test/library-tests/frameworks/Twirp/Twirp.ql @@ -5,4 +5,4 @@ query predicate sourceTest(Twirp::UnmarshaledParameter source) { any() } query predicate ssrfSinkTest(Twirp::ServiceUrlAsSsrfSink sink) { any() } -deprecated query predicate serviceInstantiationTest(Twirp::ServiceInstantiation si) { any() } +query predicate serviceInstantiationTest(Twirp::ServiceInstantiation si) { any() } diff --git a/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb b/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb index 7cd117a5843f..1aa0b9aa8def 100644 --- a/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb +++ b/ruby/ql/test/library-tests/frameworks/Twirp/hello_world_server.rb @@ -5,7 +5,7 @@ class HelloWorldHandler # test: request - def hello(req, env) + def hello(req, env) puts ">> Hello #{req.name}" {message: "Hello #{req.name}"} end @@ -13,7 +13,7 @@ def hello(req, env) class FakeHelloWorldHandler # test: !request - def hello(req, env) + def hello(req, env) puts ">> Hello #{req.name}" {message: "Hello #{req.name}"} end @@ -21,18 +21,9 @@ def hello(req, env) handler = HelloWorldHandler.new() # test: serviceInstantiation -service = Example::HelloWorld::HelloWorldService.new(handler) +service = Example::HelloWorld::HelloWorldService.new(handler) path_prefix = "/twirp/" + service.full_name server = WEBrick::HTTPServer.new(Port: 8080) server.mount path_prefix, Rack::Handler::WEBrick, service server.start - -class StaticHandler - def self.hello(req, env) - puts ">> Hello #{req.name}" - {message: "Hello #{req.name}"} - end -end - -Example::HelloWorld::HelloWorldService.new(StaticHandler) diff --git a/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected b/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected index 4eacd48bd603..713273509413 100644 --- a/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected +++ b/ruby/ql/test/library-tests/frameworks/action_dispatch/ActionDispatch.expected @@ -55,12 +55,12 @@ underscore | LotsOfCapitalLetters | lots_of_capital_letters | | invalid | invalid | mimeTypeInstances -| mime_type.rb:2:6:2:28 | ForwardNode(call to fetch) | -| mime_type.rb:3:6:3:32 | ForwardNode(call to new) | -| mime_type.rb:4:6:4:35 | ForwardNode(call to lookup) | -| mime_type.rb:5:6:5:43 | ForwardNode(call to lookup_by_extension) | -| mime_type.rb:6:6:6:47 | ForwardNode(call to register) | -| mime_type.rb:7:6:7:64 | ForwardNode(call to register_alias) | +| mime_type.rb:2:6:2:28 | Use getMember("Mime").getContent(element_text/html) | +| mime_type.rb:3:6:3:32 | Use getMember("Mime").getMember("Type").getMethod("new").getReturn() | +| mime_type.rb:4:6:4:35 | Use getMember("Mime").getMember("Type").getMethod("lookup").getReturn() | +| mime_type.rb:5:6:5:43 | Use getMember("Mime").getMember("Type").getMethod("lookup_by_extension").getReturn() | +| mime_type.rb:6:6:6:47 | Use getMember("Mime").getMember("Type").getMethod("register").getReturn() | +| mime_type.rb:7:6:7:64 | Use getMember("Mime").getMember("Type").getMethod("register_alias").getReturn() | mimeTypeMatchRegExpInterpretations | mime_type.rb:11:11:11:19 | "foo/bar" | | mime_type.rb:12:7:12:15 | "foo/bar" | diff --git a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected index a3ee4ebebf54..0374a54c0c12 100644 --- a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected +++ b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.expected @@ -10,7 +10,6 @@ activeRecordInstances | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:13:5:13:40 | call to find_by | | ActiveRecord.rb:13:5:13:46 | call to users | -| ActiveRecord.rb:35:5:35:51 | call to authenticate | | ActiveRecord.rb:36:5:36:30 | call to find_by_name | | ActiveRecord.rb:55:5:57:7 | if ... | | ActiveRecord.rb:55:43:56:40 | then ... | @@ -108,14 +107,12 @@ activeRecordSqlExecutionRanges | ActiveRecord.rb:19:16:19:24 | condition | | ActiveRecord.rb:28:30:28:44 | ...[...] | | ActiveRecord.rb:29:20:29:42 | "id = '#{...}'" | -| ActiveRecord.rb:30:21:30:45 | call to [] | | ActiveRecord.rb:30:22:30:44 | "id = '#{...}'" | | ActiveRecord.rb:31:16:31:21 | <<-SQL | | ActiveRecord.rb:34:20:34:47 | "user.id = '#{...}'" | | ActiveRecord.rb:46:20:46:32 | ... + ... | | ActiveRecord.rb:52:16:52:28 | "name #{...}" | | ActiveRecord.rb:56:20:56:39 | "username = #{...}" | -| ActiveRecord.rb:68:21:68:44 | ...[...] | | ActiveRecord.rb:106:27:106:76 | "this is an unsafe annotation:..." | activeRecordModelClassMethodCalls | ActiveRecord.rb:2:3:2:17 | call to has_many | @@ -130,6 +127,7 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:31:5:31:35 | call to where | | ActiveRecord.rb:34:5:34:14 | call to where | | ActiveRecord.rb:34:5:34:48 | call to not | +| ActiveRecord.rb:35:5:35:51 | call to authenticate | | ActiveRecord.rb:36:5:36:30 | call to find_by_name | | ActiveRecord.rb:37:5:37:36 | call to not_a_find_by_method | | ActiveRecord.rb:46:5:46:33 | call to delete_by | @@ -137,6 +135,7 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:56:7:56:40 | call to find_by | | ActiveRecord.rb:60:5:60:33 | call to find_by | | ActiveRecord.rb:62:5:62:34 | call to find | +| ActiveRecord.rb:68:5:68:45 | call to delete_by | | ActiveRecord.rb:72:5:72:24 | call to create | | ActiveRecord.rb:76:5:76:66 | call to create | | ActiveRecord.rb:80:5:80:68 | call to create | @@ -153,96 +152,6 @@ activeRecordModelClassMethodCalls | associations.rb:12:3:12:32 | call to has_and_belongs_to_many | | associations.rb:16:3:16:18 | call to belongs_to | | associations.rb:19:11:19:20 | call to new | -| associations.rb:21:9:21:21 | call to posts | -| associations.rb:21:9:21:28 | call to create | -| associations.rb:23:12:23:25 | call to comments | -| associations.rb:23:12:23:32 | call to create | -| associations.rb:25:11:25:22 | call to author | -| associations.rb:27:9:27:21 | call to posts | -| associations.rb:27:9:27:28 | call to create | -| associations.rb:29:1:29:13 | call to posts | -| associations.rb:29:1:29:22 | ... << ... | -| associations.rb:31:1:31:12 | call to author= | -| associations.rb:35:1:35:14 | call to comments | -| associations.rb:35:1:35:21 | call to create | -| associations.rb:35:1:35:28 | call to create | -| associations.rb:37:1:37:13 | call to posts | -| associations.rb:37:1:37:20 | call to reload | -| associations.rb:37:1:37:27 | call to create | -| associations.rb:39:1:39:15 | call to build_tag | -| associations.rb:40:1:40:15 | call to build_tag | -| associations.rb:42:1:42:13 | call to posts | -| associations.rb:42:1:42:25 | call to push | -| associations.rb:43:1:43:13 | call to posts | -| associations.rb:43:1:43:27 | call to concat | -| associations.rb:44:1:44:13 | call to posts | -| associations.rb:44:1:44:19 | call to build | -| associations.rb:45:1:45:13 | call to posts | -| associations.rb:45:1:45:20 | call to create | -| associations.rb:46:1:46:13 | call to posts | -| associations.rb:46:1:46:21 | call to create! | -| associations.rb:47:1:47:13 | call to posts | -| associations.rb:47:1:47:20 | call to delete | -| associations.rb:48:1:48:13 | call to posts | -| associations.rb:48:1:48:24 | call to delete_all | -| associations.rb:49:1:49:13 | call to posts | -| associations.rb:49:1:49:21 | call to destroy | -| associations.rb:50:1:50:13 | call to posts | -| associations.rb:50:1:50:25 | call to destroy_all | -| associations.rb:51:1:51:13 | call to posts | -| associations.rb:51:1:51:22 | call to distinct | -| associations.rb:51:1:51:36 | call to find | -| associations.rb:52:1:52:13 | call to posts | -| associations.rb:52:1:52:19 | call to reset | -| associations.rb:52:1:52:33 | call to find | -| associations.rb:53:1:53:13 | call to posts | -| associations.rb:53:1:53:20 | call to reload | -| associations.rb:53:1:53:34 | call to find | -activeRecordModelClassMethodCallsReplacement -| ActiveRecord.rb:1:1:3:3 | UserGroup | ActiveRecord.rb:2:3:2:17 | call to has_many | -| ActiveRecord.rb:1:1:3:3 | UserGroup | ActiveRecord.rb:13:5:13:40 | call to find_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:6:3:6:24 | call to belongs_to | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:9:5:9:68 | call to find | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:19:5:19:25 | call to destroy_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:28:5:28:45 | call to calculate | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:29:5:29:43 | call to delete_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:30:5:30:46 | call to destroy_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:31:5:31:35 | call to where | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:34:5:34:14 | call to where | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:35:5:35:51 | call to authenticate | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:36:5:36:30 | call to find_by_name | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:37:5:37:36 | call to not_a_find_by_method | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:46:5:46:33 | call to delete_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:52:5:52:29 | call to order | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:56:7:56:40 | call to find_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:60:5:60:33 | call to find_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:62:5:62:34 | call to find | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:68:5:68:45 | call to delete_by | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:72:5:72:24 | call to create | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:76:5:76:66 | call to create | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:80:5:80:68 | call to create | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:84:5:84:16 | call to create | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:88:5:88:27 | call to update | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:92:5:92:69 | call to update | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:96:5:96:71 | call to update | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:102:13:102:54 | call to annotate | -| ActiveRecord.rb:5:1:15:3 | User | ActiveRecord.rb:106:13:106:77 | call to annotate | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:19:5:19:25 | call to destroy_by | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:68:5:68:45 | call to delete_by | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:72:5:72:24 | call to create | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:76:5:76:66 | call to create | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:80:5:80:68 | call to create | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:84:5:84:16 | call to create | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:88:5:88:27 | call to update | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:92:5:92:69 | call to update | -| ActiveRecord.rb:17:1:21:3 | Admin | ActiveRecord.rb:96:5:96:71 | call to update | -| associations.rb:1:1:3:3 | Author | associations.rb:2:3:2:17 | call to has_many | -| associations.rb:1:1:3:3 | Author | associations.rb:19:11:19:20 | call to new | -| associations.rb:5:1:9:3 | Post | associations.rb:6:3:6:20 | call to belongs_to | -| associations.rb:5:1:9:3 | Post | associations.rb:7:3:7:20 | call to has_many | -| associations.rb:5:1:9:3 | Post | associations.rb:8:3:8:31 | call to has_and_belongs_to_many | -| associations.rb:11:1:13:3 | Tag | associations.rb:12:3:12:32 | call to has_and_belongs_to_many | -| associations.rb:15:1:17:3 | Comment | associations.rb:16:3:16:18 | call to belongs_to | potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:19:5:19:25 | call to destroy_by | diff --git a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql index 348ca1456e2f..731679e437bb 100644 --- a/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql +++ b/ruby/ql/test/library-tests/frameworks/active_record/ActiveRecord.ql @@ -9,19 +9,9 @@ query predicate activeRecordInstances(ActiveRecordInstance i) { any() } query predicate activeRecordSqlExecutionRanges(ActiveRecordSqlExecutionRange range) { any() } -deprecated query predicate activeRecordModelClassMethodCalls(ActiveRecordModelClassMethodCall call) { - any() -} - -query predicate activeRecordModelClassMethodCallsReplacement( - ActiveRecordModelClass cls, DataFlow::CallNode call -) { - call = cls.getClassNode().trackModule().getAMethodCall(_) -} +query predicate activeRecordModelClassMethodCalls(ActiveRecordModelClassMethodCall call) { any() } -deprecated query predicate potentiallyUnsafeSqlExecutingMethodCall( - PotentiallyUnsafeSqlExecutingMethodCall call -) { +query predicate potentiallyUnsafeSqlExecutingMethodCall(PotentiallyUnsafeSqlExecutingMethodCall call) { any() } diff --git a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected index e6d3b056971f..a55946c1852c 100644 --- a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected +++ b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.expected @@ -33,13 +33,6 @@ modelInstances | active_resource.rb:26:9:26:14 | people | | active_resource.rb:26:9:26:20 | call to first | | active_resource.rb:27:1:27:5 | alice | -modelInstancesAsSource -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:5:9:5:33 | call to new | -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:8:9:8:22 | call to find | -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:16:1:16:23 | call to new | -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:18:1:18:22 | call to get | -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:24:10:24:26 | call to find | -| active_resource.rb:1:1:3:3 | Person | active_resource.rb:26:9:26:20 | call to first | modelInstanceMethodCalls | active_resource.rb:6:1:6:10 | call to save | | active_resource.rb:9:1:9:13 | call to address= | @@ -57,6 +50,3 @@ collections | active_resource.rb:24:1:24:26 | ... = ... | | active_resource.rb:24:10:24:26 | call to find | | active_resource.rb:26:9:26:14 | people | -collectionSources -| active_resource.rb:23:10:23:19 | call to all | -| active_resource.rb:24:10:24:26 | call to find | diff --git a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql index f1898ddbc985..1f2fd1efcf12 100644 --- a/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql +++ b/ruby/ql/test/library-tests/frameworks/active_resource/ActiveResource.ql @@ -3,8 +3,7 @@ import codeql.ruby.DataFlow import codeql.ruby.frameworks.ActiveResource query predicate modelClasses( - ActiveResource::ModelClassNode c, DataFlow::Node siteAssignCall, - boolean disablesCertificateValidation + ActiveResource::ModelClass c, DataFlow::Node siteAssignCall, boolean disablesCertificateValidation ) { c.getASiteAssignment() = siteAssignCall and if c.disablesCertificateValidation(siteAssignCall) @@ -14,16 +13,8 @@ query predicate modelClasses( query predicate modelClassMethodCalls(ActiveResource::ModelClassMethodCall c) { any() } -deprecated query predicate modelInstances(ActiveResource::ModelInstance c) { any() } - -query predicate modelInstancesAsSource( - ActiveResource::ModelClassNode cls, DataFlow::LocalSourceNode node -) { - node = cls.getAnInstanceReference().asSource() -} +query predicate modelInstances(ActiveResource::ModelInstance c) { any() } query predicate modelInstanceMethodCalls(ActiveResource::ModelInstanceMethodCall c) { any() } -deprecated query predicate collections(ActiveResource::Collection c) { any() } - -query predicate collectionSources(ActiveResource::CollectionSource c) { any() } +query predicate collections(ActiveResource::Collection c) { any() } diff --git a/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected b/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected index 460c7da31454..0eaf24029efb 100644 --- a/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected +++ b/ruby/ql/test/query-tests/security/cwe-079/StoredXSS.expected @@ -3,6 +3,7 @@ edges | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | | app/controllers/foo/stores_controller.rb:9:22:9:23 | dt | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | +| app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:2:9:2:20 | call to display_text | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:5:9:5:21 | call to local_assigns [element :display_text] | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | app/views/foo/stores/show.html.erb:9:9:9:21 | call to local_assigns [element :display_text] | @@ -21,6 +22,7 @@ nodes | app/controllers/foo/stores_controller.rb:8:5:8:6 | dt | semmle.label | dt | | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | semmle.label | call to read | | app/controllers/foo/stores_controller.rb:9:22:9:23 | dt | semmle.label | dt | +| app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | semmle.label | call to raw_name | | app/controllers/foo/stores_controller.rb:13:55:13:56 | dt | semmle.label | dt | | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | semmle.label | call to display_text | | app/views/foo/bars/_widget.html.erb:8:9:8:21 | call to local_assigns [element :display_text] | semmle.label | call to local_assigns [element :display_text] | @@ -37,7 +39,11 @@ nodes | app/views/foo/stores/show.html.erb:40:64:40:87 | ... + ... | semmle.label | ... + ... | | app/views/foo/stores/show.html.erb:40:76:40:87 | call to display_text | semmle.label | call to display_text | | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | semmle.label | call to handle | +| app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | semmle.label | call to raw_name | | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | semmle.label | call to handle | +| app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | semmle.label | call to raw_name | +| app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | semmle.label | call to display_name | +| app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | semmle.label | @other_user_raw_name | | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | semmle.label | call to sprintf | | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | semmle.label | call to handle | subpaths @@ -51,5 +57,9 @@ subpaths | app/views/foo/stores/show.html.erb:32:3:32:14 | call to display_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/views/foo/stores/show.html.erb:32:3:32:14 | call to display_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value | | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | app/views/foo/stores/show.html.erb:37:3:37:16 | @instance_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value | | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:46:5:46:16 | call to handle | stored value | +| app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:49:5:49:18 | call to raw_name | stored value | | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:63:3:63:18 | call to handle | stored value | +| app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:69:3:69:20 | call to raw_name | stored value | +| app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:79:5:79:22 | call to display_name | stored value | +| app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | app/views/foo/stores/show.html.erb:82:5:82:24 | @other_user_raw_name | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | stored value | | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | app/views/foo/stores/show.html.erb:86:3:86:29 | call to sprintf | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:86:17:86:28 | call to handle | stored value | diff --git a/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb b/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb index d8afec1c4324..29656a15a3d3 100644 --- a/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb +++ b/ruby/ql/test/query-tests/security/cwe-079/app/views/foo/stores/show.html.erb @@ -63,7 +63,7 @@ some_user.handle.html_safe %> -<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> +<%# BAD: Indirect to a database value without escaping %> <%= some_user = User.find 1 some_user.raw_name.html_safe @@ -75,10 +75,10 @@ some_user.handle %> -<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> +<%# BAD: Indirect to a database value without escaping %> <%= @user.display_name.html_safe %> -<%# BAD: Indirect to a database value without escaping (currently missed due to lack of 'self' handling in ORM tracking) %> +<%# BAD: Indirect to a database value without escaping %> <%= @other_user_raw_name.html_safe %> <%# BAD: Kernel.sprintf is a taint-step %> diff --git a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected index 161cdcc77516..0cc0d213dcc0 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected +++ b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected @@ -22,7 +22,6 @@ edges | ActiveRecordInjection.rb:70:38:70:50 | ...[...] | ActiveRecordInjection.rb:8:31:8:34 | pass | | ActiveRecordInjection.rb:74:41:74:46 | call to params | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | -| ActiveRecordInjection.rb:79:23:79:28 | call to params | ActiveRecordInjection.rb:79:23:79:35 | ...[...] | | ActiveRecordInjection.rb:83:17:83:22 | call to params | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | | ActiveRecordInjection.rb:84:19:84:24 | call to params | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | | ActiveRecordInjection.rb:88:18:88:23 | call to params | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | @@ -36,7 +35,6 @@ edges | ActiveRecordInjection.rb:103:11:103:17 | ...[...] | ActiveRecordInjection.rb:103:5:103:7 | uid | | ActiveRecordInjection.rb:104:5:104:9 | uidEq | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | | ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | -| ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | ActiveRecordInjection.rb:20:22:20:30 | condition | | ActiveRecordInjection.rb:155:59:155:64 | call to params | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | @@ -104,8 +102,6 @@ nodes | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | semmle.label | "id = '#{...}'" | | ActiveRecordInjection.rb:74:41:74:46 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:74:41:74:51 | ...[...] | semmle.label | ...[...] | -| ActiveRecordInjection.rb:79:23:79:28 | call to params | semmle.label | call to params | -| ActiveRecordInjection.rb:79:23:79:35 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:83:17:83:22 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:84:19:84:24 | call to params | semmle.label | call to params | @@ -127,7 +123,6 @@ nodes | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | semmle.label | ... + ... | | ActiveRecordInjection.rb:141:21:141:26 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | semmle.label | ...[...] | -| ActiveRecordInjection.rb:141:21:141:44 | ...[...] | semmle.label | ...[...] | | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." | | ActiveRecordInjection.rb:155:59:155:64 | call to params | semmle.label | call to params | | ActiveRecordInjection.rb:155:59:155:74 | ...[...] | semmle.label | ...[...] | @@ -177,7 +172,6 @@ subpaths | ActiveRecordInjection.rb:61:16:61:21 | <<-SQL | ActiveRecordInjection.rb:62:21:62:26 | call to params | ActiveRecordInjection.rb:61:16:61:21 | <<-SQL | This SQL query depends on a $@. | ActiveRecordInjection.rb:62:21:62:26 | call to params | user-provided value | | ActiveRecordInjection.rb:68:20:68:47 | "user.id = '#{...}'" | ActiveRecordInjection.rb:68:34:68:39 | call to params | ActiveRecordInjection.rb:68:20:68:47 | "user.id = '#{...}'" | This SQL query depends on a $@. | ActiveRecordInjection.rb:68:34:68:39 | call to params | user-provided value | | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | ActiveRecordInjection.rb:74:41:74:46 | call to params | ActiveRecordInjection.rb:74:32:74:54 | "id = '#{...}'" | This SQL query depends on a $@. | ActiveRecordInjection.rb:74:41:74:46 | call to params | user-provided value | -| ActiveRecordInjection.rb:79:23:79:35 | ...[...] | ActiveRecordInjection.rb:79:23:79:28 | call to params | ActiveRecordInjection.rb:79:23:79:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:79:23:79:28 | call to params | user-provided value | | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | ActiveRecordInjection.rb:83:17:83:22 | call to params | ActiveRecordInjection.rb:83:17:83:31 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:83:17:83:22 | call to params | user-provided value | | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | ActiveRecordInjection.rb:84:19:84:24 | call to params | ActiveRecordInjection.rb:84:19:84:33 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:84:19:84:24 | call to params | user-provided value | | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | user-provided value | @@ -185,7 +179,6 @@ subpaths | ActiveRecordInjection.rb:94:18:94:35 | ...[...] | ActiveRecordInjection.rb:94:18:94:23 | call to params | ActiveRecordInjection.rb:94:18:94:35 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:94:18:94:23 | call to params | user-provided value | | ActiveRecordInjection.rb:96:23:96:47 | ...[...] | ActiveRecordInjection.rb:96:23:96:28 | call to params | ActiveRecordInjection.rb:96:23:96:47 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:96:23:96:28 | call to params | user-provided value | | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | ActiveRecordInjection.rb:102:10:102:15 | call to params | ActiveRecordInjection.rb:108:20:108:32 | ... + ... | This SQL query depends on a $@. | ActiveRecordInjection.rb:102:10:102:15 | call to params | user-provided value | -| ActiveRecordInjection.rb:141:21:141:44 | ...[...] | ActiveRecordInjection.rb:141:21:141:26 | call to params | ActiveRecordInjection.rb:141:21:141:44 | ...[...] | This SQL query depends on a $@. | ActiveRecordInjection.rb:141:21:141:26 | call to params | user-provided value | | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:155:59:155:64 | call to params | ActiveRecordInjection.rb:155:27:155:76 | "this is an unsafe annotation:..." | This SQL query depends on a $@. | ActiveRecordInjection.rb:155:59:155:64 | call to params | user-provided value | | ActiveRecordInjection.rb:168:37:168:41 | query | ActiveRecordInjection.rb:173:5:173:10 | call to params | ActiveRecordInjection.rb:168:37:168:41 | query | This SQL query depends on a $@. | ActiveRecordInjection.rb:173:5:173:10 | call to params | user-provided value | | ActiveRecordInjection.rb:177:43:177:104 | "SELECT * FROM users WHERE id ..." | ActiveRecordInjection.rb:173:5:173:10 | call to params | ActiveRecordInjection.rb:177:43:177:104 | "SELECT * FROM users WHERE id ..." | This SQL query depends on a $@. | ActiveRecordInjection.rb:173:5:173:10 | call to params | user-provided value | @@ -196,4 +189,4 @@ subpaths | PgInjection.rb:20:22:20:25 | qry2 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:20:22:20:25 | qry2 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | | PgInjection.rb:21:28:21:31 | qry2 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:21:28:21:31 | qry2 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | | PgInjection.rb:32:29:32:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:32:29:32:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | -| PgInjection.rb:44:29:44:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:44:29:44:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | +| PgInjection.rb:44:29:44:32 | qry3 | PgInjection.rb:6:12:6:17 | call to params | PgInjection.rb:44:29:44:32 | qry3 | This SQL query depends on a $@. | PgInjection.rb:6:12:6:17 | call to params | user-provided value | \ No newline at end of file diff --git a/shared/util/codeql/util/test/InlineExpectationsTest.qll b/shared/util/codeql/util/test/InlineExpectationsTest.qll index ec2f30c41196..83c6a851f7df 100644 --- a/shared/util/codeql/util/test/InlineExpectationsTest.qll +++ b/shared/util/codeql/util/test/InlineExpectationsTest.qll @@ -501,19 +501,6 @@ module Make { class FalseNegativeExpectation = LegacyTest::FalseNegativeTestExpectation; class InvalidExpectation = LegacyTest::InvalidTestExpectation; - - /** - * Holds if the expectation `tag=value` is found in one or more expectation comments. - * - * This can be used when writing tests where the set of possible values must be known in advance, - * for example, when testing a predicate for which `value` is part of the binding set. - */ - predicate hasExpectationWithValue(string tag, string value) { - exists(string tags | - getAnExpectation(_, _, _, tags, value) and - tag = tags.splitAt(",") - ) - } } /**