From 2c14b412d718d81ca9eedd54191cb0ff7ad19215 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 24 Apr 2025 18:20:16 +0200 Subject: [PATCH] python: make content sets an IPA type this should have no semantic consequences --- .../python/dataflow/new/TypeTracking.qll | 8 +-- .../dataflow/new/internal/DataFlowPrivate.qll | 50 +++++++++---------- .../dataflow/new/internal/DataFlowPublic.qll | 16 ++++-- .../dataflow/new/internal/FlowSummaryImpl.qll | 50 +++++++++++-------- .../new/internal/TypeTrackingImpl.qll | 12 ++--- .../LoopVariableCaptureQuery.qll | 10 ++-- 6 files changed, 83 insertions(+), 63 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll index 9d21be852fdc..1614cae949c0 100644 --- a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll @@ -48,7 +48,7 @@ class TypeTracker extends Impl::TypeTracker { */ predicate startInAttr(string attrName) { exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName | - this.startInContent(content) + this.startInContent(DataFlowPublic::TSingletonContent(content)) ) } @@ -58,8 +58,10 @@ class TypeTracker extends Impl::TypeTracker { * Gets the attribute associated with this type tracker. */ string getAttr() { - if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent - then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute() + if this.getContent().asSome().asSingleton() instanceof DataFlowPublic::AttributeContent + then + result = + this.getContent().asSome().asSingleton().(DataFlowPublic::AttributeContent).getAttribute() else result = "" } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index b29be706c4fc..a367637aebec 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -679,13 +679,13 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { * no reason to include steps for list content right now. */ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { - tupleStoreStep(nodeFrom, c, nodeTo) + tupleStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - dictStoreStep(nodeFrom, c, nodeTo) + dictStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - moreDictStoreSteps(nodeFrom, c, nodeTo) + moreDictStoreSteps(nodeFrom, c.asSingleton(), nodeTo) or - iterableUnpackingStoreStep(nodeFrom, c, nodeTo) + iterableUnpackingStoreStep(nodeFrom, c.asSingleton(), nodeTo) } /** @@ -695,26 +695,26 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { storeStepCommon(nodeFrom, c, nodeTo) or - listStoreStep(nodeFrom, c, nodeTo) + listStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - setStoreStep(nodeFrom, c, nodeTo) + setStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - attributeStoreStep(nodeFrom, c, nodeTo) + attributeStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - matchStoreStep(nodeFrom, c, nodeTo) + matchStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo) + any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c.asSingleton(), nodeTo) or FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c, nodeTo.(FlowSummaryNode).getSummaryNode()) or - synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo) + synthStarArgsElementParameterNodeStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo) + synthDictSplatArgumentNodeStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - yieldStoreStep(nodeFrom, c, nodeTo) + yieldStoreStep(nodeFrom, c.asSingleton(), nodeTo) or - VariableCapture::storeStep(nodeFrom, c, nodeTo) + VariableCapture::storeStep(nodeFrom, c.asSingleton(), nodeTo) } /** @@ -911,9 +911,9 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) { * Subset of `readStep` that should be shared with type-tracking. */ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { - subscriptReadStep(nodeFrom, c, nodeTo) + subscriptReadStep(nodeFrom, c.asSingleton(), nodeTo) or - iterableUnpackingReadStep(nodeFrom, c, nodeTo) + iterableUnpackingReadStep(nodeFrom, c.asSingleton(), nodeTo) } /** @@ -922,18 +922,18 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) { readStepCommon(nodeFrom, c, nodeTo) or - matchReadStep(nodeFrom, c, nodeTo) + matchReadStep(nodeFrom, c.asSingleton(), nodeTo) or - forReadStep(nodeFrom, c, nodeTo) + forReadStep(nodeFrom, c.asSingleton(), nodeTo) or - attributeReadStep(nodeFrom, c, nodeTo) + attributeReadStep(nodeFrom, c.asSingleton(), nodeTo) or FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c, nodeTo.(FlowSummaryNode).getSummaryNode()) or - synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo) + synthDictSplatParameterNodeReadStep(nodeFrom, c.asSingleton(), nodeTo) or - VariableCapture::readStep(nodeFrom, c, nodeTo) + VariableCapture::readStep(nodeFrom, c.asSingleton(), nodeTo) } /** Data flows from a sequence to a subscript of the sequence. */ @@ -995,17 +995,17 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo) * in `x.f = newValue`. */ predicate clearsContent(Node n, ContentSet c) { - matchClearStep(n, c) + matchClearStep(n, c.asSingleton()) or - attributeClearStep(n, c) + attributeClearStep(n, c.asSingleton()) or - dictClearStep(n, c) + dictClearStep(n, c.asSingleton()) or FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c) or - dictSplatParameterNodeClearStep(n, c) + dictSplatParameterNodeClearStep(n, c.asSingleton()) or - VariableCapture::clearsContent(n, c) + VariableCapture::clearsContent(n, c.asSingleton()) } /** diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index ffecbcba57ac..baa92124bc7e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -769,19 +769,27 @@ class CapturedVariableContent extends Content, TCapturedVariableContent { override string getMaDRepresentation() { none() } } +newtype TContentSet = TSingletonContent(Content c) + /** * An entity that represents a set of `Content`s. * * The set may be interpreted differently depending on whether it is * stored into (`getAStoreContent`) or read from (`getAReadContent`). */ -class ContentSet instanceof Content { +class ContentSet extends TContentSet { + /** Holds if this content set is the singleton `{c}`. */ + predicate isSingleton(Content c) { this = TSingletonContent(c) } + + /** Gets the singleton `Content` underlying this `ContentSet`, if any. */ + Content asSingleton() { this.isSingleton(result) } + /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { result = this } + Content getAStoreContent() { this.isSingleton(result) } /** Gets a content that may be read from when reading from this set. */ - Content getAReadContent() { result = this } + Content getAReadContent() { this.isSingleton(result) } /** Gets a textual representation of this content set. */ - string toString() { result = super.toString() } + string toString() { result = any(Content c | this.isSingleton(c)).toString() } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll index f7fdf84549e6..afba9202744b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll @@ -62,21 +62,23 @@ module Input implements InputSig } string encodeContent(ContentSet cs, string arg) { - cs = TListElementContent() and result = "ListElement" and arg = "" - or - cs = TSetElementContent() and result = "SetElement" and arg = "" - or - exists(int index | - cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString() - ) - or - exists(string key | - cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key + exists(Content c | cs.isSingleton(c) | + c = TListElementContent() and result = "ListElement" and arg = "" + or + c = TSetElementContent() and result = "SetElement" and arg = "" + or + exists(int index | + c = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString() + ) + or + exists(string key | + c = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key + ) + or + c = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = "" + or + exists(string attr | c = TAttributeContent(attr) and result = "Attribute" and arg = attr) ) - or - cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = "" - or - exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and arg = attr) } bindingset[token] @@ -132,27 +134,35 @@ module Private { predicate withContent = SC::withContent/1; /** Gets a summary component that represents a list element. */ - SummaryComponent listElement() { result = content(any(ListElementContent c)) } + SummaryComponent listElement() { result = content(TSingletonContent(TListElementContent())) } /** Gets a summary component that represents a set element. */ - SummaryComponent setElement() { result = content(any(SetElementContent c)) } + SummaryComponent setElement() { result = content(TSingletonContent(TSetElementContent())) } /** Gets a summary component that represents a tuple element. */ SummaryComponent tupleElement(int index) { - exists(TupleElementContent c | c.getIndex() = index and result = content(c)) + exists(TupleElementContent c | + c.getIndex() = index and result = content(TSingletonContent(c)) + ) } /** Gets a summary component that represents a dictionary element. */ SummaryComponent dictionaryElement(string key) { - exists(DictionaryElementContent c | c.getKey() = key and result = content(c)) + exists(DictionaryElementContent c | + c.getKey() = key and result = content(TSingletonContent(c)) + ) } /** Gets a summary component that represents a dictionary element at any key. */ - SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) } + SummaryComponent dictionaryElementAny() { + result = content(TSingletonContent(TDictionaryElementAnyContent())) + } /** Gets a summary component that represents an attribute element. */ SummaryComponent attribute(string attr) { - exists(AttributeContent c | c.getAttribute() = attr and result = content(c)) + exists(AttributeContent c | + c.getAttribute() = attr and result = content(TSingletonContent(c)) + ) } /** Gets a summary component that represents the return value of a call. */ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 09d50253e05a..dcf73a17c3b5 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -111,7 +111,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { class LocalSourceNode = DataFlowPublic::LocalSourceNode; - class Content extends DataFlowPublic::Content { + class Content extends DataFlowPublic::ContentSet { Content() { // TODO: for now, it's not 100% clear if should support non-precise content in // type-tracking, or if it will lead to bad results. We start with only allowing @@ -119,11 +119,11 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { // the process of examining new results from non-precise content steps in the // future, since you will _only_ have to look over the results from the new // non-precise steps. - this instanceof DataFlowPublic::AttributeContent + this.asSingleton() instanceof DataFlowPublic::AttributeContent or - this instanceof DataFlowPublic::DictionaryElementContent + this.asSingleton() instanceof DataFlowPublic::DictionaryElementContent or - this instanceof DataFlowPublic::TupleElementContent + this.asSingleton() instanceof DataFlowPublic::TupleElementContent } } @@ -212,7 +212,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { */ predicate storeStep(Node nodeFrom, Node nodeTo, Content content) { exists(DataFlowPublic::AttrWrite a, string attrName | - content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and + content.asSingleton().(DataFlowPublic::AttributeContent).getAttribute() = attrName and a.mayHaveAttributeName(attrName) and nodeFrom = a.getValue() and nodeTo = a.getObject() @@ -242,7 +242,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { */ predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) { exists(DataFlowPublic::AttrRead a, string attrName | - content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and + content.asSingleton().(DataFlowPublic::AttributeContent).getAttribute() = attrName and a.mayHaveAttributeName(attrName) and nodeFrom = a.getObject() and nodeTo = a diff --git a/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll b/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll index 7f25701cac8e..82fc28307e4f 100644 --- a/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll +++ b/python/ql/src/Variables/LoopVariableCapture/LoopVariableCaptureQuery.qll @@ -58,11 +58,11 @@ module EscapingCaptureFlowConfig implements DataFlow::ConfigSig { predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) { isSink(node) and - ( - cs.(DataFlow::TupleElementContent).getIndex() in [0 .. 10] or - cs instanceof DataFlow::ListElementContent or - cs instanceof DataFlow::SetElementContent or - cs instanceof DataFlow::DictionaryElementAnyContent + exists(DataFlow::Content c | cs = DataFlow::TSingletonContent(c) | + c.(DataFlow::TupleElementContent).getIndex() in [0 .. 10] or + c instanceof DataFlow::ListElementContent or + c instanceof DataFlow::SetElementContent or + c instanceof DataFlow::DictionaryElementAnyContent ) } }