Skip to content

python: make content sets an IPA type #19407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class TypeTracker extends Impl::TypeTracker {
*/
predicate startInAttr(string attrName) {
exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName |
this.startInContent(content)
this.startInContent(DataFlowPublic::TSingletonContent(content))
)
}

Expand All @@ -58,8 +58,10 @@ class TypeTracker extends Impl::TypeTracker {
* Gets the attribute associated with this type tracker.
*/
string getAttr() {
if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent
then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute()
if this.getContent().asSome().asSingleton() instanceof DataFlowPublic::AttributeContent
then
result =
this.getContent().asSome().asSingleton().(DataFlowPublic::AttributeContent).getAttribute()
else result = ""
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -679,13 +679,13 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
* no reason to include steps for list content right now.
*/
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
tupleStoreStep(nodeFrom, c, nodeTo)
tupleStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
dictStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
moreDictStoreSteps(nodeFrom, c, nodeTo)
moreDictStoreSteps(nodeFrom, c.asSingleton(), nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
iterableUnpackingStoreStep(nodeFrom, c.asSingleton(), nodeTo)
}

/**
Expand All @@ -695,26 +695,26 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
storeStepCommon(nodeFrom, c, nodeTo)
or
listStoreStep(nodeFrom, c, nodeTo)
listStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
setStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
attributeStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
matchStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c.asSingleton(), nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
synthDictSplatArgumentNodeStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
yieldStoreStep(nodeFrom, c, nodeTo)
yieldStoreStep(nodeFrom, c.asSingleton(), nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
VariableCapture::storeStep(nodeFrom, c.asSingleton(), nodeTo)
}

/**
Expand Down Expand Up @@ -911,9 +911,9 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
* Subset of `readStep` that should be shared with type-tracking.
*/
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
subscriptReadStep(nodeFrom, c.asSingleton(), nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
iterableUnpackingReadStep(nodeFrom, c.asSingleton(), nodeTo)
}

/**
Expand All @@ -922,18 +922,18 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
matchReadStep(nodeFrom, c.asSingleton(), nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
forReadStep(nodeFrom, c.asSingleton(), nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
attributeReadStep(nodeFrom, c.asSingleton(), nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
synthDictSplatParameterNodeReadStep(nodeFrom, c.asSingleton(), nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
VariableCapture::readStep(nodeFrom, c.asSingleton(), nodeTo)
}

/** Data flows from a sequence to a subscript of the sequence. */
Expand Down Expand Up @@ -995,17 +995,17 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, ContentSet c) {
matchClearStep(n, c)
matchClearStep(n, c.asSingleton())
or
attributeClearStep(n, c)
attributeClearStep(n, c.asSingleton())
or
dictClearStep(n, c)
dictClearStep(n, c.asSingleton())
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c)
or
dictSplatParameterNodeClearStep(n, c)
dictSplatParameterNodeClearStep(n, c.asSingleton())
or
VariableCapture::clearsContent(n, c)
VariableCapture::clearsContent(n, c.asSingleton())
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -769,19 +769,27 @@
override string getMaDRepresentation() { none() }
}

newtype TContentSet = TSingletonContent(Content c)

Check warning on line 772 in python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

View workflow job for this annotation

GitHub Actions / qldoc

Missing QLdoc for newtype-branch DataFlowPublic::TSingletonContent

Check warning on line 772 in python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

View workflow job for this annotation

GitHub Actions / qldoc

Missing QLdoc for newtype DataFlowPublic::TContentSet

/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet instanceof Content {
class ContentSet extends TContentSet {
/** Holds if this content set is the singleton `{c}`. */
predicate isSingleton(Content c) { this = TSingletonContent(c) }

/** Gets the singleton `Content` underlying this `ContentSet`, if any. */
Content asSingleton() { this.isSingleton(result) }

/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { result = this }
Content getAStoreContent() { this.isSingleton(result) }

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() { result = this }
Content getAReadContent() { this.isSingleton(result) }

/** Gets a textual representation of this content set. */
string toString() { result = super.toString() }
string toString() { result = any(Content c | this.isSingleton(c)).toString() }
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,23 @@ module Input implements InputSig<Location, DataFlowImplSpecific::PythonDataFlow>
}

string encodeContent(ContentSet cs, string arg) {
cs = TListElementContent() and result = "ListElement" and arg = ""
or
cs = TSetElementContent() and result = "SetElement" and arg = ""
or
exists(int index |
cs = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
)
or
exists(string key |
cs = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
exists(Content c | cs.isSingleton(c) |
c = TListElementContent() and result = "ListElement" and arg = ""
or
c = TSetElementContent() and result = "SetElement" and arg = ""
or
exists(int index |
c = TTupleElementContent(index) and result = "TupleElement" and arg = index.toString()
)
or
exists(string key |
c = TDictionaryElementContent(key) and result = "DictionaryElement" and arg = key
)
or
c = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
or
exists(string attr | c = TAttributeContent(attr) and result = "Attribute" and arg = attr)
)
or
cs = TDictionaryElementAnyContent() and result = "DictionaryElementAny" and arg = ""
or
exists(string attr | cs = TAttributeContent(attr) and result = "Attribute" and arg = attr)
}

bindingset[token]
Expand Down Expand Up @@ -132,27 +134,35 @@ module Private {
predicate withContent = SC::withContent/1;

/** Gets a summary component that represents a list element. */
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
SummaryComponent listElement() { result = content(TSingletonContent(TListElementContent())) }

/** Gets a summary component that represents a set element. */
SummaryComponent setElement() { result = content(any(SetElementContent c)) }
SummaryComponent setElement() { result = content(TSingletonContent(TSetElementContent())) }

/** Gets a summary component that represents a tuple element. */
SummaryComponent tupleElement(int index) {
exists(TupleElementContent c | c.getIndex() = index and result = content(c))
exists(TupleElementContent c |
c.getIndex() = index and result = content(TSingletonContent(c))
)
}

/** Gets a summary component that represents a dictionary element. */
SummaryComponent dictionaryElement(string key) {
exists(DictionaryElementContent c | c.getKey() = key and result = content(c))
exists(DictionaryElementContent c |
c.getKey() = key and result = content(TSingletonContent(c))
)
}

/** Gets a summary component that represents a dictionary element at any key. */
SummaryComponent dictionaryElementAny() { result = content(any(DictionaryElementAnyContent c)) }
SummaryComponent dictionaryElementAny() {
result = content(TSingletonContent(TDictionaryElementAnyContent()))
}

/** Gets a summary component that represents an attribute element. */
SummaryComponent attribute(string attr) {
exists(AttributeContent c | c.getAttribute() = attr and result = content(c))
exists(AttributeContent c |
c.getAttribute() = attr and result = content(TSingletonContent(c))
)
}

/** Gets a summary component that represents the return value of a call. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,19 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {

class LocalSourceNode = DataFlowPublic::LocalSourceNode;

class Content extends DataFlowPublic::Content {
class Content extends DataFlowPublic::ContentSet {
Content() {
// TODO: for now, it's not 100% clear if should support non-precise content in
// type-tracking, or if it will lead to bad results. We start with only allowing
// precise content, which should always be a good improvement! It also simplifies
// the process of examining new results from non-precise content steps in the
// future, since you will _only_ have to look over the results from the new
// non-precise steps.
this instanceof DataFlowPublic::AttributeContent
this.asSingleton() instanceof DataFlowPublic::AttributeContent
or
this instanceof DataFlowPublic::DictionaryElementContent
this.asSingleton() instanceof DataFlowPublic::DictionaryElementContent
or
this instanceof DataFlowPublic::TupleElementContent
this.asSingleton() instanceof DataFlowPublic::TupleElementContent
}
}

Expand Down Expand Up @@ -212,7 +212,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
*/
predicate storeStep(Node nodeFrom, Node nodeTo, Content content) {
exists(DataFlowPublic::AttrWrite a, string attrName |
content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
content.asSingleton().(DataFlowPublic::AttributeContent).getAttribute() = attrName and
a.mayHaveAttributeName(attrName) and
nodeFrom = a.getValue() and
nodeTo = a.getObject()
Expand Down Expand Up @@ -242,7 +242,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
*/
predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) {
exists(DataFlowPublic::AttrRead a, string attrName |
content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and
content.asSingleton().(DataFlowPublic::AttributeContent).getAttribute() = attrName and
a.mayHaveAttributeName(attrName) and
nodeFrom = a.getObject() and
nodeTo = a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ module EscapingCaptureFlowConfig implements DataFlow::ConfigSig {

predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
isSink(node) and
(
cs.(DataFlow::TupleElementContent).getIndex() in [0 .. 10] or
cs instanceof DataFlow::ListElementContent or
cs instanceof DataFlow::SetElementContent or
cs instanceof DataFlow::DictionaryElementAnyContent
exists(DataFlow::Content c | cs = DataFlow::TSingletonContent(c) |
c.(DataFlow::TupleElementContent).getIndex() in [0 .. 10] or
c instanceof DataFlow::ListElementContent or
c instanceof DataFlow::SetElementContent or
c instanceof DataFlow::DictionaryElementAnyContent
)
}
}
Expand Down
Loading