Skip to content

Java: Add data-flow consistency checks. #2921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions config/identical-files.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
"java/ql/src/semmle/code/java/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
"java/ql/src/semmle/code/java/dataflow/internal/tainttracking2/TaintTrackingImpl.qll"
],
"DataFlow Java/C++/C# Consistency checks": [
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplConsistency.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplConsistency.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll",
"csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowImplConsistency.qll"
],
"C++ SubBasicBlocks": [
"cpp/ql/src/semmle/code/cpp/controlflow/SubBasicBlocks.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/SubBasicBlocks.qll"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/

private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public

module Consistency {
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
this instanceof ParameterNode or
this instanceof ReturnNode or
this = getAnOutNode(_, _) or
simpleLocalFlowStep(this, _) or
simpleLocalFlowStep(_, this) or
jumpStep(this, _) or
jumpStep(_, this) or
storeStep(this, _, _) or
storeStep(_, _, this) or
readStep(this, _, _) or
readStep(_, _, this) or
defaultAdditionalTaintStep(this, _) or
defaultAdditionalTaintStep(_, this)
}
}

query predicate uniqueEnclosingCallable(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getEnclosingCallable()) and
c != 1 and
msg = "Node should have one enclosing callable but has " + c + "."
)
}

query predicate uniqueTypeBound(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getTypeBound()) and
c != 1 and
msg = "Node should have one type bound but has " + c + "."
)
}

query predicate uniqueTypeRepr(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(getErasedRepr(n.getTypeBound())) and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could use getErasedNodeTypeBound here instead...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That requires an additional import of DataFlowImplCommon, so perhaps just leave as-is?

c != 1 and
msg = "Node should have one type representation but has " + c + "."
)
}

query predicate uniqueNodeLocation(Node n, string msg) {
exists(int c |
c =
count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
msg = "Node should have one location but has " + c + "."
)
}

query predicate missingLocation(string msg) {
exists(int c |
c =
strictcount(Node n |
not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
) and
msg = "Nodes without location: " + c
)
}

query predicate uniqueNodeToString(Node n, string msg) {
exists(int c |
c = count(n.toString()) and
c != 1 and
msg = "Node should have one toString but has " + c + "."
)
}

query predicate missingToString(string msg) {
exists(int c |
c = strictcount(Node n | not exists(n.toString())) and
msg = "Nodes without toString: " + c
)
}

query predicate parameterCallable(ParameterNode p, string msg) {
exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
msg = "Callable mismatch for parameter."
}

query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
simpleLocalFlowStep(n1, n2) and
n1.getEnclosingCallable() != n2.getEnclosingCallable() and
msg = "Local flow step does not preserve enclosing callable."
}

private DataFlowType typeRepr() { result = getErasedRepr(any(Node n).getTypeBound()) }

query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
t = typeRepr() and
not compatibleTypes(t, t) and
msg = "Type compatibility predicate is not reflexive."
}

query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
isUnreachableInCall(n, call) and
exists(DataFlowCallable c |
c = n.getEnclosingCallable() and
not viableCallable(call) = c
) and
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
}

query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
(
n = getAnOutNode(call, _) and
msg = "OutNode and call does not share enclosing callable."
or
n.(ArgumentNode).argumentOf(call, _) and
msg = "ArgumentNode and call does not share enclosing callable."
) and
n.getEnclosingCallable() != call.getEnclosingCallable()
}

query predicate postIsNotPre(PostUpdateNode n, string msg) {
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you comment briefly on what happens if this condition is violated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flow that exits a method call through a side-effect on an argument may re-enter the method. E.g. in the following you'll get wrong flow from source to sink if you have a call to someMethod where the PostUpdateNode for the argument equals the ArgumentNode:

someMethod(A a) {
  sink(a.f);
  a.f = source();
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense.

}

query predicate postHasUniquePre(PostUpdateNode n, string msg) {
exists(int c |
c = count(n.getPreUpdateNode()) and
c != 1 and
msg = "PostUpdateNode should have one pre-update node but has " + c + "."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One example is that a store to a PostUpdateNode that doesn't have a corresponding pre-update won't be able to exit through a parameter: in someMethod(A a) { a.f = source(); } the store targets the post-update read of a in the qualifier of a.f, combined with local value flow from the ParameterNode to the corresponding pre-update node, the store is observed as a side-effect of the method and can then exit through the parameter and reach a PostUpdateNode of an argument in a call to someMethod. I don't really have an overview of what would happen if a PostUpdateNode has multiple pre-update nodes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I was mostly asking about the case of multiple pre-update nodes. Glad to hear it's not known to mess things up massively.

)
}

query predicate uniquePostUpdate(Node n, string msg) {
1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
msg = "Node has multiple PostUpdateNodes."
}

query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
msg = "PostUpdateNode does not share callable with its pre-update node."
}

private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }

query predicate reverseRead(Node n, string msg) {
exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
msg = "Origin of readStep is missing a PostUpdateNode."
}

query predicate storeIsPostUpdate(Node n, string msg) {
storeStep(_, _, n) and
not n instanceof PostUpdateNode and
msg = "Store targets should be PostUpdateNodes."
}

query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,12 @@ class DataFlowCall extends Expr {
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } // stub implementation

int accessPathLimit() { result = 5 }

/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) { none() }
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/**
* Provides consistency queries for checking invariants in the language-specific
* data-flow classes and predicates.
*/

private import DataFlowImplSpecific::Private
private import DataFlowImplSpecific::Public
private import tainttracking1.TaintTrackingParameter::Private
private import tainttracking1.TaintTrackingParameter::Public

module Consistency {
private class RelevantNode extends Node {
RelevantNode() {
this instanceof ArgumentNode or
this instanceof ParameterNode or
this instanceof ReturnNode or
this = getAnOutNode(_, _) or
simpleLocalFlowStep(this, _) or
simpleLocalFlowStep(_, this) or
jumpStep(this, _) or
jumpStep(_, this) or
storeStep(this, _, _) or
storeStep(_, _, this) or
readStep(this, _, _) or
readStep(_, _, this) or
defaultAdditionalTaintStep(this, _) or
defaultAdditionalTaintStep(_, this)
}
}

query predicate uniqueEnclosingCallable(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getEnclosingCallable()) and
c != 1 and
msg = "Node should have one enclosing callable but has " + c + "."
)
}

query predicate uniqueTypeBound(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(n.getTypeBound()) and
c != 1 and
msg = "Node should have one type bound but has " + c + "."
)
}

query predicate uniqueTypeRepr(Node n, string msg) {
exists(int c |
n instanceof RelevantNode and
c = count(getErasedRepr(n.getTypeBound())) and
c != 1 and
msg = "Node should have one type representation but has " + c + "."
)
}

query predicate uniqueNodeLocation(Node n, string msg) {
exists(int c |
c =
count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
) and
c != 1 and
msg = "Node should have one location but has " + c + "."
)
}

query predicate missingLocation(string msg) {
exists(int c |
c =
strictcount(Node n |
not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
)
) and
msg = "Nodes without location: " + c
)
}

query predicate uniqueNodeToString(Node n, string msg) {
exists(int c |
c = count(n.toString()) and
c != 1 and
msg = "Node should have one toString but has " + c + "."
)
}

query predicate missingToString(string msg) {
exists(int c |
c = strictcount(Node n | not exists(n.toString())) and
msg = "Nodes without toString: " + c
)
}

query predicate parameterCallable(ParameterNode p, string msg) {
exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
msg = "Callable mismatch for parameter."
}

query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
simpleLocalFlowStep(n1, n2) and
n1.getEnclosingCallable() != n2.getEnclosingCallable() and
msg = "Local flow step does not preserve enclosing callable."
}

private DataFlowType typeRepr() { result = getErasedRepr(any(Node n).getTypeBound()) }

query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
t = typeRepr() and
not compatibleTypes(t, t) and
msg = "Type compatibility predicate is not reflexive."
}

query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
isUnreachableInCall(n, call) and
exists(DataFlowCallable c |
c = n.getEnclosingCallable() and
not viableCallable(call) = c
) and
msg = "Call context for isUnreachableInCall is inconsistent with call graph."
}

query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
(
n = getAnOutNode(call, _) and
msg = "OutNode and call does not share enclosing callable."
or
n.(ArgumentNode).argumentOf(call, _) and
msg = "ArgumentNode and call does not share enclosing callable."
) and
n.getEnclosingCallable() != call.getEnclosingCallable()
}

query predicate postIsNotPre(PostUpdateNode n, string msg) {
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
}

query predicate postHasUniquePre(PostUpdateNode n, string msg) {
exists(int c |
c = count(n.getPreUpdateNode()) and
c != 1 and
msg = "PostUpdateNode should have one pre-update node but has " + c + "."
)
}

query predicate uniquePostUpdate(Node n, string msg) {
1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
msg = "Node has multiple PostUpdateNodes."
}

query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
msg = "PostUpdateNode does not share callable with its pre-update node."
}

private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }

query predicate reverseRead(Node n, string msg) {
exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
msg = "Origin of readStep is missing a PostUpdateNode."
}

query predicate storeIsPostUpdate(Node n, string msg) {
storeStep(_, _, n) and
not n instanceof PostUpdateNode and
msg = "Store targets should be PostUpdateNodes."
}

query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,12 @@ class DataFlowCall extends CallInstruction {
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } // stub implementation

int accessPathLimit() { result = 5 }

/**
* Holds if `n` does not require a `PostUpdateNode` as it either cannot be
* modified or its modification cannot be observed, for example if it is a
* freshly created object that is not saved in a variable.
*
* This predicate is only used for consistency checks.
*/
predicate isImmutableOrUnobservable(Node n) { none() }
Loading