Skip to content

C++: Wire up param/arg indirections in data flow #3123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ private DataFlow::Node getNodeForSource(Expr source) {
// to `gets`. It's impossible here to tell which is which, but the "access
// to argv" source is definitely not intended to match an output argument,
// and it causes false positives if we let it.
//
// This case goes together with the similar (but not identical) rule in
// `nodeIsBarrierIn`.
result = DataFlow::definitionByReferenceNode(source) and
not argv(source.(VariableAccess).getTarget())
)
Expand Down Expand Up @@ -202,7 +205,13 @@ private predicate nodeIsBarrier(DataFlow::Node node) {

private predicate nodeIsBarrierIn(DataFlow::Node node) {
// don't use dataflow into taint sources, as this leads to duplicate results.
node = getNodeForSource(any(Expr e))
exists(Expr source | isUserInput(source, _) |
node = DataFlow::exprNode(source)
or
// This case goes together with the similar (but not identical) rule in
// `getNodeForSource`.
node = DataFlow::definitionByReferenceNode(source)
)
}

cached
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,28 @@ private import DataFlowDispatch
* to the callable. Instance arguments (`this` pointer) are also included.
*/
class ArgumentNode extends InstructionNode {
ArgumentNode() { exists(CallInstruction call | this.getInstruction() = call.getAnArgument()) }
ArgumentNode() {
exists(CallInstruction call |
instr = call.getAnArgument()
or
instr.(ReadSideEffectInstruction).getPrimaryInstruction() = call
)
}

/**
* Holds if this argument occurs at the given position in the given call.
* The instance argument is considered to have index `-1`.
*/
predicate argumentOf(DataFlowCall call, int pos) {
this.getInstruction() = call.getPositionalArgument(pos)
instr = call.getPositionalArgument(pos)
or
this.getInstruction() = call.getThisArgument() and pos = -1
instr = call.getThisArgument() and pos = -1
or
exists(ReadSideEffectInstruction read |
read = instr and
read.getPrimaryInstruction() = call and
pos = getArgumentPosOfSideEffect(read.getIndex())
)
}

/** Gets the call in which this node is an argument. */
Expand Down
115 changes: 99 additions & 16 deletions cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class Node extends TIRDataFlowNode {
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }

/** Gets the parameter corresponding to this node, if any. */
/** Gets the positional parameter corresponding to this node, if any. */
Parameter asParameter() { result = this.(ExplicitParameterNode).getParameter() }

/**
Expand Down Expand Up @@ -156,44 +156,90 @@ class ExprNode extends InstructionNode {
}

/**
* A node representing a `Parameter`. This includes both explicit parameters such
* as `x` in `f(x)` and implicit parameters such as `this` in `x.f()`
* INTERNAL: do not use. Translates a parameter/argument index into a negative
* number that denotes the index of its side effect (pointer indirection).
*/
bindingset[index]
int getArgumentPosOfSideEffect(int index) {
// -1 -> -2
// 0 -> -3
// 1 -> -4
// ...
result = -3 - index
}

/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph. This includes both explicit parameters such as `x` in `f(x)`
* and implicit parameters such as `this` in `x.f()`.
*
* To match a specific kind of parameter, consider using one of the subclasses
* `ExplicitParameterNode`, `ThisParameterNode`, or
* `ParameterIndirectionNode`.
*/
class ParameterNode extends InstructionNode {
override InitializeParameterInstruction instr;
ParameterNode() {
// To avoid making this class abstract, we enumerate its values here
instr instanceof InitializeParameterInstruction
or
instr instanceof InitializeIndirectionInstruction
}

/**
* Holds if this node is the parameter of `c` at the specified (zero-based)
* position. The implicit `this` parameter is considered to have index `-1`.
* Holds if this node is the parameter of `f` at the specified position. The
* implicit `this` parameter is considered to have position `-1`, and
* pointer-indirection parameters are at further negative positions.
*/
predicate isParameterOf(Function f, int i) { none() } // overriden by subclasses
predicate isParameterOf(Function f, int pos) { none() } // overridden by subclasses
}

/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*/
/** An explicit positional parameter, not including `this` or `...`. */
private class ExplicitParameterNode extends ParameterNode {
override InitializeParameterInstruction instr;

ExplicitParameterNode() { exists(instr.getParameter()) }

override predicate isParameterOf(Function f, int i) { f.getParameter(i) = instr.getParameter() }
override predicate isParameterOf(Function f, int pos) {
f.getParameter(pos) = instr.getParameter()
}

/** Gets the parameter corresponding to this node. */
/** Gets the `Parameter` associated with this node. */
Parameter getParameter() { result = instr.getParameter() }

override string toString() { result = instr.getParameter().toString() }
}

private class ThisParameterNode extends ParameterNode {
/** An implicit `this` parameter. */
class ThisParameterNode extends ParameterNode {
override InitializeParameterInstruction instr;

ThisParameterNode() { instr.getIRVariable() instanceof IRThisVariable }

override predicate isParameterOf(Function f, int i) {
i = -1 and instr.getEnclosingFunction() = f
override predicate isParameterOf(Function f, int pos) {
pos = -1 and instr.getEnclosingFunction() = f
}

override string toString() { result = "this" }
}

/** A synthetic parameter to model the pointed-to object of a pointer parameter. */
class ParameterIndirectionNode extends ParameterNode {
override InitializeIndirectionInstruction instr;

override predicate isParameterOf(Function f, int pos) {
exists(int index |
f.getParameter(index) = instr.getParameter()
or
index = -1 and
instr.getIRVariable().(IRThisVariable).getEnclosingFunction() = f
|
pos = getArgumentPosOfSideEffect(index)
)
}

override string toString() { result = "*" + instr.getIRVariable().toString() }
}

/**
* DEPRECATED: Data flow was never an accurate way to determine what
* expressions might be uninitialized. It errs on the side of saying that
Expand Down Expand Up @@ -341,6 +387,18 @@ class DefinitionByReferenceNode extends InstructionNode {
}
}

/**
* A node representing the memory pointed to by a function argument.
*
* This class exists only in order to override `toString`, which would
* otherwise be the default implementation inherited from `InstructionNode`.
*/
private class ArgumentIndirectionNode extends InstructionNode {
override ReadSideEffectInstruction instr;

override string toString() { result = "Argument " + instr.getIndex() + " indirection" }
}

/**
* A `Node` corresponding to a variable in the program, as opposed to the
* value of that variable at some particular point. This can be used for
Expand Down Expand Up @@ -442,6 +500,31 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
or
iTo.(PhiInstruction).getAnOperand().getDef() = iFrom
or
// A read side effect is almost never exact since we don't know exactly how
// much memory the callee will read.
iTo.(ReadSideEffectInstruction).getSideEffectOperand().getAnyDef() = iFrom and
not iFrom.isResultConflated()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like this rule. It sends flow into a ReadSideEffectInstruction, which actually means there is flow to the result value of that instruction. But it's an instruction without a result!

The rule is here because we need a node to be the ArgumentNode in DataFlowPrivate.qll, and I've chosen the node for the ReadSideEffectInstruction. The alternatives I can see are:

  1. Omit this rule and use the most recent definition of the indirection (here named iFrom) as the ArgumentNode. Then a node could be the argument for multiple calls, leading to confusing path explanations.
  2. Create a new synthetic node for this purpose.
  3. Change the data-flow library so that flow alternates between Instruction and Operand nodes. The list of good reasons to do this is starting to get long.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can see the effect on path explanations in argvLocal.expected, where many new nodes with the awkward name BufferReadSideEffect are now there.

or
// Loading a single `int` from an `int *` parameter is not an exact load since
// the parameter may point to an entire array rather than a single `int`. The
// following rule ensures that any flow going into the
// `InitializeIndirectionInstruction`, even if it's for a different array
// element, will propagate to a load of the first element.
//
// Since we're linking `InitializeIndirectionInstruction` and
// `LoadInstruction` together directly, this rule will break if there's any
// reassignment of the parameter indirection, including a conditional one that
// leads to a phi node.
exists(InitializeIndirectionInstruction init |
iFrom = init and
iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = init and
// Check that the types match. Otherwise we can get flow from an object to
// its fields, which leads to field conflation when there's flow from other
// fields to the object elsewhere.
init.getParameter().getType().getUnspecifiedType().(DerivedType).getBaseType() =
iTo.getResultType().getUnspecifiedType()
Comment on lines +521 to +525
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we still get flow from object to field when the type of the field is equal to the parameter type? I'm thinking about a situation with a binary tree like this:

struct Tree { Tree *left, *right; };

Tree source();
void sink(Tree*);

void read_left_subtree(Tree* tree) {
  sink(tree->left);
}
...
Tree tree = source();
read_left_subtree(&tree);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I'll add a test to find out. But even if the answer is that such conflation is possible, does that mean it's undesirable? If we want conflation between array indexes, don't we then also want conflation between entries in a tree or a linked list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. It's probably reasonable to have object to field flow in such situations.

)
or
// Treat all conversions as flow, even conversions between different numeric types.
iTo.(ConvertInstruction).getUnary() = iFrom
or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,35 @@ void test_conflated_fields3() {
taint_y(&xy);
sink(xy.x); // not tainted
}

struct Point {
int x;
int y;

void callSink() {
sink(this->x); // tainted
sink(this->y); // not tainted
}
};

void test_conflated_fields1() {
Point p;
p.x = getenv("VAR")[0];
sink(p.x); // tainted
sink(p.y); // not tainted
p.callSink();
}

void taint_x(Point *pp) {
pp->x = getenv("VAR")[0];
}

void y_to_sink(Point *pp) {
sink(pp->y); // not tainted
}

void test_conflated_fields2() {
Point p;
taint_x(&p);
y_to_sink(&p);
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | (int)... |
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | access to array |
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:12:111:18 | tainted |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:126:16:126:16 | x |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:14 | call to getenv |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:24 | (int)... |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:24 | access to array |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:134:10:134:10 | x |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | shared.h:6:15:6:23 | sinkparam |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:16 | call to getenv |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:26 | (int)... |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:26 | access to array |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:143:23:143:24 | pp |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:144:8:144:9 | pp |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:150:13:150:14 | & ... |
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:24:28:27 | call to atoi |
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:34 | call to getenv |
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:45 | (const char *)... |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | p2 | IR only |
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | shared.h:5:23:5:31 | sinkparam | IR only |
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:8:111:8 | y | AST only |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:126:16:126:16 | x | IR only |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:5:133:5 | x | AST only |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:134:10:134:10 | x | IR only |
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | shared.h:6:15:6:23 | sinkparam | IR only |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:7:140:7 | x | AST only |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:143:23:143:24 | pp | IR only |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:144:8:144:9 | pp | IR only |
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:150:13:150:14 | & ... | IR only |
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:13:5:13:11 | global1 | AST only |
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:23:5:23:11 | global2 | AST only |
| test_diff.cpp:104:12:104:15 | argv | test_diff.cpp:104:11:104:20 | (...) | IR only |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ void following_pointers(

sourceStruct1_ptr->m1 = source();
sink(sourceStruct1_ptr->m1); // flow
sink(sourceStruct1_ptr->getFirst()); // flow [NOT DETECTED with IR]
sink(sourceStruct1_ptr->getFirst()); // flow
sink(sourceStruct1_ptr->m2); // no flow
sink(sourceStruct1.m1); // no flow

Expand Down
2 changes: 1 addition & 1 deletion cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ class FlowThroughFields {
}

int calledAfterTaint() {
sink(field); // tainted [NOT DETECTED with IR]
sink(field); // tainted
}

int taintAndCall() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
| BarrierGuard.cpp:49:10:49:15 | BarrierGuard.cpp:51:13:51:13 | AST only |
| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:62:14:62:14 | AST only |
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
| clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only |
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |
| dispatch.cpp:16:37:16:42 | dispatch.cpp:32:16:32:24 | IR only |
| dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only |
Expand All @@ -19,11 +18,7 @@
| dispatch.cpp:144:8:144:13 | dispatch.cpp:96:8:96:8 | IR only |
| globals.cpp:13:23:13:28 | globals.cpp:12:10:12:24 | IR only |
| globals.cpp:23:23:23:28 | globals.cpp:19:10:19:24 | IR only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:14:3:14:6 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:18:8:18:8 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:21:3:21:6 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:29:3:29:6 | AST only |
| lambdas.cpp:8:10:8:15 | lambdas.cpp:41:8:41:8 | AST only |
| lambdas.cpp:43:7:43:12 | lambdas.cpp:46:7:46:7 | AST only |
| ref.cpp:29:11:29:16 | ref.cpp:62:10:62:11 | AST only |
| ref.cpp:53:9:53:10 | ref.cpp:56:10:56:11 | AST only |
Expand All @@ -39,7 +34,6 @@
| test.cpp:109:9:109:14 | test.cpp:110:10:110:12 | IR only |
| test.cpp:347:17:347:22 | test.cpp:349:10:349:18 | AST only |
| test.cpp:359:13:359:18 | test.cpp:365:10:365:14 | AST only |
| test.cpp:373:13:373:18 | test.cpp:369:10:369:14 | AST only |
| test.cpp:399:7:399:9 | test.cpp:401:8:401:10 | AST only |
| test.cpp:405:7:405:9 | test.cpp:408:8:408:10 | AST only |
| test.cpp:416:7:416:11 | test.cpp:418:8:418:12 | AST only |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
| clang.cpp:18:8:18:19 | (const int *)... | clang.cpp:12:9:12:20 | sourceArray1 |
| clang.cpp:18:8:18:19 | sourceArray1 | clang.cpp:12:9:12:20 | sourceArray1 |
| clang.cpp:29:27:29:28 | m1 | clang.cpp:28:27:28:32 | call to source |
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |
| clang.cpp:41:18:41:19 | m2 | clang.cpp:39:42:39:47 | call to source |
| clang.cpp:45:17:45:18 | m2 | clang.cpp:43:35:43:40 | call to source |
Expand All @@ -38,7 +39,11 @@
| globals.cpp:6:10:6:14 | local | globals.cpp:5:17:5:22 | call to source |
| globals.cpp:12:10:12:24 | flowTestGlobal1 | globals.cpp:13:23:13:28 | call to source |
| globals.cpp:19:10:19:24 | flowTestGlobal2 | globals.cpp:23:23:23:28 | call to source |
| lambdas.cpp:14:3:14:6 | t | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:18:8:18:8 | call to operator() | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:29:3:29:6 | t | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:35:8:35:8 | a | lambdas.cpp:8:10:8:15 | call to source |
| lambdas.cpp:41:8:41:8 | (reference dereference) | lambdas.cpp:8:10:8:15 | call to source |
| ref.cpp:123:13:123:15 | val | ref.cpp:122:23:122:28 | call to source |
| ref.cpp:126:13:126:15 | val | ref.cpp:125:19:125:24 | call to source |
| ref.cpp:129:13:129:15 | val | ref.cpp:94:15:94:20 | call to source |
Expand All @@ -65,6 +70,7 @@
| test.cpp:266:12:266:12 | x | test.cpp:265:22:265:27 | call to source |
| test.cpp:289:14:289:14 | x | test.cpp:305:17:305:22 | call to source |
| test.cpp:318:7:318:7 | x | test.cpp:314:4:314:9 | call to source |
| test.cpp:369:10:369:14 | field | test.cpp:373:13:373:18 | call to source |
| test.cpp:375:10:375:14 | field | test.cpp:373:13:373:18 | call to source |
| test.cpp:385:8:385:10 | tmp | test.cpp:382:48:382:54 | source1 |
| test.cpp:392:8:392:10 | tmp | test.cpp:388:53:388:59 | source1 |
Expand Down
2 changes: 1 addition & 1 deletion cpp/ql/test/library-tests/dataflow/fields/A.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class A
{
if (C1 *c1 = dynamic_cast<C1 *>(c))
{
sink(c1->a); // $ast $f-:ir
sink(c1->a); // $ast $ir
}
C *cc;
if (C2 *c2 = dynamic_cast<C2 *>(c))
Expand Down
2 changes: 1 addition & 1 deletion cpp/ql/test/library-tests/dataflow/fields/by_reference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void test_setThroughNonMember() {
void test_nonMemberSetA() {
S s;
nonMemberSetA(&s, user_input());
sink(nonMemberGetA(&s)); // $ast $f-:ir
sink(nonMemberGetA(&s)); // $ast,ir
}

////////////////////
Expand Down
Loading