Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -484,8 +484,9 @@ module TaintedWithPath {
/** Gets the element that `pathNode` wraps, if any. */
Element getElementFromPathNode(PathNode pathNode) {
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
result = node.asExpr() or
result = node.asParameter()
result = node.asInstruction().getAST()
or
result = node.asOperand().getDef().getAST()
)
or
result = pathNode.(EndpointPathNode).inner()
Expand Down
28 changes: 12 additions & 16 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
or
// Flow into, through, and out of store nodes
StoreNodeFlow::flowInto(nodeFrom, nodeTo)
StoreNodeFlow::flowInto(nodeFrom.asInstruction(), nodeTo)
or
StoreNodeFlow::flowThrough(nodeFrom, nodeTo)
or
Expand All @@ -831,18 +831,11 @@ private predicate adjacentDefUseFlow(Node nodeFrom, Node nodeTo) {
//Def-use flow
Ssa::ssaFlow(nodeFrom, nodeTo)
or
exists(Instruction loadAddress | loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) |
// Use-use flow through reads
exists(Node address |
Ssa::addressFlowTC(address.asInstruction(), loadAddress) and
Ssa::ssaFlow(address, nodeTo)
)
or
// Use-use flow through stores.
exists(Node store |
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
Ssa::ssaFlow(store, nodeTo)
)
// Use-use flow through stores.
exists(Instruction loadAddress, Node store |
loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) and
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
Ssa::ssaFlow(store, nodeTo)
)
)
}
Expand Down Expand Up @@ -906,10 +899,13 @@ private module ReadNodeFlow {
}
}

private module StoreNodeFlow {
/**
* INTERNAL: Do not use.
*/
module StoreNodeFlow {
/** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */
predicate flowInto(Node nodeFrom, StoreNode nodeTo) {
nodeTo.flowInto(Ssa::getDestinationAddress(nodeFrom.asInstruction()))
predicate flowInto(Instruction instrFrom, StoreNode nodeTo) {
nodeTo.flowInto(Ssa::getDestinationAddress(instrFrom))
}

/** Holds if the store node `nodeTo` should receive flow from `nodeFom`. */
Expand Down
79 changes: 59 additions & 20 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,6 @@ Instruction getDestinationAddress(Instruction instr) {
]
}

class ReferenceToInstruction extends CopyValueInstruction {
ReferenceToInstruction() {
this.getResultType() instanceof Cpp::ReferenceType and
not this.getUnary().getResultType() instanceof Cpp::ReferenceType
}

Instruction getSourceAddress() { result = getSourceAddressOperand().getDef() }

Operand getSourceAddressOperand() { result = this.getUnaryOperand() }
}

/** Gets the source address of `instr` if it is an instruction that behaves like a `LoadInstruction`. */
Instruction getSourceAddress(Instruction instr) { result = getSourceAddressOperand(instr).getDef() }

Expand All @@ -266,11 +255,7 @@ Operand getSourceAddressOperand(Instruction instr) {
result =
[
instr.(LoadInstruction).getSourceAddressOperand(),
instr.(ReadSideEffectInstruction).getArgumentOperand(),
// `ReferenceToInstruction` is really more of an address-of operation,
// but by including it in this list we break out of `flowOutOfAddressStep` at an
// instruction that, at the source level, looks like a use of a variable.
instr.(ReferenceToInstruction).getSourceAddressOperand()
instr.(ReadSideEffectInstruction).getArgumentOperand()
]
}

Expand All @@ -295,10 +280,6 @@ Operand getSourceValueOperand(Instruction instr) {
result = instr.(LoadInstruction).getSourceValueOperand()
or
result = instr.(ReadSideEffectInstruction).getSideEffectOperand()
or
// See the comment on the `ReferenceToInstruction` disjunct in `getSourceAddressOperand` for why
// this case is included.
result = instr.(ReferenceToInstruction).getSourceValueOperand()
}

/**
Expand Down Expand Up @@ -513,6 +494,64 @@ private module Cached {
explicitWrite(false, storeNode.getStoreInstruction(), def)
)
or
// The destination of a store operation has undergone lvalue-to-rvalue conversion and is now a
// right-hand-side of a store operation.
// Find the next use of the variable in that store operation, and recursively find the load of that
// pointer. For example, consider this case:
//
// ```cpp
// int x = source();
// int* p = &x;
// sink(*p);
// ```
//
// if we want to find the load of the address of `x`, we see that the pointer is stored into `p`,
// and we then need to recursively look for the load of `p`.
exists(
Def def, StoreInstruction store, IRBlock block1, int rnk1, Use use, IRBlock block2, int rnk2
|
store = def.getInstruction() and
store.getSourceValueOperand() = operand and
def.hasRankInBlock(block1, rnk1) and
use.hasRankInBlock(block2, rnk2) and
adjacentDefRead(_, block1, rnk1, block2, rnk2)
|
// The shared SSA library has determined that `use` is the next use of the operand
// so we find the next load of that use (but only if there is no `PostUpdateNode`) we
// need to flow into first.
not StoreNodeFlow::flowInto(store, _) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
or
// It may also be the case that `store` gives rise to another store step. So let's make sure that
// we also take those into account.
StoreNodeFlow::flowInto(store, nodeTo)
)
or
// As we find the next load of an address, we might come across another use of the same variable.
// In that case, we recursively find the next use of _that_ operand, and continue searching for
// the next load of that operand. For example, consider this case:
//
// ```cpp
// int x = source();
// use(&x);
// int* p = &x;
// sink(*p);
// ```
//
// The next use of `x` after its definition is `use(&x)`, but there is a later load of the address
// of `x` that we want to flow to. So we use the shared SSA library to find the next load.
not operand = getSourceAddressOperand(_) and
exists(Use use1, Use use2, IRBlock block1, int rnk1, IRBlock block2, int rnk2 |
use1.getOperand() = operand and
use1.hasRankInBlock(block1, rnk1) and
// Don't flow to the next use if this use is part of a store operation that totally
// overrides a variable.
not explicitWrite(true, _, use1.getOperand().getDef()) and
adjacentDefRead(_, block1, rnk1, block2, rnk2) and
use2.hasRankInBlock(block2, rnk2) and
flowOutOfAddressStep(use2.getOperand(), nodeTo)
)
or
operand = getSourceAddressOperand(nodeTo.asInstruction())
or
exists(ReturnIndirectionInstruction ret |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ using SinkFunction = void (*)(int);

void notSink(int notSinkParam);

void callsSink(int sinkParam) { // $ ir-path=31:28 ir-path=32:31 ir-path=34:22
sink(sinkParam); // $ ir-sink=31:28 ir-sink=32:31 ir-sink=34:22 ast=31:28 ast=32:31 ast=34:22 MISSING: ast,ir=28
void callsSink(int sinkParam) { // $ ir-path=31:23 ir-path=32:26 ir-path=34:17
sink(sinkParam); // $ ast=31:28 ast=32:31 ast=34:22 ir-sink
}

struct {
Expand All @@ -25,7 +25,7 @@ void assignGlobals() {
};

void testStruct() {
globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ ir MISSING: ast
globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ MISSING: ir-path,ast
globalStruct.notSinkPtr(atoi(getenv("TAINTED"))); // clean

globalUnion.sinkPtr(atoi(getenv("TAINTED"))); // $ ast ir-path
Expand All @@ -48,8 +48,8 @@ class D2 : public D1 {

class D3 : public D2 {
public:
void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 ir-path=73:30
sink(p); // $ ir-sink=58:10 ir-sink=60:17 ir-sink=61:28 ir-sink=62:29 ir-sink=63:33 ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 SPURIOUS: ast=73:30 ir-sink=73:30
void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 SPURIOUS: ir-path=73:30
sink(p); // $ ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 ir-sink SPURIOUS: ast=73:30
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,18 @@ class SourceConfiguration extends TaintedWithPath::TaintTrackingConfiguration {
override predicate isSink(Element e) { isSinkArgument(e) }
}

predicate irTaint(Element source, Element sink, string tag) {
exists(TaintedWithPath::PathNode sinkNode, TaintedWithPath::PathNode predNode |
predicate irTaint(Element source, TaintedWithPath::PathNode predNode, string tag) {
exists(TaintedWithPath::PathNode sinkNode |
TaintedWithPath::taintedWithPath(source, _, _, sinkNode) and
predNode = getAPredecessor*(sinkNode) and
sink = getElementFromPathNode(predNode) and
// Make sure the path is actually reachable from this predecessor.
// Otherwise, we could pick `predNode` to be b when `source` is
// `source1` in this dataflow graph:
// source1 ---> a ---> c ---> sinkNode
// ^
// source2 ---> b --/
source = getElementFromPathNode(getAPredecessor*(predNode)) and
if sinkNode = predNode then tag = "ir-sink" else tag = "ir-path"
if predNode = sinkNode then tag = "ir-sink" else tag = "ir-path"
)
}

Expand All @@ -45,21 +44,25 @@ class IRDefaultTaintTrackingTest extends InlineExpectationsTest {
override string getARelevantTag() { result = ["ir-path", "ir-sink"] }

override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Element source, Element tainted, int n |
irTaint(source, tainted, tag) and
n = strictcount(Element otherSource | irTaint(otherSource, tainted, _)) and
(
n = 1 and value = ""
or
// If there is more than one source for this sink
// we specify the source location explicitly.
n > 1 and
exists(Element source, Element elem, TaintedWithPath::PathNode node, int n |
irTaint(source, node, tag) and
elem = getElementFromPathNode(node) and
n = count(int startline | getAPredecessor(node).hasLocationInfo(_, startline, _, _, _)) and
location = elem.getLocation() and
element = elem.toString()
|
// Zero predecessors means it's a source, and 1 predecessor means it has a unique predecessor.
// In either of these cases we leave out the location.
n = [0, 1] and value = ""
or
// If there is more than one predecessor for this node
// we specify the source location explicitly.
n > 1 and
exists(TaintedWithPath::PathNode pred | pred = getAPredecessor(node) |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this also check irTaint(source, pred, tag)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I think it should.

value =
source.getLocation().getStartLine().toString() + ":" +
source.getLocation().getStartColumn()
) and
location = tainted.getLocation() and
element = tainted.toString()
getElementFromPathNode(pred).getLocation().getStartLine().toString() + ":" +
getElementFromPathNode(pred).getLocation().getStartColumn()
)
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ struct S {
}
};

void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=98:18
sink(a); // $ ast=96:26 ast=98:18 ir-sink=96:26 ir-sink=98:18
void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=102:26
sink(a); // $ ast=96:26 ast=98:18 ir-sink
}

extern int i;
Expand All @@ -27,7 +27,7 @@ class BaseWithPureVirtual {
class DerivedCallsSink : public BaseWithPureVirtual {
public:
void f(const char* p) override { // $ ir-path
sink(p); // $ ir-sink ast=108:10 SPURIOUS: ast=111:10
sink(p); // $ ast=108:10 ir-sink SPURIOUS: ast=111:10
}
};

Expand All @@ -49,16 +49,16 @@ class DerivedDoesNotCallSinkDiamond2 : virtual public BaseWithPureVirtual {
};

class DerivesMultiple : public DerivedCallsSinkDiamond1, public DerivedDoesNotCallSinkDiamond2 {
void f(const char* p) override { // $ ir-path
DerivedCallsSinkDiamond1::f(p);
void f(const char* p) override { // $ ir-path=53:37 ir-path=115:11
DerivedCallsSinkDiamond1::f(p); // $ ir-path
}
};

template<typename T>
class CRTP {
public:
void f(const char* p) { // $ ir-path
static_cast<T*>(this)->g(p);
static_cast<T*>(this)->g(p); // $ ir-path
}
};

Expand All @@ -79,7 +79,7 @@ class Derived2 : public Derived1 {
class Derived3 : public Derived2 {
public:
void f(const char* p) override { // $ ir-path=124:19 ir-path=126:43 ir-path=128:44
sink(p); // $ ast,ir-sink=124:19 ast,ir-sink=126:43 ast,ir-sink=128:44
sink(p); // $ ast=124:19 ast=126:43 ast=128:44 ir-sink
}
};

Expand All @@ -97,11 +97,11 @@ int main(int argc, char *argv[]) {

char*** p = &argv; // $ ast,ir-path

sink(*p[0]); // $ ast,ir-sink
sink(*p[0]); // $ ast ir-sink=96:26 ir-sink=98:18

calls_sink_with_argv(*p[i]); // $ MISSING: ast,ir-path
calls_sink_with_argv(*p[i]); // $ ir-path=96:26 ir-path=98:18 MISSING:ast

sink(*(argv + 1)); // $ ast,ir-path ir-sink
sink(*(argv + 1)); // $ ast ir-path ir-sink

BaseWithPureVirtual* b = new DerivedCallsSink;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,9 @@ void test_pointers1()
sink(ptr1); // $ ast MISSING: ir
sink(ptr2); // $ SPURIOUS: ast
sink(*ptr2); // $ ast MISSING: ir
sink(ptr3); // $ ast MISSING: ir
sink(ptr4); // $ SPURIOUS: ast
sink(*ptr4); // $ ast MISSING: ir
sink(ptr3); // $ ast,ir
sink(ptr4); // $ SPURIOUS: ast,ir
sink(*ptr4); // $ ast,ir
}

void test_pointers2()
Expand Down
Loading