github · geoffw0 · Nov 23, 2021 · Nov 9, 2021 · Nov 11, 2021 · Nov 22, 2021
@@ -484,8 +484,9 @@ module TaintedWithPath {
     /** Gets the element that `pathNode` wraps, if any. */
     Element getElementFromPathNode(PathNode pathNode) {
       exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
-        result = node.asExpr() or
-        result = node.asParameter()
+        result = node.asInstruction().getAST()
+        or
+        result = node.asOperand().getDef().getAST()
       )
       or
       result = pathNode.(EndpointPathNode).inner()

@@ -806,7 +806,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
   simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
   or
   // Flow into, through, and out of store nodes
-  StoreNodeFlow::flowInto(nodeFrom, nodeTo)
+  StoreNodeFlow::flowInto(nodeFrom.asInstruction(), nodeTo)
   or
   StoreNodeFlow::flowThrough(nodeFrom, nodeTo)
   or
@@ -831,18 +831,11 @@ private predicate adjacentDefUseFlow(Node nodeFrom, Node nodeTo) {
     //Def-use flow
     Ssa::ssaFlow(nodeFrom, nodeTo)
     or
-    exists(Instruction loadAddress | loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) |
-      // Use-use flow through reads
-      exists(Node address |
-        Ssa::addressFlowTC(address.asInstruction(), loadAddress) and
-        Ssa::ssaFlow(address, nodeTo)
-      )
-      or
-      // Use-use flow through stores.
-      exists(Node store |
-        Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
-        Ssa::ssaFlow(store, nodeTo)
-      )
+    // Use-use flow through stores.
+    exists(Instruction loadAddress, Node store |
+      loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) and
+      Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
+      Ssa::ssaFlow(store, nodeTo)
     )
   )
 }
@@ -906,10 +899,13 @@ private module ReadNodeFlow {
   }
 }
 
-private module StoreNodeFlow {
+/**
+ * INTERNAL: Do not use.
+ */
+module StoreNodeFlow {
   /** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */
-  predicate flowInto(Node nodeFrom, StoreNode nodeTo) {
-    nodeTo.flowInto(Ssa::getDestinationAddress(nodeFrom.asInstruction()))
+  predicate flowInto(Instruction instrFrom, StoreNode nodeTo) {
+    nodeTo.flowInto(Ssa::getDestinationAddress(instrFrom))
   }
 
   /** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */

@@ -244,17 +244,6 @@ Instruction getDestinationAddress(Instruction instr) {
     ]
 }
 
-class ReferenceToInstruction extends CopyValueInstruction {
-  ReferenceToInstruction() {
-    this.getResultType() instanceof Cpp::ReferenceType and
-    not this.getUnary().getResultType() instanceof Cpp::ReferenceType
-  }
-
-  Instruction getSourceAddress() { result = getSourceAddressOperand().getDef() }
-
-  Operand getSourceAddressOperand() { result = this.getUnaryOperand() }
-}
-
 /** Gets the source address of `instr` if it is an instruction that behaves like a `LoadInstruction`. */
 Instruction getSourceAddress(Instruction instr) { result = getSourceAddressOperand(instr).getDef() }
 
@@ -266,11 +255,7 @@ Operand getSourceAddressOperand(Instruction instr) {
   result =
     [
       instr.(LoadInstruction).getSourceAddressOperand(),
-      instr.(ReadSideEffectInstruction).getArgumentOperand(),
-      // `ReferenceToInstruction` is really more of an address-of operation,
-      // but by including it in this list we break out of `flowOutOfAddressStep` at an
-      // instruction that, at the source level, looks like a use of a variable.
-      instr.(ReferenceToInstruction).getSourceAddressOperand()
+      instr.(ReadSideEffectInstruction).getArgumentOperand()
     ]
 }
 
@@ -295,10 +280,6 @@ Operand getSourceValueOperand(Instruction instr) {
   result = instr.(LoadInstruction).getSourceValueOperand()
   or
   result = instr.(ReadSideEffectInstruction).getSideEffectOperand()
-  or
-  // See the comment on the `ReferenceToInstruction` disjunct in `getSourceAddressOperand` for why
-  // this case is included.
-  result = instr.(ReferenceToInstruction).getSourceValueOperand()
 }
 
 /**
@@ -513,6 +494,64 @@ private module Cached {
       explicitWrite(false, storeNode.getStoreInstruction(), def)
     )
     or
+    // The destination of a store operation has undergone lvalue-to-rvalue conversion and is now a
+    // right-hand-side of a store operation.
+    // Find the next use of the variable in that store operation, and recursively find the load of that
+    // pointer. For example, consider this case:
+    //
+    // ```cpp
+    // int x = source();
+    // int* p = &x;
+    // sink(*p);
+    // ```
+    //
+    // if we want to find the load of the address of `x`, we see that the pointer is stored into `p`,
+    // and we then need to recursively look for the load of `p`.
+    exists(
+      Def def, StoreInstruction store, IRBlock block1, int rnk1, Use use, IRBlock block2, int rnk2
+    |
+      store = def.getInstruction() and
+      store.getSourceValueOperand() = operand and
+      def.hasRankInBlock(block1, rnk1) and
+      use.hasRankInBlock(block2, rnk2) and
+      adjacentDefRead(_, block1, rnk1, block2, rnk2)
+    |
+      // The shared SSA library has determined that `use` is the next use of the operand
+      // so we find the next load of that use (but only if there is no `PostUpdateNode` that we
+      // need to flow into first).
+      not StoreNodeFlow::flowInto(store, _) and
+      flowOutOfAddressStep(use.getOperand(), nodeTo)
+      or
+      // It may also be the case that `store` gives rise to another store step. So let's make sure that
+      // we also take those into account.
+      StoreNodeFlow::flowInto(store, nodeTo)
+    )
+    or
+    // As we find the next load of an address, we might come across another use of the same variable.
+    // In that case, we recursively find the next use of _that_ operand, and continue searching for
+    // the next load of that operand. For example, consider this case:
+    //
+    // ```cpp
+    // int x = source();
+    // use(&x);
+    // int* p = &x;
+    // sink(*p);
+    // ```
+    //
+    // The next use of `x` after its definition is `use(&x)`, but there is a later load of the address
+    // of `x` that we want to flow to. So we use the shared SSA library to find the next load.
+    not operand = getSourceAddressOperand(_) and
+    exists(Use use1, Use use2, IRBlock block1, int rnk1, IRBlock block2, int rnk2 |
+      use1.getOperand() = operand and
+      use1.hasRankInBlock(block1, rnk1) and
+      // Don't flow to the next use if this use is part of a store operation that totally
+      // overwrites a variable.
+      not explicitWrite(true, _, use1.getOperand().getDef()) and
+      adjacentDefRead(_, block1, rnk1, block2, rnk2) and
+      use2.hasRankInBlock(block2, rnk2) and
+      flowOutOfAddressStep(use2.getOperand(), nodeTo)
+    )
+    or
     operand = getSourceAddressOperand(nodeTo.asInstruction())
     or
     exists(ReturnIndirectionInstruction ret |

@@ -4,8 +4,8 @@ using SinkFunction = void (*)(int);
 
 void notSink(int notSinkParam);
 
-void callsSink(int sinkParam) { // $ ir-path=31:28 ir-path=32:31 ir-path=34:22
-  sink(sinkParam); // $ ir-sink=31:28 ir-sink=32:31 ir-sink=34:22 ast=31:28 ast=32:31 ast=34:22 MISSING: ast,ir=28
+void callsSink(int sinkParam) { // $ ir-path=31:23 ir-path=32:26 ir-path=34:17
+  sink(sinkParam); // $ ast=31:28 ast=32:31 ast=34:22 ir-sink
 }
 
 struct {
@@ -25,7 +25,7 @@ void assignGlobals() {
 };
 
 void testStruct() {
-  globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ ir MISSING: ast
+  globalStruct.sinkPtr(atoi(getenv("TAINTED"))); // $ MISSING: ir-path,ast
   globalStruct.notSinkPtr(atoi(getenv("TAINTED"))); // clean
 
   globalUnion.sinkPtr(atoi(getenv("TAINTED"))); // $ ast ir-path
@@ -48,8 +48,8 @@ class D2 : public D1 {
 
 class D3 : public D2 {
     public:
-    void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 ir-path=73:30
-        sink(p); // $ ir-sink=58:10 ir-sink=60:17 ir-sink=61:28 ir-sink=62:29 ir-sink=63:33 ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 SPURIOUS: ast=73:30 ir-sink=73:30
+    void f(const char* p) override { // $ ir-path=58:10 ir-path=60:17 ir-path=61:28 ir-path=62:29 ir-path=63:33 SPURIOUS: ir-path=73:30
+        sink(p); // $ ast=58:10 ast=60:17 ast=61:28 ast=62:29 ast=63:33 ir-sink SPURIOUS: ast=73:30
     }
 };
 

@@ -23,19 +23,18 @@ class SourceConfiguration extends TaintedWithPath::TaintTrackingConfiguration {
   override predicate isSink(Element e) { isSinkArgument(e) }
 }
 
-predicate irTaint(Element source, Element sink, string tag) {
-  exists(TaintedWithPath::PathNode sinkNode, TaintedWithPath::PathNode predNode |
+predicate irTaint(Element source, TaintedWithPath::PathNode predNode, string tag) {
+  exists(TaintedWithPath::PathNode sinkNode |
     TaintedWithPath::taintedWithPath(source, _, _, sinkNode) and
     predNode = getAPredecessor*(sinkNode) and
-    sink = getElementFromPathNode(predNode) and
     // Make sure the path is actually reachable from this predecessor.
     // Otherwise, we could pick `predNode` to be b when `source` is
     // `source1` in this dataflow graph:
     // source1 ---> a ---> c ---> sinkNode
     //                   ^
     // source2 ---> b --/
     source = getElementFromPathNode(getAPredecessor*(predNode)) and
-    if sinkNode = predNode then tag = "ir-sink" else tag = "ir-path"
+    if predNode = sinkNode then tag = "ir-sink" else tag = "ir-path"
   )
 }
 
@@ -45,21 +44,25 @@ class IRDefaultTaintTrackingTest extends InlineExpectationsTest {
   override string getARelevantTag() { result = ["ir-path", "ir-sink"] }
 
   override predicate hasActualResult(Location location, string element, string tag, string value) {
-    exists(Element source, Element tainted, int n |
-      irTaint(source, tainted, tag) and
-      n = strictcount(Element otherSource | irTaint(otherSource, tainted, _)) and
-      (
-        n = 1 and value = ""
-        or
-        // If there is more than one source for this sink
-        // we specify the source location explicitly.
-        n > 1 and
+    exists(Element source, Element elem, TaintedWithPath::PathNode node, int n |
+      irTaint(source, node, tag) and
+      elem = getElementFromPathNode(node) and
+      n = count(int startline | getAPredecessor(node).hasLocationInfo(_, startline, _, _, _)) and
+      location = elem.getLocation() and
+      element = elem.toString()
+    |
+      // Zero predecessors means it's a source, and 1 predecessor means it has a unique predecessor.
+      // In either of these cases we leave out the location.
+      n = [0, 1] and value = ""
+      or
+      // If there is more than one predecessor for this node
+      // we specify the predecessor's location explicitly.
+      n > 1 and
+      exists(TaintedWithPath::PathNode pred | pred = getAPredecessor(node) |
         value =
-          source.getLocation().getStartLine().toString() + ":" +
-            source.getLocation().getStartColumn()
-      ) and
-      location = tainted.getLocation() and
-      element = tainted.toString()
+          getElementFromPathNode(pred).getLocation().getStartLine().toString() + ":" +
+            getElementFromPathNode(pred).getLocation().getStartColumn()
+      )
     )
   }
 }

@@ -13,8 +13,8 @@ struct S {
     }
 };
 
-void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=98:18
-    sink(a); // $ ast=96:26 ast=98:18 ir-sink=96:26 ir-sink=98:18
+void calls_sink_with_argv(const char* a) { // $ ir-path=96:26 ir-path=102:26
+    sink(a); // $ ast=96:26 ast=98:18 ir-sink
 }
 
 extern int i;
@@ -27,7 +27,7 @@ class BaseWithPureVirtual {
 class DerivedCallsSink : public BaseWithPureVirtual {
 public:
     void f(const char* p) override { // $ ir-path
-        sink(p); // $ ir-sink ast=108:10 SPURIOUS: ast=111:10
+        sink(p); // $ ast=108:10 ir-sink SPURIOUS: ast=111:10
     }
 };
 
@@ -49,16 +49,16 @@ class DerivedDoesNotCallSinkDiamond2 : virtual public BaseWithPureVirtual {
 };
 
 class DerivesMultiple : public DerivedCallsSinkDiamond1, public DerivedDoesNotCallSinkDiamond2 {
-    void f(const char* p) override { // $ ir-path
-        DerivedCallsSinkDiamond1::f(p);
+    void f(const char* p) override { // $ ir-path=53:37 ir-path=115:11
+        DerivedCallsSinkDiamond1::f(p); // $ ir-path
     }
 };
 
 template<typename T>
 class CRTP {
 public:
     void f(const char* p) { // $ ir-path
-        static_cast<T*>(this)->g(p);
+        static_cast<T*>(this)->g(p); // $ ir-path
     }
 };
 
@@ -79,7 +79,7 @@ class Derived2 : public Derived1 {
 class Derived3 : public Derived2 {
     public:
     void f(const char* p) override { // $ ir-path=124:19 ir-path=126:43 ir-path=128:44
-        sink(p); // $ ast,ir-sink=124:19 ast,ir-sink=126:43 ast,ir-sink=128:44
+        sink(p); // $ ast=124:19 ast=126:43 ast=128:44 ir-sink
     }
 };
 
@@ -97,11 +97,11 @@ int main(int argc, char *argv[]) {
 
     char*** p = &argv; // $ ast,ir-path
 
-    sink(*p[0]); // $ ast,ir-sink
+    sink(*p[0]); // $ ast ir-sink=96:26 ir-sink=98:18
 
-    calls_sink_with_argv(*p[i]); // $ MISSING: ast,ir-path
+    calls_sink_with_argv(*p[i]); // $ ir-path=96:26 ir-path=98:18 MISSING:ast
 
-    sink(*(argv + 1)); // $ ast,ir-path ir-sink
+    sink(*(argv + 1)); // $ ast ir-path ir-sink
 
     BaseWithPureVirtual* b = new DerivedCallsSink;
 

@@ -190,9 +190,9 @@ void test_pointers1()
 	sink(ptr1); // $ ast MISSING: ir
 	sink(ptr2); // $ SPURIOUS: ast
 	sink(*ptr2); // $ ast MISSING: ir
-	sink(ptr3); // $ ast MISSING: ir
-	sink(ptr4); // $ SPURIOUS: ast
-	sink(*ptr4); // $ ast MISSING: ir
+	sink(ptr3); // $ ast,ir
+	sink(ptr4); // $ SPURIOUS: ast,ir
+	sink(*ptr4); // $ ast,ir
 }
 
 void test_pointers2()