diff --git a/docs/language/learn-ql/cpp/dataflow.rst b/docs/language/learn-ql/cpp/dataflow.rst index 89f25913fbcc..f8d4c9c76114 100644 --- a/docs/language/learn-ql/cpp/dataflow.rst +++ b/docs/language/learn-ql/cpp/dataflow.rst @@ -244,6 +244,49 @@ The following data flow configuration tracks data flow from environment variable select fopen, "This 'fopen' uses data from $@.", getenv, "call to 'getenv'" +The following taint-tracking configuration tracks data from a call to ``ntohl`` to an array index operation. It uses the ``Guards`` library to recognize expressions that have been bounds-checked, and defines ``isSanitizer`` to prevent taint from propagating through them. It also uses ``isAdditionalTaintStep`` to add flow from loop bounds to loop indexes. + +.. code-block:: ql + + import cpp + import semmle.code.cpp.controlflow.Guards + import semmle.code.cpp.dataflow.TaintTracking + + /** Taint-tracking configuration whose sources are calls to `ntohl` and whose sinks are array offset expressions. */ class NetworkToBufferSizeConfiguration extends TaintTracking::Configuration { + NetworkToBufferSizeConfiguration() { this = "NetworkToBufferSizeConfiguration" } + + /** Holds if `node` is a call to the global function named `ntohl`. */ override predicate isSource(DataFlow::Node node) { + node.asExpr().(FunctionCall).getTarget().hasGlobalName("ntohl") + } + + /** Holds if `node` is the offset expression of some array access. */ override predicate isSink(DataFlow::Node node) { + exists(ArrayExpr ae | node.asExpr() = ae.getArrayOffset()) + } + + /** Holds if `pred` is the greater operand of the relational controlling expression of a loop and `succ` is an access to the counter of that loop inside the loop. */ override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + exists(Loop loop, LoopCounter lc | + loop = lc.getALoop() and + loop.getControllingExpr().(RelationalOperation).getGreaterOperand() = pred.asExpr() | + succ.asExpr() = lc.getVariableAccessInLoop(loop) + ) + } + + /** Holds if `node` is an access to a variable that is also accessed somewhere within a guard condition, and that guard condition controls the basic block containing `node`. */ override predicate isSanitizer(DataFlow::Node node) { + exists(GuardCondition gc, Variable v | + gc.getAChild*() = v.getAnAccess() and + node.asExpr() = v.getAnAccess() and + gc.controls(node.asExpr().getBasicBlock(), _) + ) + } + } + + /** Reports each array offset that is reached by flow from an `ntohl` call under this configuration. */ from DataFlow::Node ntohl, DataFlow::Node offset, NetworkToBufferSizeConfiguration conf + where conf.hasFlow(ntohl, offset) + select offset, "This array 
offset may be influenced by $@.", ntohl, + "converted data from the network" + + + Exercises ~~~~~~~~~