From 2801941ca24826d39efc8b69478dc077eaf01795 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Thu, 26 Mar 2020 20:36:49 +0100 Subject: [PATCH] C++: Never track flow out of an argv argument This change removes some duplicate results that will otherwise appear due to https://github.com/Semmle/ql/pull/3123 and possibly https://github.com/Semmle/ql/pull/2704. --- .../semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index 7321d82ebc77..d932ad30b87e 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -60,7 +60,14 @@ private DataFlow::Node getNodeForSource(Expr source) { ( result = DataFlow::exprNode(source) or - result = DataFlow::definitionByReferenceNode(source) + // Some of the sources in `isUserInput` are intended to match the value of + // an expression, while others (those modeled below) are intended to match + // the taint that propagates out of an argument, like the `char *` argument + // to `gets`. It's impossible here to tell which is which, but the "access + // to argv" source is definitely not intended to match an output argument, + // and it causes false positives if we let it. + result = DataFlow::definitionByReferenceNode(source) and + not argv(source.(VariableAccess).getTarget()) ) }