Skip to content

Commit 02a75a4

Browse files
committed
C++: Change the behavior of 'isUserInput' so that the 'argv' paramter (and not all of the uses of 'argv') is user input. This removes the duplicate paths.
1 parent 5434ca2 commit 02a75a4

File tree

4 files changed

+58
-52
lines changed

4 files changed

+58
-52
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,24 @@ predicate predictableOnlyFlow(string name) {
4747
]
4848
}
4949

50-
private DataFlow::Node getNodeForSource(Expr source) {
51-
isUserInput(source, _) and
52-
result = getNodeForExpr(source)
50+
private DataFlow::Node getNodeForSource(Element source) {
51+
(
52+
// It is user input
53+
isUserInput(source, _) and
54+
// But `isUserInput` also marks all uses of `argv` as user input, and we don't want those.
55+
not argv(source.(VariableAccess).getTarget())
56+
or
57+
// Instead, we want the actual parameter as user input.
58+
argv(source)
59+
) and
60+
result = getNodeForElement(source)
5361
}
5462

55-
private DataFlow::Node getNodeForExpr(Expr node) {
63+
private DataFlow::Node getNodeForElement(Element node) {
5664
result = DataFlow::exprNode(node)
5765
or
66+
result.asParameter() = node
67+
or
5868
// Some of the sources in `isUserInput` are intended to match the value of
5969
// an expression, while others (those modeled below) are intended to match
6070
// the taint that propagates out of an argument, like the `char *` argument
@@ -194,13 +204,7 @@ private module Cached {
194204
cached
195205
predicate nodeIsBarrierIn(DataFlow::Node node) {
196206
// don't use dataflow into taint sources, as this leads to duplicate results.
197-
exists(Expr source | isUserInput(source, _) |
198-
node = DataFlow::exprNode(source)
199-
or
200-
// This case goes together with the similar (but not identical) rule in
201-
// `getNodeForSource`.
202-
node = DataFlow::definitionByReferenceNodeFromArgument(source)
203-
)
207+
node = getNodeForSource(_)
204208
or
205209
// don't use dataflow into binary instructions if both operands are unpredictable
206210
exists(BinaryInstruction iTo |
@@ -303,7 +307,7 @@ private import Cached
303307
* If you need that you must call `taintedIncludingGlobalVars`.
304308
*/
305309
cached
306-
predicate tainted(Expr source, Element tainted) {
310+
predicate tainted(Element source, Element tainted) {
307311
exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
308312
cfg.hasFlow(getNodeForSource(source), sink) and
309313
tainted = adjustedSink(sink)
@@ -326,7 +330,7 @@ predicate tainted(Expr source, Element tainted) {
326330
* through a global variable, then `globalVar = ""`.
327331
*/
328332
cached
329-
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
333+
predicate taintedIncludingGlobalVars(Element source, Element tainted, string globalVar) {
330334
tainted(source, tainted) and
331335
globalVar = ""
332336
or
@@ -376,13 +380,13 @@ module TaintedWithPath {
376380
*/
377381
class TaintTrackingConfiguration extends TSingleton {
378382
/** Override this to specify which elements are sources in this configuration. */
379-
predicate isSource(Expr source) { exists(getNodeForSource(source)) }
383+
predicate isSource(Element source) { exists(getNodeForSource(source)) }
380384

381385
/** Override this to specify which elements are sinks in this configuration. */
382386
abstract predicate isSink(Element e);
383387

384388
/** Override this to specify which expressions are barriers in this configuration. */
385-
predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) }
389+
predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForElement(e)) }
386390

387391
/**
388392
* Override this predicate to `any()` to allow taint to flow through global
@@ -398,8 +402,8 @@ module TaintedWithPath {
398402
AdjustedConfiguration() { this = "AdjustedConfiguration" }
399403

400404
override predicate isSource(DataFlow::Node source) {
401-
exists(TaintTrackingConfiguration cfg, Expr e |
402-
cfg.isSource(e) and source = getNodeForExpr(e)
405+
exists(TaintTrackingConfiguration cfg, Element e |
406+
cfg.isSource(e) and source = getNodeForElement(e)
403407
)
404408
}
405409

@@ -419,7 +423,9 @@ module TaintedWithPath {
419423
}
420424

421425
override predicate isSanitizer(DataFlow::Node node) {
422-
exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e))
426+
exists(TaintTrackingConfiguration cfg, Element e |
427+
cfg.isBarrier(e) and node = getNodeForElement(e)
428+
)
423429
}
424430

425431
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
@@ -447,7 +453,7 @@ module TaintedWithPath {
447453
exists(AdjustedConfiguration cfg, DataFlow3::Node sourceNode, DataFlow3::Node sinkNode |
448454
cfg.hasFlow(sourceNode, sinkNode)
449455
|
450-
sourceNode = getNodeForExpr(e) and
456+
sourceNode = getNodeForElement(e) and
451457
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e))
452458
or
453459
e = adjustedSink(sinkNode) and
@@ -506,7 +512,7 @@ module TaintedWithPath {
506512
}
507513

508514
private class EndpointPathNode extends PathNode, TEndpointPathNode {
509-
Expr inner() { this = TEndpointPathNode(result) }
515+
Element inner() { this = TEndpointPathNode(result) }
510516

511517
override string toString() { result = this.inner().toString() }
512518

@@ -543,14 +549,14 @@ module TaintedWithPath {
543549
// Same for the first node
544550
exists(WrapPathNode sourceNode |
545551
DataFlow3::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and
546-
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner())
552+
sourceNode.inner().getNode() = getNodeForElement(a.(InitialPathNode).inner())
547553
)
548554
or
549555
// Finally, handle the case where the path goes directly from a source to a
550556
// sink, meaning that they both need to be translated.
551557
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
552558
DataFlow3::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and
553-
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
559+
sourceNode.inner().getNode() = getNodeForElement(a.(InitialPathNode).inner()) and
554560
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
555561
)
556562
}
@@ -575,15 +581,15 @@ module TaintedWithPath {
575581
exists(WrapPathNode sourceNode |
576582
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
577583
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and
578-
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner())
584+
sourceNode.inner().getNode() = getNodeForElement(arg.(InitialPathNode).inner())
579585
)
580586
or
581587
// Finally, handle the case where the path goes directly from a source to a
582588
// sink, meaning that they both need to be translated.
583589
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
584590
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
585591
ret.(WrapPathNode).inner(), sinkNode.inner()) and
586-
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
592+
sourceNode.inner().getNode() = getNodeForElement(arg.(InitialPathNode).inner()) and
587593
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
588594
)
589595
}
@@ -603,10 +609,10 @@ module TaintedWithPath {
603609
* user input in a way that users can probably control the exact output of
604610
* the computation.
605611
*/
606-
predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
612+
predicate taintedWithPath(Element source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
607613
exists(AdjustedConfiguration cfg, DataFlow3::Node flowSource, DataFlow3::Node flowSink |
608614
source = sourceNode.(InitialPathNode).inner() and
609-
flowSource = getNodeForExpr(source) and
615+
flowSource = getNodeForElement(source) and
610616
cfg.hasFlow(flowSource, flowSink) and
611617
tainted = adjustedSink(flowSink) and
612618
tainted = sinkNode.(FinalPathNode).inner()

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/test_diff.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,20 @@ class CRTPDoesNotCallSink : public CRTP<CRTPDoesNotCallSink> {
8888
void g(const char* p) {}
8989
};
9090

91-
int main(int argc, char *argv[]) {
92-
sink(argv[0]); // $ ast,ir-path,ir-sink
91+
int main(int argc, char *argv[]) { // $ ir-path
92+
sink(argv[0]); // $ ast,ir-sink
9393

94-
sink(reinterpret_cast<int>(argv)); // $ ast,ir-path,ir-sink
94+
sink(reinterpret_cast<int>(argv)); // $ ast,ir-sink
9595

9696
calls_sink_with_argv(argv[1]); // $ ast,ir-path
9797

98-
char*** p = &argv; // $ ast,ir-path
98+
char*** p = &argv; // $ ast
9999

100-
sink(*p[0]); // $ ast ir-sink=92:10 ir-sink=94:32 ir-sink=96:26 ir-sink=98:18
100+
sink(*p[0]); // $ ast,ir-sink
101101

102-
calls_sink_with_argv(*p[i]); // $ ir-path=92:10 ir-path=94:32 ir-path=96:26 ir-path=98:18 MISSING:ast
102+
calls_sink_with_argv(*p[i]); // $ ir-path MISSING:ast
103103

104-
sink(*(argv + 1)); // $ ast ir-path ir-sink
104+
sink(*(argv + 1)); // $ ast,ir-sink
105105

106106
BaseWithPureVirtual* b = new DerivedCallsSink;
107107

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
| test.cpp:23:23:23:28 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only |
2-
| test.cpp:23:23:23:28 | call to getenv | test.cpp:23:14:23:19 | envStr | AST only |
3-
| test.cpp:38:23:38:28 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only |
4-
| test.cpp:38:23:38:28 | call to getenv | test.cpp:38:14:38:19 | envStr | AST only |
51
| test.cpp:49:23:49:28 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only |
6-
| test.cpp:49:23:49:28 | call to getenv | test.cpp:45:13:45:24 | envStrGlobal | AST only |
7-
| test.cpp:49:23:49:28 | call to getenv | test.cpp:49:14:49:19 | envStr | AST only |
82
| test.cpp:49:23:49:28 | call to getenv | test.cpp:50:15:50:24 | envStr_ptr | AST only |
93
| test.cpp:49:23:49:28 | call to getenv | test.cpp:50:28:50:40 | & ... | AST only |
104
| test.cpp:49:23:49:28 | call to getenv | test.cpp:50:29:50:40 | envStrGlobal | AST only |
@@ -14,33 +8,20 @@
148
| test.cpp:49:23:49:28 | call to getenv | test.cpp:54:7:54:12 | call to strcmp | AST only |
159
| test.cpp:49:23:49:28 | call to getenv | test.cpp:54:7:54:35 | (bool)... | AST only |
1610
| test.cpp:49:23:49:28 | call to getenv | test.cpp:54:14:54:25 | envStrGlobal | AST only |
17-
| test.cpp:60:29:60:34 | call to getenv | test.cpp:10:27:10:27 | s | AST only |
18-
| test.cpp:60:29:60:34 | call to getenv | test.cpp:60:18:60:25 | userName | AST only |
1911
| test.cpp:68:28:68:33 | call to getenv | test.cpp:11:20:11:21 | s1 | AST only |
20-
| test.cpp:68:28:68:33 | call to getenv | test.cpp:11:36:11:37 | s2 | AST only |
2112
| test.cpp:68:28:68:33 | call to getenv | test.cpp:67:7:67:13 | copying | AST only |
22-
| test.cpp:68:28:68:33 | call to getenv | test.cpp:68:17:68:24 | userName | AST only |
2313
| test.cpp:68:28:68:33 | call to getenv | test.cpp:69:10:69:13 | copy | AST only |
2414
| test.cpp:68:28:68:33 | call to getenv | test.cpp:70:5:70:10 | call to strcpy | AST only |
2515
| test.cpp:68:28:68:33 | call to getenv | test.cpp:70:12:70:15 | copy | AST only |
2616
| test.cpp:68:28:68:33 | call to getenv | test.cpp:71:12:71:15 | copy | AST only |
27-
| test.cpp:75:20:75:25 | call to getenv | test.cpp:15:22:15:25 | nptr | AST only |
28-
| test.cpp:83:28:83:33 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only |
2917
| test.cpp:83:28:83:33 | call to getenv | test.cpp:11:20:11:21 | s1 | AST only |
30-
| test.cpp:83:28:83:33 | call to getenv | test.cpp:11:36:11:37 | s2 | AST only |
31-
| test.cpp:83:28:83:33 | call to getenv | test.cpp:83:17:83:24 | userName | AST only |
3218
| test.cpp:83:28:83:33 | call to getenv | test.cpp:85:8:85:11 | copy | AST only |
3319
| test.cpp:83:28:83:33 | call to getenv | test.cpp:86:2:86:7 | call to strcpy | AST only |
3420
| test.cpp:83:28:83:33 | call to getenv | test.cpp:86:9:86:12 | copy | AST only |
35-
| test.cpp:100:12:100:15 | call to gets | test.cpp:98:8:98:14 | pointer | AST only |
3621
| test.cpp:100:12:100:15 | call to gets | test.cpp:100:2:100:8 | pointer | AST only |
37-
| test.cpp:100:17:100:22 | buffer | test.cpp:93:18:93:18 | s | AST only |
3822
| test.cpp:100:17:100:22 | buffer | test.cpp:97:7:97:12 | buffer | AST only |
3923
| test.cpp:100:17:100:22 | buffer | test.cpp:100:17:100:22 | array to pointer conversion | IR only |
40-
| test.cpp:106:28:106:33 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only |
4124
| test.cpp:106:28:106:33 | call to getenv | test.cpp:11:20:11:21 | s1 | AST only |
42-
| test.cpp:106:28:106:33 | call to getenv | test.cpp:11:36:11:37 | s2 | AST only |
43-
| test.cpp:106:28:106:33 | call to getenv | test.cpp:106:17:106:24 | userName | AST only |
4425
| test.cpp:106:28:106:33 | call to getenv | test.cpp:108:8:108:11 | copy | AST only |
4526
| test.cpp:106:28:106:33 | call to getenv | test.cpp:109:2:109:7 | call to strcpy | AST only |
4627
| test.cpp:106:28:106:33 | call to getenv | test.cpp:109:9:109:12 | copy | AST only |

cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
| test.cpp:23:23:23:28 | call to getenv | test.cpp:8:24:8:25 | s1 |
2+
| test.cpp:23:23:23:28 | call to getenv | test.cpp:23:14:23:19 | envStr |
13
| test.cpp:23:23:23:28 | call to getenv | test.cpp:23:23:23:28 | call to getenv |
24
| test.cpp:23:23:23:28 | call to getenv | test.cpp:23:23:23:40 | (const char *)... |
35
| test.cpp:23:23:23:28 | call to getenv | test.cpp:25:6:25:29 | ! ... |
@@ -8,21 +10,33 @@
810
| test.cpp:23:23:23:28 | call to getenv | test.cpp:29:7:29:12 | call to strcmp |
911
| test.cpp:23:23:23:28 | call to getenv | test.cpp:29:7:29:28 | (bool)... |
1012
| test.cpp:23:23:23:28 | call to getenv | test.cpp:29:14:29:19 | envStr |
13+
| test.cpp:38:23:38:28 | call to getenv | test.cpp:8:24:8:25 | s1 |
14+
| test.cpp:38:23:38:28 | call to getenv | test.cpp:38:14:38:19 | envStr |
1115
| test.cpp:38:23:38:28 | call to getenv | test.cpp:38:23:38:28 | call to getenv |
1216
| test.cpp:38:23:38:28 | call to getenv | test.cpp:38:23:38:40 | (const char *)... |
1317
| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:14:40:19 | envStr |
18+
| test.cpp:49:23:49:28 | call to getenv | test.cpp:45:13:45:24 | envStrGlobal |
19+
| test.cpp:49:23:49:28 | call to getenv | test.cpp:49:14:49:19 | envStr |
1420
| test.cpp:49:23:49:28 | call to getenv | test.cpp:49:23:49:28 | call to getenv |
1521
| test.cpp:49:23:49:28 | call to getenv | test.cpp:49:23:49:40 | (const char *)... |
1622
| test.cpp:49:23:49:28 | call to getenv | test.cpp:52:16:52:21 | envStr |
23+
| test.cpp:60:29:60:34 | call to getenv | test.cpp:10:27:10:27 | s |
24+
| test.cpp:60:29:60:34 | call to getenv | test.cpp:60:18:60:25 | userName |
1725
| test.cpp:60:29:60:34 | call to getenv | test.cpp:60:29:60:34 | call to getenv |
1826
| test.cpp:60:29:60:34 | call to getenv | test.cpp:60:29:60:47 | (const char *)... |
1927
| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:25:64:32 | userName |
28+
| test.cpp:68:28:68:33 | call to getenv | test.cpp:11:36:11:37 | s2 |
29+
| test.cpp:68:28:68:33 | call to getenv | test.cpp:68:17:68:24 | userName |
2030
| test.cpp:68:28:68:33 | call to getenv | test.cpp:68:28:68:33 | call to getenv |
2131
| test.cpp:68:28:68:33 | call to getenv | test.cpp:68:28:68:46 | (const char *)... |
2232
| test.cpp:68:28:68:33 | call to getenv | test.cpp:70:18:70:25 | userName |
33+
| test.cpp:75:20:75:25 | call to getenv | test.cpp:15:22:15:25 | nptr |
2334
| test.cpp:75:20:75:25 | call to getenv | test.cpp:75:15:75:18 | call to atoi |
2435
| test.cpp:75:20:75:25 | call to getenv | test.cpp:75:20:75:25 | call to getenv |
2536
| test.cpp:75:20:75:25 | call to getenv | test.cpp:75:20:75:45 | (const char *)... |
37+
| test.cpp:83:28:83:33 | call to getenv | test.cpp:8:24:8:25 | s1 |
38+
| test.cpp:83:28:83:33 | call to getenv | test.cpp:11:36:11:37 | s2 |
39+
| test.cpp:83:28:83:33 | call to getenv | test.cpp:83:17:83:24 | userName |
2640
| test.cpp:83:28:83:33 | call to getenv | test.cpp:83:28:83:33 | call to getenv |
2741
| test.cpp:83:28:83:33 | call to getenv | test.cpp:83:28:83:46 | (const char *)... |
2842
| test.cpp:83:28:83:33 | call to getenv | test.cpp:86:15:86:22 | userName |
@@ -31,9 +45,14 @@
3145
| test.cpp:83:28:83:33 | call to getenv | test.cpp:88:7:88:27 | (bool)... |
3246
| test.cpp:83:28:83:33 | call to getenv | test.cpp:88:14:88:17 | (const char *)... |
3347
| test.cpp:83:28:83:33 | call to getenv | test.cpp:88:14:88:17 | copy |
48+
| test.cpp:100:12:100:15 | call to gets | test.cpp:98:8:98:14 | pointer |
3449
| test.cpp:100:12:100:15 | call to gets | test.cpp:100:12:100:15 | call to gets |
50+
| test.cpp:100:17:100:22 | buffer | test.cpp:93:18:93:18 | s |
3551
| test.cpp:100:17:100:22 | buffer | test.cpp:100:17:100:22 | array to pointer conversion |
3652
| test.cpp:100:17:100:22 | buffer | test.cpp:100:17:100:22 | buffer |
53+
| test.cpp:106:28:106:33 | call to getenv | test.cpp:8:24:8:25 | s1 |
54+
| test.cpp:106:28:106:33 | call to getenv | test.cpp:11:36:11:37 | s2 |
55+
| test.cpp:106:28:106:33 | call to getenv | test.cpp:106:17:106:24 | userName |
3756
| test.cpp:106:28:106:33 | call to getenv | test.cpp:106:28:106:33 | call to getenv |
3857
| test.cpp:106:28:106:33 | call to getenv | test.cpp:106:28:106:46 | (const char *)... |
3958
| test.cpp:106:28:106:33 | call to getenv | test.cpp:109:15:109:22 | userName |

0 commit comments

Comments
 (0)