Skip to content

Commit 8b544c0

Browse files
committed
C++: Data flow through address-of operator (&)
The data flow library conflates pointers and their objects in some places but not others. For example, a member function call `x.f()` will cause flow from `x` of type `T` to `this` of type `T*` inside `f`. It might be ideal to avoid that conflation, but that's not realistic without using the IR. We've had good experience in the taint tracking library with conflating pointers and objects, and it improves results for field flow, so perhaps it's time to try it out for all data flow.
1 parent ef4f954 commit 8b544c0

File tree

8 files changed

+40
-7
lines changed

8 files changed

+40
-7
lines changed

change-notes/1.23/analysis-cpp.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ The following changes in version 1.23 affect C/C++ analysis in all applications.
2828
picture of the partial flow paths from a given source. The feature is
2929
disabled by default and can be enabled for individual configurations by
3030
overriding `int explorationLimit()`.
31+
* The data-flow library now allows flow through the address-of operator (`&`).
3132
* The `DataFlow::DefinitionByReferenceNode` class now considers `f(x)` to be a
3233
definition of `x` when `x` is a variable of pointer type. It no longer
3334
considers deep paths such as `f(&x.myField)` to be definitions of `x`. These

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,10 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
540540
or
541541
toExpr = any(StmtExpr stmtExpr | fromExpr = stmtExpr.getResultExpr())
542542
or
543+
toExpr.(AddressOfExpr).getOperand() = fromExpr
544+
or
545+
toExpr.(BuiltInOperationBuiltInAddressOf).getOperand() = fromExpr
546+
or
543547
// The following case is needed to track the qualifier object for flow
544548
// through fields. It gives flow from `T(x)` to `new T(x)`. That's not
545549
// strictly _data_ flow but _taint_ flow because the type of `fromExpr` is

cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ void following_pointers(
1919

2020
sink(sourceArray1[0]); // no flow
2121
sink(*sourceArray1); // no flow
22-
sink(&sourceArray1); // no flow (since sourceArray1 is really a pointer)
22+
sink(&sourceArray1); // flow (should probably be taint only)
2323

2424
sink(sourceStruct1.m1); // no flow
2525
sink(sourceStruct1_ptr->m1); // no flow

cpp/ql/test/library-tests/dataflow/dataflow-tests/localFlow.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
1515
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |
1616
| example.c:26:18:26:24 | ref arg & ... | example.c:26:2:26:7 | coords |
17+
| example.c:26:19:26:24 | coords | example.c:26:18:26:24 | & ... |
18+
| example.c:28:23:28:25 | pos | example.c:28:22:28:25 | & ... |
1719
| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 |
1820
| test.cpp:6:12:6:17 | call to source | test.cpp:8:8:8:9 | t1 |
1921
| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 |
@@ -45,17 +47,22 @@
4547
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:384:3:384:8 | call to memcpy |
4648
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:384:33:384:35 | tmp |
4749
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:385:8:385:10 | tmp |
50+
| test.cpp:384:11:384:13 | tmp | test.cpp:384:10:384:13 | & ... |
4851
| test.cpp:384:17:384:23 | source1 | test.cpp:384:10:384:13 | ref arg & ... |
52+
| test.cpp:384:17:384:23 | source1 | test.cpp:384:16:384:23 | & ... |
4953
| test.cpp:388:53:388:59 | source1 | test.cpp:391:17:391:23 | source1 |
5054
| test.cpp:388:66:388:66 | b | test.cpp:393:7:393:7 | b |
5155
| test.cpp:389:12:389:13 | 0 | test.cpp:390:19:390:21 | tmp |
5256
| test.cpp:389:12:389:13 | 0 | test.cpp:391:11:391:13 | tmp |
5357
| test.cpp:389:12:389:13 | 0 | test.cpp:391:33:391:35 | tmp |
5458
| test.cpp:389:12:389:13 | 0 | test.cpp:392:8:392:10 | tmp |
5559
| test.cpp:389:12:389:13 | 0 | test.cpp:394:10:394:12 | tmp |
60+
| test.cpp:390:19:390:21 | tmp | test.cpp:390:18:390:21 | & ... |
5661
| test.cpp:391:10:391:13 | & ... | test.cpp:391:3:391:8 | call to memcpy |
5762
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:391:3:391:8 | call to memcpy |
5863
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:391:33:391:35 | tmp |
5964
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:392:8:392:10 | tmp |
6065
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:394:10:394:12 | tmp |
66+
| test.cpp:391:11:391:13 | tmp | test.cpp:391:10:391:13 | & ... |
6167
| test.cpp:391:17:391:23 | source1 | test.cpp:391:10:391:13 | ref arg & ... |
68+
| test.cpp:391:17:391:23 | source1 | test.cpp:391:16:391:23 | & ... |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
| acrossLinkTargets.cpp:12:8:12:8 | x | acrossLinkTargets.cpp:19:27:19:32 | call to source |
22
| clang.cpp:18:8:18:19 | sourceArray1 | clang.cpp:12:9:12:20 | sourceArray1 |
3+
| clang.cpp:22:8:22:20 | & ... | clang.cpp:12:9:12:20 | sourceArray1 |
34
| clang.cpp:29:27:29:28 | m1 | clang.cpp:28:27:28:32 | call to source |
45
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
56
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
12
| clang.cpp:28:27:28:32 | clang.cpp:29:27:29:28 | AST only |
23
| clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only |
34
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |

cpp/ql/test/library-tests/dataflow/fields/flow.expected

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,30 @@ edges
163163
| simple.cpp:48:9:48:9 | g [b_] | simple.cpp:26:15:26:15 | f [b_] |
164164
| simple.cpp:51:9:51:9 | h [a_] | simple.cpp:26:15:26:15 | f [a_] |
165165
| simple.cpp:51:9:51:9 | h [b_] | simple.cpp:26:15:26:15 | f [b_] |
166+
| struct_init.c:14:24:14:25 | ab [a] | struct_init.c:15:8:15:9 | ab [a] |
167+
| struct_init.c:15:8:15:9 | ab [a] | struct_init.c:15:12:15:12 | a |
166168
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:22:8:22:9 | ab [a] |
169+
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:24:10:24:12 | & ... [a] |
170+
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:28:5:28:7 | & ... [a] |
167171
| struct_init.c:20:20:20:29 | call to user_input | struct_init.c:20:17:20:36 | {...} [a] |
168172
| struct_init.c:22:8:22:9 | ab [a] | struct_init.c:22:11:22:11 | a |
173+
| struct_init.c:24:10:24:12 | & ... [a] | struct_init.c:14:24:14:25 | ab [a] |
169174
| struct_init.c:26:23:29:3 | {...} [nestedAB, a] | struct_init.c:31:8:31:12 | outer [nestedAB, a] |
175+
| struct_init.c:26:23:29:3 | {...} [nestedAB, a] | struct_init.c:36:11:36:15 | outer [nestedAB, a] |
176+
| struct_init.c:26:23:29:3 | {...} [pointerAB, a] | struct_init.c:33:8:33:12 | outer [pointerAB, a] |
177+
| struct_init.c:26:23:29:3 | {...} [pointerAB, a] | struct_init.c:37:10:37:14 | outer [pointerAB, a] |
170178
| struct_init.c:27:5:27:23 | {...} [a] | struct_init.c:26:23:29:3 | {...} [nestedAB, a] |
171179
| struct_init.c:27:7:27:16 | call to user_input | struct_init.c:27:5:27:23 | {...} [a] |
180+
| struct_init.c:28:5:28:7 | & ... [a] | struct_init.c:26:23:29:3 | {...} [pointerAB, a] |
172181
| struct_init.c:31:8:31:12 | outer [nestedAB, a] | struct_init.c:31:14:31:21 | nestedAB [a] |
173182
| struct_init.c:31:14:31:21 | nestedAB [a] | struct_init.c:31:23:31:23 | a |
183+
| struct_init.c:33:8:33:12 | outer [pointerAB, a] | struct_init.c:33:14:33:22 | pointerAB [a] |
184+
| struct_init.c:33:14:33:22 | pointerAB [a] | struct_init.c:33:25:33:25 | a |
185+
| struct_init.c:36:10:36:24 | & ... [a] | struct_init.c:14:24:14:25 | ab [a] |
186+
| struct_init.c:36:11:36:15 | outer [nestedAB, a] | struct_init.c:36:17:36:24 | nestedAB [a] |
187+
| struct_init.c:36:17:36:24 | nestedAB [a] | struct_init.c:36:10:36:24 | & ... [a] |
188+
| struct_init.c:37:10:37:14 | outer [pointerAB, a] | struct_init.c:37:16:37:24 | pointerAB [a] |
189+
| struct_init.c:37:16:37:24 | pointerAB [a] | struct_init.c:14:24:14:25 | ab [a] |
174190
#select
175191
| A.cpp:43:10:43:12 | & ... | A.cpp:41:15:41:21 | new | A.cpp:43:10:43:12 | & ... | & ... flows from $@ | A.cpp:41:15:41:21 | new | new |
176192
| A.cpp:49:13:49:13 | c | A.cpp:47:12:47:18 | new | A.cpp:49:13:49:13 | c | c flows from $@ | A.cpp:47:12:47:18 | new | new |
@@ -207,5 +223,8 @@ edges
207223
| simple.cpp:28:12:28:12 | call to a | simple.cpp:41:12:41:21 | call to user_input | simple.cpp:28:12:28:12 | call to a | call to a flows from $@ | simple.cpp:41:12:41:21 | call to user_input | call to user_input |
208224
| simple.cpp:29:12:29:12 | call to b | simple.cpp:40:12:40:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:40:12:40:21 | call to user_input | call to user_input |
209225
| simple.cpp:29:12:29:12 | call to b | simple.cpp:42:12:42:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:42:12:42:21 | call to user_input | call to user_input |
226+
| struct_init.c:15:12:15:12 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |
227+
| struct_init.c:15:12:15:12 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input |
210228
| struct_init.c:22:11:22:11 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:22:11:22:11 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |
211229
| struct_init.c:31:23:31:23 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:31:23:31:23 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input |
230+
| struct_init.c:33:25:33:25 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:33:25:33:25 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |

cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,12 @@
102102
| taint.cpp:121:10:121:11 | 1 | taint.cpp:124:13:124:14 | t2 | |
103103
| taint.cpp:122:10:122:11 | 1 | taint.cpp:125:13:125:14 | t3 | |
104104
| taint.cpp:123:12:123:14 | & ... | taint.cpp:129:8:129:9 | p1 | |
105-
| taint.cpp:123:13:123:14 | t1 | taint.cpp:123:12:123:14 | & ... | TAINT |
105+
| taint.cpp:123:13:123:14 | t1 | taint.cpp:123:12:123:14 | & ... | |
106106
| taint.cpp:124:12:124:14 | & ... | taint.cpp:127:3:127:4 | p2 | |
107107
| taint.cpp:124:12:124:14 | & ... | taint.cpp:130:8:130:9 | p2 | |
108-
| taint.cpp:124:13:124:14 | t2 | taint.cpp:124:12:124:14 | & ... | TAINT |
108+
| taint.cpp:124:13:124:14 | t2 | taint.cpp:124:12:124:14 | & ... | |
109109
| taint.cpp:125:12:125:14 | & ... | taint.cpp:131:8:131:9 | p3 | |
110-
| taint.cpp:125:13:125:14 | t3 | taint.cpp:125:12:125:14 | & ... | TAINT |
110+
| taint.cpp:125:13:125:14 | t3 | taint.cpp:125:12:125:14 | & ... | |
111111
| taint.cpp:127:3:127:4 | p2 | taint.cpp:127:2:127:4 | * ... | TAINT |
112112
| taint.cpp:127:8:127:13 | call to source | taint.cpp:127:2:127:15 | ... = ... | |
113113
| taint.cpp:129:8:129:9 | p1 | taint.cpp:129:7:129:9 | * ... | TAINT |
@@ -117,7 +117,7 @@
117117
| taint.cpp:133:7:133:9 | & ... | taint.cpp:134:8:134:9 | p3 | |
118118
| taint.cpp:133:7:133:9 | & ... | taint.cpp:136:3:136:4 | p3 | |
119119
| taint.cpp:133:7:133:9 | & ... | taint.cpp:137:8:137:9 | p3 | |
120-
| taint.cpp:133:8:133:9 | t1 | taint.cpp:133:7:133:9 | & ... | TAINT |
120+
| taint.cpp:133:8:133:9 | t1 | taint.cpp:133:7:133:9 | & ... | |
121121
| taint.cpp:134:8:134:9 | p3 | taint.cpp:134:7:134:9 | * ... | TAINT |
122122
| taint.cpp:136:3:136:4 | p3 | taint.cpp:136:2:136:4 | * ... | TAINT |
123123
| taint.cpp:136:8:136:8 | 0 | taint.cpp:136:2:136:8 | ... = ... | |
@@ -150,14 +150,14 @@
150150
| taint.cpp:180:19:180:19 | p | taint.cpp:181:9:181:9 | p | |
151151
| taint.cpp:181:9:181:9 | p | taint.cpp:181:8:181:9 | * ... | TAINT |
152152
| taint.cpp:185:11:185:16 | call to source | taint.cpp:186:11:186:11 | x | |
153-
| taint.cpp:186:11:186:11 | x | taint.cpp:186:10:186:11 | & ... | TAINT |
153+
| taint.cpp:186:11:186:11 | x | taint.cpp:186:10:186:11 | & ... | |
154154
| taint.cpp:192:23:192:28 | source | taint.cpp:194:13:194:18 | source | |
155155
| taint.cpp:193:6:193:6 | x | taint.cpp:194:10:194:10 | x | |
156156
| taint.cpp:193:6:193:6 | x | taint.cpp:195:7:195:7 | x | |
157157
| taint.cpp:194:9:194:10 | & ... | taint.cpp:194:2:194:7 | call to memcpy | |
158158
| taint.cpp:194:9:194:10 | ref arg & ... | taint.cpp:194:2:194:7 | call to memcpy | |
159159
| taint.cpp:194:9:194:10 | ref arg & ... | taint.cpp:195:7:195:7 | x | |
160-
| taint.cpp:194:10:194:10 | x | taint.cpp:194:9:194:10 | & ... | TAINT |
160+
| taint.cpp:194:10:194:10 | x | taint.cpp:194:9:194:10 | & ... | |
161161
| taint.cpp:194:13:194:18 | source | taint.cpp:194:9:194:10 | ref arg & ... | TAINT |
162162
| taint.cpp:194:21:194:31 | sizeof(int) | taint.cpp:194:9:194:10 | ref arg & ... | TAINT |
163163
| taint.cpp:207:6:207:11 | call to source | taint.cpp:207:2:207:13 | ... = ... | |

0 commit comments

Comments
 (0)