From 218a3cf93d73ebf3c7e8143b2b051a2ff22cea63 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 20 May 2020 18:18:26 +0200 Subject: [PATCH 1/4] C++: Remove field conflation --- .../cpp/ir/dataflow/DefaultTaintTracking.qll | 2 + .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 63 ++++++++++++------- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index dbeefae48808..5087363eee6c 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -1,6 +1,7 @@ import cpp import semmle.code.cpp.security.Security private import semmle.code.cpp.ir.dataflow.DataFlow +private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil private import semmle.code.cpp.ir.dataflow.DataFlow2 private import semmle.code.cpp.ir.dataflow.DataFlow3 private import semmle.code.cpp.ir.IR @@ -228,6 +229,7 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) { // Flow from an element to an array or union that contains it. i2.(ChiInstruction).getPartial() = i1 and not i2.isResultConflated() and + not exists(PartialDefinitionNode n | n.asInstruction() = i2) and exists(Type t | i2.getResultLanguageType().hasType(t, false) | t instanceof Union or diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 9f9659a506e3..772754745dfe 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -239,6 +239,8 @@ abstract class PostUpdateNode extends InstructionNode { } /** + * INTERNAL: do not use. + * * The base class for nodes that perform "partial definitions". * * In contrast to a normal "definition", which provides a new value for @@ -251,7 +253,7 @@ abstract class PostUpdateNode extends InstructionNode { * setY(&x); // a partial definition of the object `x`. * ``` */ -abstract private class PartialDefinitionNode extends PostUpdateNode, TInstructionNode { } +abstract class PartialDefinitionNode extends PostUpdateNode, TInstructionNode { } private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override ChiInstruction instr; @@ -270,6 +272,17 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } } +private class FieldStoreWriteSideEffectNode extends PartialDefinitionNode { + override ChiInstruction instr; + + FieldStoreWriteSideEffectNode() { + not instr.isResultConflated() and + exists(WriteSideEffectInstruction sideEffect | instr.getPartial() = sideEffect) + } + + override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } +} + /** * Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to. * For instance, an update to a field of a struct containing only one field. For these cases we @@ -421,6 +434,32 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr */ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction()) + or + // The next two rules allow flow from partial definitions in setters to succeeding loads in the caller. + // First, we add flow from write side-effects to non-conflated chi instructions through their + // partial operands. Consider the following example: + // ``` + // void setX(Point* p, int new_x) { + // p->x = new_x; + // } + // ... + // setX(&p, taint()); + // ``` + // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to + // `setX`, which will be melded into `p` through a chi instruction. + nodeTo instanceof FieldStoreWriteSideEffectNode and + exists(ChiInstruction chi | chi = nodeTo.asInstruction() | + chi.getPartialOperand().getDef() = nodeFrom.asInstruction().(WriteSideEffectInstruction) and + not chi.isResultConflated() + ) + or + // Next, we add flow from non-conflated chi instructions to loads (even when they are not precise). + // This ensures that loads of `p->x` gets data flow from the `WriteSideEffectInstruction` above. + nodeFrom instanceof FieldStoreWriteSideEffectNode and + exists(ChiInstruction chi | chi = nodeFrom.asInstruction() | + not chi.isResultConflated() and + nodeTo.asInstruction().(LoadInstruction).getSourceValueOperand().getAnyDef() = chi + ) } pragma[noinline] @@ -458,28 +497,6 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction // for now. iTo.getAnOperand().(ChiTotalOperand).getDef() = iFrom or - // The next two rules allow flow from partial definitions in setters to succeeding loads in the caller. - // First, we add flow from write side-effects to non-conflated chi instructions through their - // partial operands. Consider the following example: - // ``` - // void setX(Point* p, int new_x) { - // p->x = new_x; - // } - // ... - // setX(&p, taint()); - // ``` - // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to - // `setX`, which will be melded into `p` through a chi instruction. - iTo.getAnOperand().(ChiPartialOperand).getDef() = iFrom.(WriteSideEffectInstruction) and - not iTo.isResultConflated() - or - // Next, we add flow from non-conflated chi instructions to loads (even when they are not precise). - // This ensures that loads of `p->x` gets data flow from the `WriteSideEffectInstruction` above. - exists(ChiInstruction chi | iFrom = chi | - not chi.isResultConflated() and - iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = chi - ) - or // Flow from stores to structs with a single field to a load of that field. iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom and exists(int size, Type type | From 3c167125e514a606cc54cb87eb8fc6bdd90d2c10 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 20 May 2020 18:18:34 +0200 Subject: [PATCH 2/4] C++: Accept test output --- .../DefaultTaintTracking/tainted.expected | 2 -- .../DefaultTaintTracking/test_diff.expected | 2 -- .../dataflow-ir-consistency.expected | 1 + .../TaintedAllocationSize.expected | 19 ------------------- 4 files changed, 1 insertion(+), 23 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected index a2acc1a7dbf4..e1620e55f659 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected @@ -103,8 +103,6 @@ | defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | (int)... | | defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | access to array | | defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:12:111:18 | tainted | -| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:118:11:118:11 | x | -| defaulttainttracking.cpp:110:17:110:22 | call to getenv | shared.h:6:15:6:23 | sinkparam | | dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:24:28:27 | call to atoi | | dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:34 | call to getenv | | dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:45 | (const char *)... | diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected index 26c83f57cb77..b02339f160cc 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected @@ -21,8 +21,6 @@ | defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | p2 | IR only | | defaulttainttracking.cpp:97:27:97:32 | call to getenv | shared.h:5:23:5:31 | sinkparam | IR only | | defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:8:111:8 | y | AST only | -| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:118:11:118:11 | x | IR only | -| defaulttainttracking.cpp:110:17:110:22 | call to getenv | shared.h:6:15:6:23 | sinkparam | IR only | | globals.cpp:13:15:13:20 | call to getenv | globals.cpp:13:5:13:11 | global1 | AST only | | globals.cpp:23:15:23:20 | call to getenv | globals.cpp:23:5:23:11 | global2 | AST only | | test_diff.cpp:104:12:104:15 | argv | test_diff.cpp:104:11:104:20 | (...) | IR only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected index 3940c1e83893..99bf7799ff22 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected @@ -31,6 +31,7 @@ postIsNotPre postHasUniquePre uniquePostUpdate | ref.cpp:83:5:83:17 | Chi | Node has multiple PostUpdateNodes. | +| ref.cpp:100:34:100:36 | InitializeIndirection | Node has multiple PostUpdateNodes. | | ref.cpp:109:5:109:22 | Chi | Node has multiple PostUpdateNodes. | postIsInSameCallable reverseRead diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/TaintedAllocationSize.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/TaintedAllocationSize.expected index 803b330716ec..773a969e9ad4 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/TaintedAllocationSize.expected +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/TaintedAllocationSize.expected @@ -1,13 +1,4 @@ edges -| field_conflation.c:12:22:12:27 | call to getenv | field_conflation.c:13:3:13:18 | Chi | -| field_conflation.c:12:22:12:34 | (const char *)... | field_conflation.c:13:3:13:18 | Chi | -| field_conflation.c:13:3:13:18 | Chi | field_conflation.c:19:15:19:17 | taint_array output argument | -| field_conflation.c:19:15:19:17 | taint_array output argument | field_conflation.c:20:10:20:13 | (unsigned long)... | -| field_conflation.c:19:15:19:17 | taint_array output argument | field_conflation.c:20:13:20:13 | x | -| field_conflation.c:19:15:19:17 | taint_array output argument | field_conflation.c:20:13:20:13 | x | -| field_conflation.c:19:15:19:17 | taint_array output argument | field_conflation.c:20:13:20:13 | x | -| field_conflation.c:20:13:20:13 | x | field_conflation.c:20:10:20:13 | (unsigned long)... | -| field_conflation.c:20:13:20:13 | x | field_conflation.c:20:13:20:13 | x | | test.cpp:39:21:39:24 | argv | test.cpp:42:38:42:44 | (size_t)... | | test.cpp:39:21:39:24 | argv | test.cpp:42:38:42:44 | (size_t)... | | test.cpp:39:21:39:24 | argv | test.cpp:42:38:42:44 | tainted | @@ -69,15 +60,6 @@ edges | test.cpp:235:11:235:20 | (size_t)... | test.cpp:214:23:214:23 | s | | test.cpp:237:10:237:19 | (size_t)... | test.cpp:220:21:220:21 | s | nodes -| field_conflation.c:12:22:12:27 | call to getenv | semmle.label | call to getenv | -| field_conflation.c:12:22:12:34 | (const char *)... | semmle.label | (const char *)... | -| field_conflation.c:13:3:13:18 | Chi | semmle.label | Chi | -| field_conflation.c:19:15:19:17 | taint_array output argument | semmle.label | taint_array output argument | -| field_conflation.c:20:10:20:13 | (unsigned long)... | semmle.label | (unsigned long)... | -| field_conflation.c:20:10:20:13 | (unsigned long)... | semmle.label | (unsigned long)... | -| field_conflation.c:20:13:20:13 | x | semmle.label | x | -| field_conflation.c:20:13:20:13 | x | semmle.label | x | -| field_conflation.c:20:13:20:13 | x | semmle.label | x | | test.cpp:39:21:39:24 | argv | semmle.label | argv | | test.cpp:39:21:39:24 | argv | semmle.label | argv | | test.cpp:42:38:42:44 | (size_t)... | semmle.label | (size_t)... | @@ -141,7 +123,6 @@ nodes | test.cpp:235:11:235:20 | (size_t)... | semmle.label | (size_t)... | | test.cpp:237:10:237:19 | (size_t)... | semmle.label | (size_t)... | #select -| field_conflation.c:20:3:20:8 | call to malloc | field_conflation.c:12:22:12:27 | call to getenv | field_conflation.c:20:13:20:13 | x | This allocation size is derived from $@ and might overflow | field_conflation.c:12:22:12:27 | call to getenv | user input (getenv) | | test.cpp:42:31:42:36 | call to malloc | test.cpp:39:21:39:24 | argv | test.cpp:42:38:42:44 | tainted | This allocation size is derived from $@ and might overflow | test.cpp:39:21:39:24 | argv | user input (argv) | | test.cpp:43:31:43:36 | call to malloc | test.cpp:39:21:39:24 | argv | test.cpp:43:38:43:63 | ... * ... | This allocation size is derived from $@ and might overflow | test.cpp:39:21:39:24 | argv | user input (argv) | | test.cpp:45:31:45:36 | call to malloc | test.cpp:39:21:39:24 | argv | test.cpp:45:38:45:63 | ... + ... | This allocation size is derived from $@ and might overflow | test.cpp:39:21:39:24 | argv | user input (argv) | From 617ef324649b5140dfd6555324ad1bf1a5f4fbad Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Thu, 21 May 2020 02:22:57 +0200 Subject: [PATCH 3/4] C++: Remove [FALSE POSITIVE] annotations --- .../dataflow/DefaultTaintTracking/defaulttainttracking.cpp | 2 +- .../CWE/CWE-190/semmle/TaintedAllocationSize/field_conflation.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp index a456bc856ffe..4dfba0aebf92 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp @@ -115,5 +115,5 @@ void test_conflated_fields3() { XY xy; xy.x = 0; taint_y(&xy); - sink(xy.x); // not tainted [FALSE POSITIVE] + sink(xy.x); // not tainted } diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/field_conflation.c b/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/field_conflation.c index 5f6cb730e804..9a69a420a796 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/field_conflation.c +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/TaintedAllocationSize/field_conflation.c @@ -17,5 +17,5 @@ void test_conflated_fields3(void) { struct XY xy; xy.x = 4; taint_array(&xy); - malloc(xy.x); // not tainted [FALSE POSITIVE] + malloc(xy.x); // not tainted } From b205d3693361b2af66872dd3b2611af20ab97835 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 26 May 2020 11:40:26 +0200 Subject: [PATCH 4/4] C++: Remove chi -> load rule from simpleLocalFlowStep and accept tests --- .../cpp/ir/dataflow/DefaultTaintTracking.qll | 1 - .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 58 ++++++------------- .../dataflow-ir-consistency.expected | 1 - 3 files changed, 18 insertions(+), 42 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index 5087363eee6c..3076df93a6df 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -229,7 +229,6 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) { // Flow from an element to an array or union that contains it. i2.(ChiInstruction).getPartial() = i1 and not i2.isResultConflated() and - not exists(PartialDefinitionNode n | n.asInstruction() = i2) and exists(Type t | i2.getResultLanguageType().hasType(t, false) | t instanceof Union or diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 772754745dfe..f9e1aad4e660 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -239,8 +239,6 @@ abstract class PostUpdateNode extends InstructionNode { } /** - * INTERNAL: do not use. - * * The base class for nodes that perform "partial definitions". * * In contrast to a normal "definition", which provides a new value for @@ -253,7 +251,7 @@ abstract class PostUpdateNode extends InstructionNode { * setY(&x); // a partial definition of the object `x`. * ``` */ -abstract class PartialDefinitionNode extends PostUpdateNode, TInstructionNode { } +abstract private class PartialDefinitionNode extends PostUpdateNode, TInstructionNode { } private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override ChiInstruction instr; @@ -272,17 +270,6 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode { override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } } -private class FieldStoreWriteSideEffectNode extends PartialDefinitionNode { - override ChiInstruction instr; - - FieldStoreWriteSideEffectNode() { - not instr.isResultConflated() and - exists(WriteSideEffectInstruction sideEffect | instr.getPartial() = sideEffect) - } - - override Node getPreUpdateNode() { result.asInstruction() = instr.getTotal() } -} - /** * Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to. * For instance, an update to a field of a struct containing only one field. For these cases we @@ -434,32 +421,6 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr */ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction()) - or - // The next two rules allow flow from partial definitions in setters to succeeding loads in the caller. - // First, we add flow from write side-effects to non-conflated chi instructions through their - // partial operands. Consider the following example: - // ``` - // void setX(Point* p, int new_x) { - // p->x = new_x; - // } - // ... - // setX(&p, taint()); - // ``` - // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to - // `setX`, which will be melded into `p` through a chi instruction. - nodeTo instanceof FieldStoreWriteSideEffectNode and - exists(ChiInstruction chi | chi = nodeTo.asInstruction() | - chi.getPartialOperand().getDef() = nodeFrom.asInstruction().(WriteSideEffectInstruction) and - not chi.isResultConflated() - ) - or - // Next, we add flow from non-conflated chi instructions to loads (even when they are not precise). - // This ensures that loads of `p->x` gets data flow from the `WriteSideEffectInstruction` above. - nodeFrom instanceof FieldStoreWriteSideEffectNode and - exists(ChiInstruction chi | chi = nodeFrom.asInstruction() | - not chi.isResultConflated() and - nodeTo.asInstruction().(LoadInstruction).getSourceValueOperand().getAnyDef() = chi - ) } pragma[noinline] @@ -497,6 +458,23 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction // for now. iTo.getAnOperand().(ChiTotalOperand).getDef() = iFrom or + // Add flow from write side-effects to non-conflated chi instructions through their + // partial operands. From there, a `readStep` will find subsequent reads of that field. + // Consider the following example: + // ``` + // void setX(Point* p, int new_x) { + // p->x = new_x; + // } + // ... + // setX(&p, taint()); + // ``` + // Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to + // `setX`, which will be melded into `p` through a chi instruction. + exists(ChiInstruction chi | chi = iTo | + chi.getPartialOperand().getDef() = iFrom.(WriteSideEffectInstruction) and + not chi.isResultConflated() + ) + or // Flow from stores to structs with a single field to a load of that field. iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom and exists(int size, Type type | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected index 99bf7799ff22..3940c1e83893 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected @@ -31,7 +31,6 @@ postIsNotPre postHasUniquePre uniquePostUpdate | ref.cpp:83:5:83:17 | Chi | Node has multiple PostUpdateNodes. | -| ref.cpp:100:34:100:36 | InitializeIndirection | Node has multiple PostUpdateNodes. | | ref.cpp:109:5:109:22 | Chi | Node has multiple PostUpdateNodes. | postIsInSameCallable reverseRead