Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rust: Data flow improvements to unlock flow in sqlx test #18291

Merged
merged 9 commits into from
Dec 18, 2024
Merged
24 changes: 22 additions & 2 deletions rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,11 @@ private class CapturedVariableContent extends Content, TCapturedVariableContent
override string toString() { result = "captured " + v }
}

/** A value referred to by a reference. */
final class ReferenceContent extends Content, TReferenceContent {
override string toString() { result = "&ref" }
}

/**
* An element in an array.
*/
Expand Down Expand Up @@ -1051,6 +1056,13 @@ module RustDataFlow implements InputSig<Location> {
["crate::option::Option::Some", "crate::result::Result::Ok"]
)
or
exists(PrefixExprCfgNode deref |
c instanceof ReferenceContent and
deref.getOperatorName() = "*" and
node1.asExpr() = deref.getExpr() and
node2.asExpr() = deref
)
or
VariableCapture::readStep(node1, c, node2)
)
or
Expand Down Expand Up @@ -1128,6 +1140,12 @@ module RustDataFlow implements InputSig<Location> {
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = index.getBase()
)
or
exists(RefExprCfgNode ref |
c instanceof ReferenceContent and
node1.asExpr() = ref.getExpr() and
node2.asExpr() = ref
)
or
VariableCapture::storeStep(node1, c, node2)
}

Expand Down Expand Up @@ -1395,7 +1413,8 @@ private module Cached {
e =
[
any(IndexExprCfgNode i).getBase(), any(FieldExprCfgNode access).getExpr(),
any(TryExprCfgNode try).getExpr()
any(TryExprCfgNode try).getExpr(),
any(PrefixExprCfgNode pe | pe.getOperatorName() = "*").getExpr()
]
} or
TSsaNode(SsaImpl::DataFlowIntegration::SsaNode node) or
Expand Down Expand Up @@ -1495,7 +1514,8 @@ private module Cached {
TStructFieldContent(StructCanonicalPath s, string field) {
field = s.getStruct().getFieldList().(RecordFieldList).getAField().getName().getText()
} or
TCapturedVariableContent(VariableCapture::CapturedVariable v)
TCapturedVariableContent(VariableCapture::CapturedVariable v) or
TReferenceContent()

cached
newtype TContentSet = TSingletonContentSet(Content c)
Expand Down
18 changes: 7 additions & 11 deletions rust/ql/lib/codeql/rust/dataflow/internal/SsaImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,16 @@ module SsaInput implements SsaImplCommon::InputSig<Location> {
|
va instanceof VariableReadAccess
or
// For immutable variables, we model a read when they are borrowed
// (although the actual read happens later, if at all).
va = any(RefExpr re).getExpr()
or
// Although compound assignments, like `x += y`, may in fact not read `x`,
// it makes sense to treat them as such
va = any(CompoundAssignmentExpr cae).getLhs()
) and
certain = true
or
// For immutable variables, we model a read when they are borrowed (although the
// actual read happens later, if at all). This only affects the SSA liveness
// analysis.
exists(VariableAccess va |
va = any(RefExpr re).getExpr() and
va = bb.getNode(i).getAstNode() and
v = va.getVariable() and
certain = false
)
or
capturedCallRead(_, bb, i, v) and certain = false
or
capturedExitRead(bb, i, v) and certain = false
Expand Down Expand Up @@ -146,7 +140,9 @@ private predicate adjacentDefReadExt(

/** Holds if `v` is read at index `i` in basic block `bb`. */
private predicate variableReadActual(BasicBlock bb, int i, Variable v) {
exists(VariableReadAccess read |
exists(VariableAccess read |
read instanceof VariableReadAccess or read = any(RefExpr re).getExpr()
|
read.getVariable() = v and
read = bb.getNode(i).getAstNode()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ module RustTaintTracking implements InputSig<Location, RustDataFlow> {
RustDataFlow::readStep(pred, cs, succ) and
cs.getContent() instanceof ArrayElementContent
)
or
pred.asExpr() = succ.asExpr().(RefExprCfgNode).getExpr()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking my understanding: when you take a reference &foo you get data flow from f to the ReferenceContent of &f and you get taint flow from f to &f without content?

What sorts of cases do we need the contentless taint flow for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is right. I added the taint flow to support this line in the SQL injection test:

let unsafe_query_1 = String::from("SELECT * FROM people WHERE firstname='") + &remote_string + "'";

Here remote_string is tainted, and the extra taint step makes unsafe_query_1 tainted at well. One could argue that the reference itself isn't really tainted, but on the other hand the only thing it can be used for is access tainted data and it seemed like a simple way to unlock some additional flow. Alternatively, we could also extend the handling of + to read ReferenceContent as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intuition is that having + read the ReferenceContent is more accurate but ... I'm worried this will be a can of worms if we got this way. So I guess we should probably leave it the way it is.

@hvitved do you have an opinion on this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modelling store steps as also taint steps has proven bad in the past, so I think it would be better to provide a taint flow summary for + which pops ReferenceContent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would be the best way to do that for a built-in operator?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think we should revert this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modelling store steps as also taint steps has proven bad in the past

Re. this, we also do that right now for arrays (which was inspired by Ruby). Do we want to remove that as well (later)?

Copy link
Contributor

@hvitved hvitved Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully we only add taint steps for reads out of arrays, and not for stores into arrays?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's right. Got it, taint steps for read steps are fine, but taint steps for store steps are not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think we should revert this.

Done 👍

)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(pred.(Node::FlowSummaryNode).getSummaryNode(),
Expand All @@ -59,7 +61,10 @@ module RustTaintTracking implements InputSig<Location, RustDataFlow> {
bindingset[node]
predicate defaultImplicitTaintRead(Node::Node node, ContentSet cs) {
exists(node) and
cs.(SingletonContentSet).getContent() instanceof ArrayElementContent
exists(Content c | c = cs.(SingletonContentSet).getContent() |
c instanceof ArrayElementContent or
c instanceof ReferenceContent
)
}

/**
Expand Down
6 changes: 6 additions & 0 deletions rust/ql/lib/codeql/rust/frameworks/reqwest.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/rust-all
extensible: summaryModel
data:
- ["repo:https://github.com/seanmonstar/reqwest:reqwest", "<crate::blocking::response::Response>::text", "Argument[self]", "ReturnValue.Variant[crate::result::Result::Ok(0)]", "taint", "manual"]
12 changes: 12 additions & 0 deletions rust/ql/lib/codeql/rust/frameworks/stdlib/lang-core.model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,16 @@ extensions:
pack: codeql/rust-all
extensible: summaryModel
data:
# Option
- ["lang:core", "<crate::option::Option>::unwrap", "Argument[self].Variant[crate::option::Option::Some(0)]", "ReturnValue", "value", "manual"]
- ["lang:core", "<crate::option::Option>::unwrap", "Argument[self]", "ReturnValue", "taint", "manual"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All these taint models should not be needed after altering the summary above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've moved one of them. But some of our sources specify taint on the entire Result, so I think I'd be fine to keep the others until that is no longer the case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather that we remove these lines, and not have flow for now, we should soon be able to have it once #18298 lands. Otherwise I fear we forget to remove these lines.

- ["lang:core", "<crate::option::Option>::unwrap_or", "Argument[self].Variant[crate::option::Option::Some(0)]", "ReturnValue", "value", "manual"]
- ["lang:core", "<crate::option::Option>::unwrap_or", "Argument[0]", "ReturnValue", "value", "manual"]
# Result
- ["lang:core", "<crate::result::Result>::unwrap", "Argument[self].Variant[crate::result::Result::Ok(0)]", "ReturnValue", "value", "manual"]
- ["lang:core", "<crate::result::Result>::unwrap", "Argument[self]", "ReturnValue", "taint", "manual"]
- ["lang:core", "<crate::result::Result>::unwrap_or", "Argument[self].Variant[crate::result::Result::Ok(0)]", "ReturnValue", "value", "manual"]
- ["lang:core", "<crate::result::Result>::unwrap_or", "Argument[0]", "ReturnValue", "value", "manual"]
- ["lang:core", "<crate::result::Result>::unwrap_or", "Argument[self]", "ReturnValue", "taint", "manual"]
# String
- ["lang:alloc", "<crate::string::String>::as_str", "Argument[self]", "ReturnValue", "taint", "manual"]
8 changes: 6 additions & 2 deletions rust/ql/lib/utils/test/InlineFlowTest.qll
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ private import codeql.rust.dataflow.internal.TaintTrackingImpl
private import codeql.rust.dataflow.internal.ModelsAsData as MaD
private import internal.InlineExpectationsTestImpl as InlineExpectationsTestImpl

// Holds if the target expression of `call` is a path and the string representation of the path is `name`.
/**
* Holds if the target expression of `call` is a path and the string
* representation of the path has `name` as a prefix.
*/
bindingset[name]
private predicate callTargetName(CallExprCfgNode call, string name) {
call.getFunction().(PathExprCfgNode).toString() = name
call.getFunction().(PathExprCfgNode).toString().matches(name + "%")
}

private module FlowTestImpl implements InputSig<Location, RustDataFlow> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
identityLocalStep
| main.rs:404:7:404:18 | phi(default_name) | Node steps to itself |
| main.rs:394:7:394:18 | phi(default_name) | Node steps to itself |
Loading
Loading