Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rust: Add basic skeleton setup for data flow #17871

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Provides a module for performing local (intra-procedural) and global
* (inter-procedural) data flow analyses.
*/

private import rust
private import codeql.dataflow.DataFlow
private import internal.DataFlowImpl as DataFlowImpl
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the `as DataFlowImpl` bit is needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added it for this line here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, that goes away with the other suggested changes 😄

private import DataFlowImpl::Node as Node

/**
* Provides classes for performing local (intra-procedural) and global
* (inter-procedural) data flow analyses.
*
* The node classes below are aliases of classes in the internal `Node`
* module; the final import instantiates the shared `DataFlowMake` module with
* the Rust-specific `RustDataFlow` input.
*/
module DataFlow {
/** A node in the data flow graph (alias of the internal `Node::Node`). */
final class Node = Node::Node;

/** A data flow node for a parameter (alias of the internal `Node::ParameterNode`). */
final class ParameterNode = Node::ParameterNode;

/** A post-update node (alias of the internal `Node::PostUpdateNode`). */
final class PostUpdateNode = Node::PostUpdateNode;

/**
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*
* This is an alias of the binary predicate `DataFlowImpl::localFlowStepImpl`.
*/
predicate localFlowStep = DataFlowImpl::localFlowStepImpl/2;

/**
* Holds if data flows from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*
* Defined as the reflexive transitive closure (`*`) of `localFlowStep`.
*/
pragma[inline]
predicate localFlow(Node::Node source, Node::Node sink) { localFlowStep*(source, sink) }

// Instantiate the shared data flow library for Rust. This import is not
// `private`, unlike the file-level imports.
import DataFlowMake<Location, DataFlowImpl::RustDataFlow>
}
17 changes: 17 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/TaintTracking.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Provides the module `TaintTracking` for performing local (intra-procedural)
* and global (inter-procedural) taint-tracking analyses.
*/

private import rust

/**
* Provides a library for performing local (intra-procedural) and global
* (inter-procedural) taint-tracking analyses.
*/
module TaintTracking {
// Private imports: they bring `RustDataFlow` and `RustTaintTracking` (and,
// presumably, `TaintFlowMake`) into scope for the instantiation below.
private import codeql.dataflow.TaintTracking
private import internal.DataFlowImpl
private import internal.TaintTrackingImpl
// Instantiate the shared taint-tracking library with the Rust data flow and
// taint-tracking inputs. This import is not `private`.
import TaintFlowMake<Location, RustDataFlow, RustTaintTracking>
}
308 changes: 308 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
/**
* Provides Rust-specific definitions for use in the data flow library.
*/

private import codeql.util.Void
private import codeql.util.Unit
private import codeql.dataflow.DataFlow
private import codeql.dataflow.internal.DataFlowImpl
private import rust
private import codeql.rust.controlflow.ControlFlowGraph
private import codeql.rust.dataflow.Ssa

module Node {
/**
* An element, viewed as a node in a data flow graph. Either an expression
* (`ExprNode`) or a parameter (`ParameterNode`).
*/
abstract class Node extends TNode {
/**
* Gets the location of this node.
*
* Abstract: each concrete node class must provide its own implementation.
*/
abstract Location getLocation();

/**
* Gets a textual representation of this node.
*
* Abstract: each concrete node class must provide its own implementation.
*/
abstract string toString();

/**
* Gets the expression that corresponds to this node, if any.
*
* This base implementation has no result (`none()`).
*/
Expr asExpr() { none() }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return type here should be `CfgNode`, because we will base data flow on the CFG and not on the AST.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me it seems a bit confusing for users and query writers if `asExpr` doesn't return an expression. There is also another predicate, `getCfgNode`, to get a CFG node.

From a quick grep it seems that in Swift, Go, Java, C++, and C# the asExpr predicate gives an expression. Only in Ruby does it give a CFG node.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave it at `Expr` for now, and then change it once we have a proper `Expr` layer on top of `CfgNode`.


/**
* Gets the control flow node that corresponds to this data flow node.
*
* This base implementation has no result (`none()`).
*/
CfgNode getCfgNode() { none() }

/**
* Gets this node's underlying SSA definition, if any.
*
* This base implementation has no result (`none()`).
*/
Ssa::Definition asDefinition() { none() }

/**
* Gets the parameter that corresponds to this node, if any.
*
* This base implementation has no result (`none()`).
*/
Param asParameter() { none() }
}

/**
* A node type that is not implemented.
*
* The characteristic predicate is `none()`, so this class has no values. It
* serves as a placeholder base/alias target for node kinds that do not exist
* yet.
*/
final class NaNode extends Node {
NaNode() { none() }

/** Gets the fixed placeholder text `"N/A"`. */
override string toString() { result = "N/A" }

/** Has no result: a placeholder node has no location. */
override Location getLocation() { none() }
}

/**
* The value of a parameter at function entry, viewed as a node in a data
* flow graph.
*
* Each value of this class is the `TSourceParameterNode` branch for one
* `Param`.
*/
final class ParameterNode extends Node {
// The parameter that this node wraps; bound by the characteristic predicate.
Param param;

ParameterNode() { this = TSourceParameterNode(param) }

/** Gets the location of the underlying parameter. */
override Location getLocation() { result = param.getLocation() }

/** Gets the textual representation of the underlying parameter. */
override string toString() { result = param.toString() }
}

/** A call argument node. Placeholder: currently an alias of the empty `NaNode` class. */
final class ArgumentNode = NaNode;

/**
* A node for a value returned from a callable. Placeholder: it extends the
* empty `NaNode` class, so it currently has no values.
*/
final class ReturnNode extends NaNode {
/** Gets the kind of this return node. Stub: has no result. */
RustDataFlow::ReturnKind getKind() { none() }
}

/** A call output node. Placeholder: currently an alias of the empty `NaNode` class. */
final class OutNode = NaNode;

/**
 * A node associated with an object after an operation that might have
 * changed its state.
 *
 * This can be either the argument to a callable after the callable returns
 * (which might have mutated the argument), or the qualifier of a field after
 * an update to the field.
 *
 * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
 * to the value before the update.
 *
 * Placeholder: this class extends the empty `NaNode` class, so it currently
 * has no values.
 */
final class PostUpdateNode extends NaNode {
  /** Gets the node before the state update. Stub: has no result. */
  Node getPreUpdateNode() { none() }
}

/** A cast node. Placeholder: currently an alias of the empty `NaNode` class. */
final class CastNode = NaNode;
}

/**
 * The Rust-specific input to the shared data flow library, implementing
 * `InputSig<Location>`.
 *
 * This is a skeleton: apart from `compatibleTypes` and `getContentApprox`,
 * every module-level predicate is a `none()` stub, and several types are
 * aliases of placeholder types.
 */
module RustDataFlow implements InputSig<Location> {
  /**
   * An element, viewed as a node in a data flow graph. Either an expression
   * (`ExprNode`) or a parameter (`ParameterNode`).
   */
  final class Node = Node::Node;

  /** A parameter node (alias of `Node::ParameterNode`). */
  final class ParameterNode = Node::ParameterNode;

  /** An argument node (alias of `Node::ArgumentNode`). */
  final class ArgumentNode = Node::ArgumentNode;

  /** A return node (alias of `Node::ReturnNode`). */
  final class ReturnNode = Node::ReturnNode;

  /** A call output node (alias of `Node::OutNode`). */
  final class OutNode = Node::OutNode;

  /** A post-update node (alias of `Node::PostUpdateNode`). */
  final class PostUpdateNode = Node::PostUpdateNode;

  /** A cast node (alias of `Node::CastNode`, like the sibling aliases above). */
  final class CastNode = Node::CastNode;

  /** Holds if `p` is the parameter of `c` at position `pos`. Stub: never holds. */
  predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) { none() }

  /** Holds if `n` is the argument of `call` at position `pos`. Stub: never holds. */
  predicate isArgumentNode(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) { none() }

  /** Gets the callable enclosing `node`. Stub: has no result. */
  DataFlowCallable nodeGetEnclosingCallable(Node node) { none() }

  /** Gets the data flow type of `node`. Stub: has no result. */
  DataFlowType getNodeType(Node node) { none() }

  /** Holds if `node` should be hidden from path explanations. Stub: never holds. */
  predicate nodeIsHidden(Node node) { none() }

  /** Placeholder expression type (alias of `Void`). */
  class DataFlowExpr = Void;

  /** Gets the node corresponding to `e`. */
  Node exprNode(DataFlowExpr e) { none() }

  /** A call in the data flow graph; wraps a `CallExpr` via `TNormalCall`. */
  final class DataFlowCall extends TNormalCall {
    private CallExpr c;

    DataFlowCall() { this = TNormalCall(c) }

    /** Gets the callable enclosing this call. Stub: has no result. */
    DataFlowCallable getEnclosingCallable() { none() }

    /** Gets the textual representation of the underlying call expression. */
    string toString() { result = c.toString() }

    /** Gets the location of the underlying call expression. */
    Location getLocation() { result = c.getLocation() }
  }

  /** A callable in the data flow graph (alias of `CfgScope`). */
  final class DataFlowCallable = CfgScope;

  /** Placeholder return kind type (alias of `Void`). */
  final class ReturnKind = Void;

  /** Gets a viable implementation of the target of the given `Call`. */
  DataFlowCallable viableCallable(DataFlowCall c) { none() }

  /** Gets an output node of `call` for return kind `kind`. Stub: has no result. */
  OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { none() }

  /** Placeholder data flow type (alias of `Unit`). */
  final class DataFlowType = Unit;

  /** Holds if `t1` and `t2` are compatible. Holds for every pair of types. */
  predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }

  /** Holds if `t1` is strictly stronger than `t2`. Stub: never holds. */
  predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

  /** Placeholder content type (alias of `Void`). */
  final class Content = Void;

  /** Holds if high precision should be forced for `c`. Stub: never holds. */
  predicate forceHighPrecision(Content c) { none() }

  /** A set of contents, backed by the `TContentSet` type. */
  class ContentSet extends TContentSet {
    /** Gets a textual representation of this element. */
    string toString() { result = "ContentSet" }

    /** Gets a content that may be stored into when storing into this set. */
    Content getAStoreContent() { none() }

    /** Gets a content that may be read from when reading from this set. */
    Content getAReadContent() { none() }
  }

  /** Placeholder content approximation type (alias of `Void`). */
  final class ContentApprox = Void;

  /** Gets an approximation of `c`. The body is `any()`: every pair of the two types is related. */
  ContentApprox getContentApprox(Content c) { any() }

  /** A placeholder parameter position; its only value is the string `"pos"`. */
  class ParameterPosition extends string {
    ParameterPosition() { this = "pos" }
  }

  /** A placeholder argument position; its only value is the string `"pos"`. */
  class ArgumentPosition extends string {
    ArgumentPosition() { this = "pos" }
  }

  /**
   * Holds if the parameter position `ppos` matches the argument position
   * `apos`.
   */
  predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { none() }

  /**
   * Holds if there is a simple local flow step from `node1` to `node2`. These
   * are the value-preserving intra-callable flow steps.
   */
  predicate simpleLocalFlowStep(Node node1, Node node2, string model) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` through a non-local step
   * that does not follow a call edge. For example, a step through a global
   * variable.
   */
  predicate jumpStep(Node node1, Node node2) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` via a read of `c`. Thus,
   * `node1` references an object with a content `c.getAReadContent()` whose
   * value ends up in `node2`.
   */
  predicate readStep(Node node1, ContentSet c, Node node2) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` via a store into `c`. Thus,
   * `node2` references an object with a content `c.getAStoreContent()` that
   * contains the value of `node1`.
   */
  predicate storeStep(Node node1, ContentSet c, Node node2) { none() }

  /**
   * Holds if values stored inside content `c` are cleared at node `n`. For example,
   * any value stored inside `f` is cleared at the pre-update node associated with `x`
   * in `x.f = newValue`.
   */
  predicate clearsContent(Node n, ContentSet c) { none() }

  /**
   * Holds if the value that is being tracked is expected to be stored inside content `c`
   * at node `n`.
   */
  predicate expectsContent(Node n, ContentSet c) { none() }

  /** A placeholder region of nodes, backed by `Void`. */
  class NodeRegion instanceof Void {
    /** Gets the fixed text `"NodeRegion"`. */
    string toString() { result = "NodeRegion" }

    /** Holds if this region contains `n`. Stub: never holds. */
    predicate contains(Node n) { none() }
  }

  /**
   * Holds if the nodes in `nr` are unreachable when the call context is `call`.
   */
  predicate isUnreachableInCall(NodeRegion nr, DataFlowCall call) { none() }

  /**
   * Holds if flow is allowed to pass from parameter `p` and back to itself as a
   * side-effect, resulting in a summary from `p` to itself.
   *
   * One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
   * by default as a heuristic.
   */
  predicate allowParameterReturnInSelf(ParameterNode p) { none() }

  /**
   * Holds if the value of `node2` is given by `node1`.
   *
   * This predicate is combined with type information in the following way: If
   * the data flow library is able to compute an improved type for `node1` then
   * it will also conclude that this type applies to `node2`. Vice versa, if
   * `node2` must be visited along a flow path, then any type known for `node2`
   * must also apply to `node1`.
   */
  predicate localMustFlowStep(Node node1, Node node2) { none() }

  /** Placeholder lambda call kind type (alias of `Void`). */
  class LambdaCallKind = Void;

  /** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
  predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }

  /** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
  predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }

  /** Extra data-flow steps needed for lambda flow analysis. */
  predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }

  /** Holds if `source` is a known source described by `model`. Stub: never holds. */
  predicate knownSourceModel(Node source, string model) { none() }

  /** Holds if `sink` is a known sink described by `model`. Stub: never holds. */
  predicate knownSinkModel(Node sink, string model) { none() }

  /** Placeholder second-level scope type (alias of `Void`). */
  class DataFlowSecondLevelScope = Void;
}

/** A content set; a file-level alias of `RustDataFlow::ContentSet`. */
final class ContentSet = RustDataFlow::ContentSet;

import MakeImpl<Location, RustDataFlow>

/** A collection of cached types and predicates to be evaluated in the same stage. */
cached
private module Cached {
/**
* The underlying type of data flow nodes:
* - `TExprNode(n, e)`: a CFG node `n` paired with the expression `e` that is its AST node.
* - `TSourceParameterNode(param)`: one node per `Param`.
*/
cached
newtype TNode =
TExprNode(CfgNode n, Expr e) { n.getAstNode() = e } or
TSourceParameterNode(Param param)

/** The underlying type of data flow calls: one `TNormalCall` per `CallExpr`. */
cached
newtype TDataFlowCall = TNormalCall(CallExpr c)

/** A type with two parameterless branches, `TAnyElementContent` and `TAnyContent`. */
cached
newtype TOptionalContentSet =
TAnyElementContent() or
TAnyContent()

/** The union of the `TAnyElementContent` and `TAnyContent` branches. */
cached
class TContentSet = TAnyElementContent or TAnyContent;

/** This is the local flow predicate that is exposed. Stub: currently never holds. */
cached
predicate localFlowStepImpl(Node::Node nodeFrom, Node::Node nodeTo) { none() }
}

import Cached
20 changes: 20 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/TaintTrackingImpl.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
private import rust
private import codeql.dataflow.TaintTracking
private import DataFlowImpl

/**
* The Rust-specific input to the shared taint-tracking library, implementing
* `InputSig<Location, RustDataFlow>`. Skeleton: every predicate is a `none()` stub.
*/
module RustTaintTracking implements InputSig<Location, RustDataFlow> {
/**
* Holds if `node` should be a sanitizer in all global taint flow
* configurations. Stub: never holds.
*/
predicate defaultTaintSanitizer(Node::Node node) { none() }

/**
* Holds if the additional step from `src` to `sink` should be included in all
* global taint flow configurations.
*/
predicate defaultAdditionalTaintStep(Node::Node src, Node::Node sink, string model) { none() }

/**
* Holds if taint flow configurations should allow implicit reads of `c` at sinks
* and inputs to additional taint steps.
*
* `node` is declared as a binding set, so callers must bind it.
*/
bindingset[node]
predicate defaultImplicitTaintRead(Node::Node node, ContentSet c) { none() }
}
Empty file.
Loading
Loading