Skip to content

Commit 464d2cd

Browse files
authored
Merge pull request #20891 from hvitved/rust/data-flow-implicit-deref-borrow
Rust: Improve handling of implicit derefs/borrows in data flow
2 parents d41a2d4 + 4bfe1a8 commit 464d2cd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1133
-1068
lines changed

rust/ql/lib/codeql/rust/dataflow/internal/DataFlowConsistency.qll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,6 @@ private module Input implements InputSig<Location, RustDataFlow> {
1919
predicate postWithInFlowExclude(RustDataFlow::Node n) {
2020
n instanceof Node::FlowSummaryNode
2121
or
22-
// We allow flow into post-update node for receiver expressions (from the
23-
// synthetic post receiever node).
24-
n.(Node::PostUpdateNode).getPreUpdateNode().asExpr() = any(Node::ReceiverNode r).getReceiver()
25-
or
2622
n.(Node::PostUpdateNode).getPreUpdateNode().asExpr() = getPostUpdateReverseStep(_, _)
2723
or
2824
FlowSummaryImpl::Private::Steps::sourceLocalStep(_, n, _)

rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ private import codeql.rust.elements.Call
1212
private import SsaImpl as SsaImpl
1313
private import codeql.rust.controlflow.internal.Scope as Scope
1414
private import codeql.rust.internal.PathResolution
15-
private import codeql.rust.internal.TypeInference as TypeInference
1615
private import codeql.rust.controlflow.ControlFlowGraph
1716
private import codeql.rust.dataflow.Ssa
1817
private import codeql.rust.dataflow.FlowSummary
@@ -141,18 +140,11 @@ final class ArgumentPosition extends ParameterPosition {
141140

142141
/**
143142
* Holds if `arg` is an argument of `call` at the position `pos`.
144-
*
145-
* Note that this does not hold for the receiever expression of a method call
146-
* as the synthetic `ReceiverNode` is the argument for the `self` parameter.
147143
*/
148-
predicate isArgumentForCall(Expr arg, Call call, ParameterPosition pos) {
144+
predicate isArgumentForCall(Expr arg, Call call, ArgumentPosition pos) {
149145
// TODO: Handle index expressions as calls in data flow.
150146
not call instanceof IndexExpr and
151-
(
152-
call.getPositionalArgument(pos.getPosition()) = arg
153-
or
154-
call.getReceiver() = arg and pos.isSelf() and not call.receiverImplicitlyBorrowed()
155-
)
147+
arg = pos.getArgument(call)
156148
}
157149

158150
/** Provides logic related to SSA. */
@@ -283,14 +275,6 @@ module LocalFlow {
283275
or
284276
nodeFrom.asPat().(OrPat).getAPat() = nodeTo.asPat()
285277
or
286-
// Simple value step from receiver expression to receiver node, in case
287-
// there is no implicit deref or borrow operation.
288-
nodeFrom.asExpr() = nodeTo.(ReceiverNode).getReceiver()
289-
or
290-
// The dual step of the above, for the post-update nodes.
291-
nodeFrom.(PostUpdateNode).getPreUpdateNode().(ReceiverNode).getReceiver() =
292-
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr()
293-
or
294278
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr() =
295279
getPostUpdateReverseStep(nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr(), true)
296280
}
@@ -380,7 +364,8 @@ module RustDataFlow implements InputSig<Location> {
380364
node.(FlowSummaryNode).getSummaryNode().isHidden() or
381365
node instanceof CaptureNode or
382366
node instanceof ClosureParameterNode or
383-
node instanceof ReceiverNode or
367+
node instanceof DerefBorrowNode or
368+
node instanceof DerefOutNode or
384369
node.asExpr() instanceof ParenExpr or
385370
nodeIsHidden(node.(PostUpdateNode).getPreUpdateNode())
386371
}
@@ -520,16 +505,16 @@ module RustDataFlow implements InputSig<Location> {
520505
}
521506

522507
pragma[nomagic]
523-
private predicate implicitDerefToReceiver(Node node1, ReceiverNode node2, ReferenceContent c) {
524-
TypeInference::receiverHasImplicitDeref(node1.asExpr()) and
525-
node1.asExpr() = node2.getReceiver() and
508+
private predicate implicitDeref(Node node1, DerefBorrowNode node2, ReferenceContent c) {
509+
not node2.isBorrow() and
510+
node1.asExpr() = node2.getNode() and
526511
exists(c)
527512
}
528513

529514
pragma[nomagic]
530-
private predicate implicitBorrowToReceiver(Node node1, ReceiverNode node2, ReferenceContent c) {
531-
TypeInference::receiverHasImplicitBorrow(node1.asExpr()) and
532-
node1.asExpr() = node2.getReceiver() and
515+
private predicate implicitBorrow(Node node1, DerefBorrowNode node2, ReferenceContent c) {
516+
node2.isBorrow() and
517+
node1.asExpr() = node2.getNode() and
533518
exists(c)
534519
}
535520

@@ -539,6 +524,15 @@ module RustDataFlow implements InputSig<Location> {
539524
exists(c)
540525
}
541526

527+
private Node getFieldExprContainerNode(FieldExpr fe) {
528+
exists(Expr container | container = fe.getContainer() |
529+
not any(DerefBorrowNode n).getNode() = container and
530+
result.asExpr() = container
531+
or
532+
result.(DerefBorrowNode).getNode() = container
533+
)
534+
}
535+
542536
pragma[nomagic]
543537
additional predicate readContentStep(Node node1, Content c, Node node2) {
544538
exists(TupleStructPat pat, int pos |
@@ -563,7 +557,7 @@ module RustDataFlow implements InputSig<Location> {
563557
node1.asPat().(RefPat).getPat() = node2.asPat()
564558
or
565559
exists(FieldExpr access |
566-
node1.asExpr() = access.getContainer() and
560+
node1 = getFieldExprContainerNode(access) and
567561
node2.asExpr() = access and
568562
access = c.(FieldContent).getAnAccess()
569563
)
@@ -593,10 +587,9 @@ module RustDataFlow implements InputSig<Location> {
593587
.isVariantField([any(OptionEnum o).getSome(), any(ResultEnum r).getOk()], 0)
594588
)
595589
or
596-
exists(PrefixExpr deref |
590+
exists(DerefExpr deref |
597591
c instanceof ReferenceContent and
598-
deref.getOperatorName() = "*" and
599-
node1.asExpr() = deref.getExpr() and
592+
node1.(DerefOutNode).getDerefExpr() = deref and
600593
node2.asExpr() = deref
601594
)
602595
or
@@ -616,12 +609,10 @@ module RustDataFlow implements InputSig<Location> {
616609
referenceExprToExpr(node2.(PostUpdateNode).getPreUpdateNode(),
617610
node1.(PostUpdateNode).getPreUpdateNode(), c)
618611
or
619-
// Step from receiver expression to receiver node, in case of an implicit
620-
// dereference.
621-
implicitDerefToReceiver(node1, node2, c)
612+
implicitDeref(node1, node2, c)
622613
or
623614
// A read step dual to the store step for implicit borrows.
624-
implicitBorrowToReceiver(node2.(PostUpdateNode).getPreUpdateNode(),
615+
implicitBorrow(node2.(PostUpdateNode).getPreUpdateNode(),
625616
node1.(PostUpdateNode).getPreUpdateNode(), c)
626617
or
627618
VariableCapture::readStep(node1, c, node2)
@@ -657,7 +648,7 @@ module RustDataFlow implements InputSig<Location> {
657648
exists(AssignmentExpr assignment, FieldExpr access |
658649
assignment.getLhs() = access and
659650
node1.asExpr() = assignment.getRhs() and
660-
node2.asExpr() = access.getContainer() and
651+
node2 = getFieldExprContainerNode(access) and
661652
access = c.getAnAccess()
662653
)
663654
}
@@ -729,9 +720,7 @@ module RustDataFlow implements InputSig<Location> {
729720
or
730721
VariableCapture::storeStep(node1, c, node2)
731722
or
732-
// Step from receiver expression to receiver node, in case of an implicit
733-
// borrow.
734-
implicitBorrowToReceiver(node1, node2, c)
723+
implicitBorrow(node1, node2, c)
735724
}
736725

737726
/**

rust/ql/lib/codeql/rust/dataflow/internal/Node.qll

Lines changed: 91 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ private import codeql.rust.controlflow.ControlFlowGraph
1414
private import codeql.rust.controlflow.CfgNodes
1515
private import codeql.rust.dataflow.Ssa
1616
private import codeql.rust.dataflow.FlowSummary
17+
private import codeql.rust.internal.TypeInference as TypeInference
1718
private import Node as Node
1819
private import DataFlowImpl
1920
private import FlowSummaryImpl as FlowSummaryImpl
@@ -226,35 +227,53 @@ final class ExprArgumentNode extends ArgumentNode, ExprNode {
226227
private Call call_;
227228
private RustDataFlow::ArgumentPosition pos_;
228229

229-
ExprArgumentNode() { isArgumentForCall(n, call_, pos_) }
230+
ExprArgumentNode() {
231+
isArgumentForCall(n, call_, pos_) and
232+
not TypeInference::implicitDeref(n) and
233+
not TypeInference::implicitBorrow(n)
234+
}
230235

231236
override predicate isArgumentOf(DataFlowCall call, RustDataFlow::ArgumentPosition pos) {
232237
call.asCall() = call_ and pos = pos_
233238
}
234239
}
235240

236241
/**
237-
* The receiver of a method call _after_ any implicit borrow or dereferencing
238-
* has taken place.
242+
* A node that represents the value of an expression _after_ implicit dereferencing
243+
* or borrowing.
239244
*/
240-
final class ReceiverNode extends ArgumentNode, TReceiverNode {
241-
private Call n;
245+
class DerefBorrowNode extends Node, TDerefBorrowNode {
246+
AstNode n;
247+
boolean isBorrow;
242248

243-
ReceiverNode() { this = TReceiverNode(n, false) }
249+
DerefBorrowNode() { this = TDerefBorrowNode(n, isBorrow, false) }
244250

245-
Expr getReceiver() { result = n.getReceiver() }
251+
AstNode getNode() { result = n }
246252

247-
MethodCallExpr getMethodCall() { result = n }
253+
predicate isBorrow() { isBorrow = true }
248254

249-
override predicate isArgumentOf(DataFlowCall call, RustDataFlow::ArgumentPosition pos) {
250-
call.asCall() = n and pos = TSelfParameterPosition()
255+
override CfgScope getCfgScope() { result = n.getEnclosingCfgScope() }
256+
257+
override Location getLocation() { result = n.getLocation() }
258+
259+
override string toString() {
260+
if isBorrow = true then result = n + " [borrowed]" else result = n + " [dereferenced]"
251261
}
262+
}
252263

253-
override CfgScope getCfgScope() { result = n.getEnclosingCfgScope() }
264+
/**
265+
* A node that represents the value of an argument of a call _after_ implicit
266+
* dereferencing or borrowing.
267+
*/
268+
final class DerefBorrowArgNode extends DerefBorrowNode, ArgumentNode {
269+
private DataFlowCall call_;
270+
private RustDataFlow::ArgumentPosition pos_;
254271

255-
override Location getLocation() { result = this.getReceiver().getLocation() }
272+
DerefBorrowArgNode() { isArgumentForCall(n, call_.asCall(), pos_) }
256273

257-
override string toString() { result = "receiver for " + this.getReceiver() }
274+
override predicate isArgumentOf(DataFlowCall call, RustDataFlow::ArgumentPosition pos) {
275+
call = call_ and pos = pos_
276+
}
258277
}
259278

260279
final class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode {
@@ -329,15 +348,46 @@ abstract class OutNode extends Node {
329348
}
330349

331350
final private class ExprOutNode extends ExprNode, OutNode {
332-
ExprOutNode() { this.asExpr() instanceof Call }
351+
ExprOutNode() {
352+
exists(Call call |
353+
call = this.asExpr() and
354+
not call instanceof DerefExpr // Handled by `DerefOutNode`
355+
)
356+
}
333357

334-
/** Gets the underlying call CFG node that includes this out node. */
358+
/** Gets the underlying call node that includes this out node. */
335359
override DataFlowCall getCall(ReturnKind kind) {
336360
result.asCall() = n and
337361
kind = TNormalReturnKind()
338362
}
339363
}
340364

365+
/**
366+
* A node that represents the value of a `*` expression _before_ implicit
367+
* dereferencing:
368+
*
369+
* `*v` equivalent to `*Deref::deref(&v)`, and this node represents the
370+
* `Deref::deref(&v)` part.
371+
*/
372+
class DerefOutNode extends OutNode, TDerefOutNode {
373+
DerefExpr de;
374+
375+
DerefOutNode() { this = TDerefOutNode(de, false) }
376+
377+
DerefExpr getDerefExpr() { result = de }
378+
379+
override CfgScope getCfgScope() { result = de.getEnclosingCfgScope() }
380+
381+
override DataFlowCall getCall(ReturnKind kind) {
382+
result.asCall() = de and
383+
kind = TNormalReturnKind()
384+
}
385+
386+
override Location getLocation() { result = de.getLocation() }
387+
388+
override string toString() { result = de.toString() + " [pre-dereferenced]" }
389+
}
390+
341391
final class SummaryOutNode extends FlowSummaryNode, OutNode {
342392
private DataFlowCall call;
343393
private ReturnKind kind_;
@@ -402,16 +452,29 @@ final class ExprPostUpdateNode extends PostUpdateNode, TExprPostUpdateNode {
402452
override Location getLocation() { result = e.getLocation() }
403453
}
404454

405-
final class ReceiverPostUpdateNode extends PostUpdateNode, TReceiverNode {
406-
private Call call;
455+
final class DerefBorrowPostUpdateNode extends PostUpdateNode, TDerefBorrowNode {
456+
private Expr arg;
457+
private boolean isBorrow;
407458

408-
ReceiverPostUpdateNode() { this = TReceiverNode(call, true) }
459+
DerefBorrowPostUpdateNode() { this = TDerefBorrowNode(arg, isBorrow, true) }
409460

410-
override Node getPreUpdateNode() { result = TReceiverNode(call, false) }
461+
override DerefBorrowNode getPreUpdateNode() { result = TDerefBorrowNode(arg, isBorrow, false) }
411462

412-
override CfgScope getCfgScope() { result = call.getEnclosingCfgScope() }
463+
override CfgScope getCfgScope() { result = arg.getEnclosingCfgScope() }
413464

414-
override Location getLocation() { result = call.getReceiver().getLocation() }
465+
override Location getLocation() { result = arg.getLocation() }
466+
}
467+
468+
class DerefOutPostUpdateNode extends PostUpdateNode, TDerefOutNode {
469+
DerefExpr de;
470+
471+
DerefOutPostUpdateNode() { this = TDerefOutNode(de, true) }
472+
473+
override DerefOutNode getPreUpdateNode() { result = TDerefOutNode(de, false) }
474+
475+
override CfgScope getCfgScope() { result = de.getEnclosingCfgScope() }
476+
477+
override Location getLocation() { result = de.getLocation() }
415478
}
416479

417480
final class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNode {
@@ -467,19 +530,19 @@ newtype TNode =
467530
any(IndexExpr i).getBase(), //
468531
any(FieldExpr access).getContainer(), //
469532
any(TryExpr try).getExpr(), //
470-
any(PrefixExpr pe | pe.getOperatorName() = "*").getExpr(), //
471533
any(AwaitExpr a).getExpr(), //
472-
any(MethodCallExpr mc).getReceiver(), //
473534
getPostUpdateReverseStep(any(PostUpdateNode n).getPreUpdateNode().asExpr(), _)
474535
]
475536
)
476537
} or
477-
TReceiverNode(Call call, Boolean isPost) {
478-
call.hasEnclosingCfgScope() and
479-
call.receiverImplicitlyBorrowed() and
480-
// TODO: Handle index expressions as calls in data flow.
481-
not call instanceof IndexExpr
538+
TDerefBorrowNode(AstNode n, boolean borrow, Boolean isPost) {
539+
TypeInference::implicitDeref(n) and
540+
borrow = false
541+
or
542+
TypeInference::implicitBorrow(n) and
543+
borrow = true
482544
} or
545+
TDerefOutNode(DerefExpr de, Boolean isPost) or
483546
TSsaNode(SsaImpl::DataFlowIntegration::SsaNode node) or
484547
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) {
485548
forall(AstNode n | n = sn.getSinkElement() or n = sn.getSourceElement() |

rust/ql/lib/codeql/rust/elements/internal/FieldExprImpl.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ module Impl {
2323
*/
2424
class FieldExpr extends Generated::FieldExpr {
2525
/** Gets the record field that this access references, if any. */
26-
StructField getStructField() { result = TypeInference::resolveStructFieldExpr(this) }
26+
StructField getStructField() { result = TypeInference::resolveStructFieldExpr(this, _) }
2727

2828
/** Gets the tuple field that this access references, if any. */
29-
TupleField getTupleField() { result = TypeInference::resolveTupleFieldExpr(this) }
29+
TupleField getTupleField() { result = TypeInference::resolveTupleFieldExpr(this, _) }
3030

3131
override string toStringImpl() {
3232
exists(string abbr, string name |

rust/ql/lib/codeql/rust/frameworks/futures.model.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@ extensions:
77
- ["<futures_util::io::buf_reader::BufReader>::new", "Argument[0]", "ReturnValue", "taint", "manual"]
88
- ["<_ as futures_util::io::AsyncReadExt>::read", "Argument[self]", "Argument[0].Reference", "taint", "manual"]
99
- ["<_ as futures_util::io::AsyncReadExt>::read", "Argument[self].Reference", "Argument[0].Reference", "taint", "manual"]
10-
- ["<_ as futures_util::io::AsyncReadExt>::read_to_end", "Argument[self]", "Argument[0].Reference", "taint", "manual"]
1110
- ["<_ as futures_util::io::AsyncReadExt>::read_to_end", "Argument[self].Reference", "Argument[0].Reference", "taint", "manual"]
12-
- ["<_ as futures_util::io::AsyncBufReadExt>::read_line", "Argument[self]", "Argument[0].Reference", "taint", "manual"]
1311
- ["<_ as futures_util::io::AsyncBufReadExt>::read_line", "Argument[self].Reference", "Argument[0].Reference", "taint", "manual"]
14-
- ["<_ as futures_util::io::AsyncBufReadExt>::read_until", "Argument[self]", "Argument[1].Reference", "taint", "manual"]
1512
- ["<_ as futures_util::io::AsyncBufReadExt>::read_until", "Argument[self].Reference", "Argument[1].Reference", "taint", "manual"]
16-
- ["<_ as futures_util::io::AsyncBufReadExt>::fill_buf", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
13+
- ["<_ as futures_util::io::AsyncBufReadExt>::fill_buf", "Argument[self].Reference", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
1714
- ["<_ as futures_util::io::AsyncBufReadExt>::lines", "Argument[self]", "ReturnValue", "taint", "manual"]
1815
- ["<_ as futures_io::if_std::AsyncBufRead>::poll_fill_buf", "Argument[self].Reference", "ReturnValue.Field[core::task::poll::Poll::Ready(0)].Field[core::result::Result::Ok(0)]", "taint", "manual"]
1916
- ["<_ as futures_io::if_std::AsyncRead>::poll_read", "Argument[self].Reference", "Argument[1].Reference", "taint", "manual"]

rust/ql/lib/codeql/rust/frameworks/reqwest.model.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ extensions:
2222
- ["<reqwest::response::Response>::text", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
2323
- ["<reqwest::response::Response>::text_with_charset", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
2424
- ["<reqwest::response::Response>::bytes", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
25-
- ["<reqwest::response::Response>::chunk", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)].Field[core::option::Option::Some(0)]", "taint", "manual"]
25+
- ["<reqwest::response::Response>::chunk", "Argument[self].Reference", "ReturnValue.Future.Field[core::result::Result::Ok(0)].Field[core::option::Option::Some(0)]", "taint", "manual"]
2626
- ["<reqwest::blocking::response::Response>::text", "Argument[self]", "ReturnValue.Field[core::result::Result::Ok(0)]", "taint", "manual"]
2727
- ["<reqwest::blocking::response::Response>::text_with_charset", "Argument[self]", "ReturnValue.Field[core::result::Result::Ok(0)]", "taint", "manual"]
2828
- ["<reqwest::blocking::response::Response>::bytes", "Argument[self]", "ReturnValue.Field[core::result::Result::Ok(0)]", "taint", "manual"]
2929
- ["<reqwest::async_impl::response::Response>::text", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
3030
- ["<reqwest::async_impl::response::Response>::bytes", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)]", "taint", "manual"]
31-
- ["<reqwest::async_impl::response::Response>::chunk", "Argument[self]", "ReturnValue.Future.Field[core::result::Result::Ok(0)].Field[core::option::Option::Some(0)]", "taint", "manual"]
31+
- ["<reqwest::async_impl::response::Response>::chunk", "Argument[self].Reference", "ReturnValue.Future.Field[core::result::Result::Ok(0)].Field[core::option::Option::Some(0)]", "taint", "manual"]

0 commit comments

Comments
 (0)