-
Notifications
You must be signed in to change notification settings - Fork 3
Adding optimization rewrite pass to utilize server with information about masked columns #443
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4d6488c
5369379
36cab6e
ed6650c
cc2bbed
a6d4b29
beadb15
1b4bcac
5ea82f1
f0f512c
54ecef1
557aaeb
6b109d9
1377916
891c472
7d7580b
62db4bf
c9f6a59
7c37110
127244f
1f2dc6d
2864e4a
b278f9b
dcbb69c
feabd8a
940dd16
a6f6a37
af10c5b
0371ec5
3996ced
29e0e3f
f9c05b2
4f274fd
18379ef
f512f8b
90f0671
f6a571b
b728348
8e03b04
a3c79cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,8 @@ | |
|
|
||
| import pydough.pydough_operators as pydop | ||
| from pydough.configs import PyDoughSession | ||
| from pydough.mask_server.mask_server_candidate_visitor import MaskServerCandidateVisitor | ||
| from pydough.mask_server.mask_server_rewrite_shuttle import MaskServerRewriteShuttle | ||
| from pydough.metadata import ( | ||
| CartesianProductMetadata, | ||
| GeneralJoinMetadata, | ||
|
|
@@ -45,7 +47,10 @@ | |
| LiteralExpression, | ||
| Project, | ||
| RelationalExpression, | ||
| RelationalExpressionDispatcher, | ||
| RelationalExpressionShuttle, | ||
| RelationalExpressionShuttleDispatcher, | ||
| RelationalExpressionVisitor, | ||
| RelationalNode, | ||
| RelationalRoot, | ||
| Scan, | ||
|
|
@@ -861,7 +866,9 @@ def build_simple_table_scan( | |
| ) | ||
| unmask_columns[name] = CallExpression( | ||
| pydop.MaskedExpressionFunctionOperator( | ||
| hybrid_expr.column.column_property, True | ||
| hybrid_expr.column.column_property, | ||
| node.collection.collection.table_path, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the reason why we need to use the full table path in metadata?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. EXACTLY (plus its a good idea in general) |
||
| True, | ||
| ), | ||
| hybrid_expr.column.column_property.unprotected_data_type, | ||
| [ColumnReference(name, hybrid_expr.typ)], | ||
|
|
@@ -1561,7 +1568,9 @@ def confirm_root(node: RelationalNode) -> RelationalRoot: | |
| def optimize_relational_tree( | ||
| root: RelationalRoot, | ||
| session: PyDoughSession, | ||
| additional_shuttles: list[RelationalExpressionShuttle], | ||
| additional_shuttles: list[ | ||
| RelationalExpressionShuttle | RelationalExpressionVisitor | ||
| ], | ||
| ) -> RelationalRoot: | ||
| """ | ||
| Runs optimize on the relational tree, including pushing down filters and | ||
|
|
@@ -1570,8 +1579,8 @@ def optimize_relational_tree( | |
| Args: | ||
| `root`: the relational root to optimize. | ||
| `configs`: PyDough session used during optimization. | ||
| `additional_shuttles`: additional relational expression shuttles to use | ||
| for expression simplification. | ||
| `additional_shuttles`: additional relational expression shuttles or | ||
| visitors to use for expression simplification. | ||
|
|
||
| Returns: | ||
| The optimized relational root. | ||
|
|
@@ -1633,7 +1642,7 @@ def optimize_relational_tree( | |
|
|
||
| # Run the following pipeline twice: | ||
| # A: projection pullup | ||
| # B: expression simplification | ||
| # B: expression simplification (followed by additional shuttles) | ||
| # C: filter pushdown | ||
| # D: join-aggregate transpose | ||
| # E: projection pullup again | ||
|
|
@@ -1647,7 +1656,13 @@ def optimize_relational_tree( | |
| # pullup and pushdown and so on. | ||
| for _ in range(2): | ||
| root = confirm_root(pullup_projections(root)) | ||
| simplify_expressions(root, session, additional_shuttles) | ||
| simplify_expressions(root, session) | ||
| # Run all of the other shuttles/visitors over the entire tree. | ||
| for shuttle_or_visitor in additional_shuttles: | ||
| if isinstance(shuttle_or_visitor, RelationalExpressionShuttle): | ||
| root.accept(RelationalExpressionShuttleDispatcher(shuttle_or_visitor)) | ||
| else: | ||
| root.accept(RelationalExpressionDispatcher(shuttle_or_visitor, True)) | ||
| root = confirm_root(push_filters(root, session)) | ||
| root = confirm_root(pull_aggregates_above_joins(root)) | ||
| root = confirm_root(pullup_projections(root)) | ||
|
|
@@ -1716,10 +1731,19 @@ def convert_ast_to_relational( | |
| raw_result: RelationalRoot = postprocess_root(node, columns, hybrid, output) | ||
|
|
||
| # Invoke the optimization procedures on the result to clean up the tree. | ||
| additional_shuttles: list[RelationalExpressionShuttle] = [] | ||
| additional_shuttles: list[ | ||
| RelationalExpressionShuttle | RelationalExpressionVisitor | ||
| ] = [] | ||
| # Add the mask literal comparison shuttle if the environment variable | ||
| # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. | ||
| # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. If a masking rewrite server has | ||
| # been attached to the session, include the shuttles for that as well. | ||
| if os.getenv("PYDOUGH_ENABLE_MASK_REWRITES") == "1": | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reson why
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because we wanted an environment variable as a "switch" |
||
| if session.mask_server is not None: | ||
| candidate_shuttle: MaskServerCandidateVisitor = MaskServerCandidateVisitor() | ||
| additional_shuttles.append(candidate_shuttle) | ||
| additional_shuttles.append( | ||
| MaskServerRewriteShuttle(session.mask_server, candidate_shuttle) | ||
| ) | ||
| additional_shuttles.append(MaskLiteralComparisonShuttle()) | ||
| optimized_result: RelationalRoot = optimize_relational_tree( | ||
| raw_result, session, additional_shuttles | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a special reason why to use
Union["MaskServerInfo", None]instead ofMaskServerInfo | Nonewithfrom __future__ import annotations?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because it is imported via
if TYPE_CHECKING:, soMaskServerInfowon't always be imported, but the type checker will recognize"MaskServerInfo"(which can't be done with| None). This is how we avoid circular imports.