Skip to content

Commit 99523e5

Browse files
perf: Prune unused operations from sql
1 parent 86b7e72 commit 99523e5

File tree

9 files changed

+490
-252
lines changed

9 files changed

+490
-252
lines changed

bigframes/core/__init__.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -304,18 +304,25 @@ def assign(self, source_id: str, destination_id: str) -> ArrayValue:
304304
if destination_id in self.column_ids: # Mutate case
305305
exprs = [
306306
(
307-
ex.deref(source_id if (col_id == destination_id) else col_id),
308-
ids.ColumnId(col_id),
307+
bigframes.core.nodes.AliasedRef(
308+
ex.deref(source_id if (col_id == destination_id) else col_id),
309+
ids.ColumnId(col_id),
310+
)
309311
)
310312
for col_id in self.column_ids
311313
]
312314
else: # append case
313315
self_projection = (
314-
(ex.deref(col_id), ids.ColumnId(col_id)) for col_id in self.column_ids
316+
bigframes.core.nodes.AliasedRef.identity(ids.ColumnId(col_id))
317+
for col_id in self.column_ids
315318
)
316319
exprs = [
317320
*self_projection,
318-
(ex.deref(source_id), ids.ColumnId(destination_id)),
321+
(
322+
bigframes.core.nodes.AliasedRef(
323+
ex.deref(source_id), ids.ColumnId(destination_id)
324+
)
325+
),
319326
]
320327
return ArrayValue(
321328
nodes.SelectionNode(
@@ -337,7 +344,10 @@ def create_constant(
337344

338345
def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
339346
# This basically just drops and reorders columns - logically a no-op except as a final step
340-
selections = ((ex.deref(col_id), ids.ColumnId(col_id)) for col_id in column_ids)
347+
selections = (
348+
bigframes.core.nodes.AliasedRef.identity(ids.ColumnId(col_id))
349+
for col_id in column_ids
350+
)
341351
return ArrayValue(
342352
nodes.SelectionNode(
343353
child=self.node,
@@ -488,7 +498,9 @@ def prepare_join_names(
488498
nodes.SelectionNode(
489499
other.node,
490500
tuple(
491-
(ex.deref(old_id), ids.ColumnId(new_id))
501+
bigframes.core.nodes.AliasedRef(
502+
ex.deref(old_id), ids.ColumnId(new_id)
503+
)
492504
for old_id, new_id in r_mapping.items()
493505
),
494506
),

bigframes/core/compile/compiler.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@ def compile_sql(
6565
node, ordering = rewrites.pull_up_order(
6666
node, order_root=True, ordered_joins=self.strict
6767
)
68+
node = rewrites.column_pruning(node)
6869
ir = self.compile_node(node)
6970
return ir.to_sql(
7071
order_by=ordering.all_ordering_columns,
@@ -76,6 +77,7 @@ def compile_sql(
7677
node, _ = rewrites.pull_up_order(
7778
node, order_root=False, ordered_joins=self.strict
7879
)
80+
node = rewrites.column_pruning(node)
7981
ir = self.compile_node(node)
8082
return ir.to_sql(selections=output_ids)
8183

@@ -86,6 +88,7 @@ def compile_peek_sql(self, node: nodes.BigFrameNode, n_rows: int) -> str:
8688
node, _ = rewrites.pull_up_order(
8789
node, order_root=False, ordered_joins=self.strict
8890
)
91+
node = rewrites.column_pruning(node)
8992
return self.compile_node(node).to_sql(limit=n_rows, selections=ids)
9093

9194
def compile_raw(
@@ -97,6 +100,7 @@ def compile_raw(
97100
node = nodes.bottom_up(node, rewrites.rewrite_slice)
98101
node = nodes.top_down(node, rewrites.rewrite_timedelta_ops)
99102
node, ordering = rewrites.pull_up_order(node, ordered_joins=self.strict)
103+
node = rewrites.column_pruning(node)
100104
ir = self.compile_node(node)
101105
sql = ir.to_sql()
102106
return sql, node.schema.to_bigquery(), ordering
@@ -192,10 +196,12 @@ def compile_readtable(self, node: nodes.ReadTableNode):
192196
return self.compile_read_table_unordered(node.source, node.scan_list)
193197

194198
def read_table_as_unordered_ibis(
195-
self, source: nodes.BigqueryDataSource
199+
self,
200+
source: nodes.BigqueryDataSource,
201+
scan_cols: typing.Sequence[str],
196202
) -> ibis_types.Table:
197203
full_table_name = f"{source.table.project_id}.{source.table.dataset_id}.{source.table.table_id}"
198-
used_columns = tuple(col.name for col in source.table.physical_schema)
204+
used_columns = tuple(scan_cols)
199205
# Physical schema might include unused columns, unsupported datatypes like JSON
200206
physical_schema = ibis_bigquery.BigQuerySchema.to_ibis(
201207
list(i for i in source.table.physical_schema if i.name in used_columns)
@@ -216,7 +222,9 @@ def read_table_as_unordered_ibis(
216222
def compile_read_table_unordered(
217223
self, source: nodes.BigqueryDataSource, scan: nodes.ScanList
218224
):
219-
ibis_table = self.read_table_as_unordered_ibis(source)
225+
ibis_table = self.read_table_as_unordered_ibis(
226+
source, scan_cols=[col.source_id for col in scan.items]
227+
)
220228
return compiled.UnorderedIR(
221229
ibis_table,
222230
tuple(
@@ -291,7 +299,7 @@ def set_output_names(
291299
return nodes.SelectionNode(
292300
node,
293301
tuple(
294-
(ex.DerefOp(old_id), ids.ColumnId(out_id))
302+
bigframes.core.nodes.AliasedRef(ex.DerefOp(old_id), ids.ColumnId(out_id))
295303
for old_id, out_id in zip(node.ids, output_ids)
296304
),
297305
)

0 commit comments

Comments
 (0)