Skip to content

Commit 2873fd0

Browse files
jayzhan211 and alamb authored
Add a ScalarUDFImpl::simplify() API, move SimplifyInfo et al to datafusion_expr (#9304)
* first draft Signed-off-by: jayzhan211 <[email protected]> * clippy Signed-off-by: jayzhan211 <[email protected]> * add comments Signed-off-by: jayzhan211 <[email protected]> * move to optimize rule Signed-off-by: jayzhan211 <[email protected]> * cleanup Signed-off-by: jayzhan211 <[email protected]> * fix explain test Signed-off-by: jayzhan211 <[email protected]> * move to simplifier Signed-off-by: jayzhan211 <[email protected]> * pass with schema Signed-off-by: jayzhan211 <[email protected]> * fix explain Signed-off-by: jayzhan211 <[email protected]> * fix doc Signed-off-by: jayzhan211 <[email protected]> * move to expr Signed-off-by: jayzhan211 <[email protected]> * change simplify signature Signed-off-by: jayzhan211 <[email protected]> * cleanup Signed-off-by: jayzhan211 <[email protected]> * cleanup Signed-off-by: jayzhan211 <[email protected]> * fix doc Signed-off-by: jayzhan211 <[email protected]> * fix doc Signed-off-by: jayzhan211 <[email protected]> * Update datafusion/expr/src/udf.rs * Add backwards compatible uses, inline FunctionSimplifier, rename to ExprSimplifyResult * Remove DFSchema from SimplifyInfo * Avoid requiring argument copies * Improve docs * fix link * fix doc test * Update datafusion/physical-expr/src/lib.rs * Change example simplify to always simplify its argument * Clarify comment --------- Signed-off-by: jayzhan211 <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent 3854419 commit 2873fd0

File tree

35 files changed

+287
-99
lines changed

35 files changed

+287
-99
lines changed

datafusion-cli/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-examples/examples/expr_api.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,16 @@ use arrow::record_batch::RecordBatch;
2424
use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
2525
use datafusion::common::{DFField, DFSchema};
2626
use datafusion::error::Result;
27-
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
28-
use datafusion::physical_expr::execution_props::ExecutionProps;
27+
use datafusion::optimizer::simplify_expressions::ExprSimplifier;
2928
use datafusion::physical_expr::{
3029
analyze, create_physical_expr, AnalysisContext, ExprBoundaries, PhysicalExpr,
3130
};
3231
use datafusion::prelude::*;
3332
use datafusion_common::{ScalarValue, ToDFSchema};
33+
use datafusion_expr::execution_props::ExecutionProps;
3434
use datafusion_expr::expr::BinaryExpr;
3535
use datafusion_expr::interval_arithmetic::Interval;
36+
use datafusion_expr::simplify::SimplifyContext;
3637
use datafusion_expr::{ColumnarValue, ExprSchemable, Operator};
3738

3839
/// This example demonstrates the DataFusion [`Expr`] API.

datafusion-examples/examples/simple_udtf.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ use datafusion::physical_plan::memory::MemoryExec;
2828
use datafusion::physical_plan::ExecutionPlan;
2929
use datafusion::prelude::SessionContext;
3030
use datafusion_common::{plan_err, ScalarValue};
31+
use datafusion_expr::simplify::SimplifyContext;
3132
use datafusion_expr::{Expr, TableType};
32-
use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
33+
use datafusion_optimizer::simplify_expressions::ExprSimplifier;
3334
use std::fs::File;
3435
use std::io::Seek;
3536
use std::path::Path;

datafusion/core/src/datasource/listing/helpers.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,10 @@ use arrow::{
3333
use arrow_schema::Fields;
3434
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
3535
use datafusion_common::{internal_err, Column, DFField, DFSchema, DataFusionError};
36+
use datafusion_expr::execution_props::ExecutionProps;
3637
use datafusion_expr::{Expr, ScalarFunctionDefinition, Volatility};
3738
use datafusion_physical_expr::create_physical_expr;
38-
use datafusion_physical_expr::execution_props::ExecutionProps;
39-
40-
use futures::stream::{BoxStream, FuturesUnordered};
41-
use futures::{StreamExt, TryStreamExt};
39+
use futures::stream::{BoxStream, FuturesUnordered, StreamExt, TryStreamExt};
4240
use log::{debug, trace};
4341
use object_store::path::Path;
4442
use object_store::{ObjectMeta, ObjectStore};

datafusion/core/src/datasource/physical_plan/parquet/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,13 +800,14 @@ mod tests {
800800
ArrayRef, Date64Array, Int32Array, Int64Array, Int8Array, StringArray,
801801
StructArray,
802802
};
803+
803804
use arrow::datatypes::{DataType, Field, Schema, SchemaBuilder};
804805
use arrow::record_batch::RecordBatch;
805806
use arrow_schema::Fields;
806807
use datafusion_common::{assert_contains, FileType, GetExt, ScalarValue, ToDFSchema};
808+
use datafusion_expr::execution_props::ExecutionProps;
807809
use datafusion_expr::{col, lit, when, Expr};
808810
use datafusion_physical_expr::create_physical_expr;
809-
use datafusion_physical_expr::execution_props::ExecutionProps;
810811

811812
use chrono::{TimeZone, Utc};
812813
use futures::StreamExt;

datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,9 @@ mod test {
401401
use super::*;
402402
use arrow::datatypes::Field;
403403
use datafusion_common::ToDFSchema;
404+
use datafusion_expr::execution_props::ExecutionProps;
404405
use datafusion_expr::{cast, col, lit, Expr};
405406
use datafusion_physical_expr::create_physical_expr;
406-
use datafusion_physical_expr::execution_props::ExecutionProps;
407407
use parquet::arrow::parquet_to_arrow_schema;
408408
use parquet::file::reader::{FileReader, SerializedFileReader};
409409
use rand::prelude::*;

datafusion/core/src/datasource/physical_plan/parquet/row_groups.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,8 @@ mod tests {
346346
use arrow::datatypes::Schema;
347347
use arrow::datatypes::{DataType, Field};
348348
use datafusion_common::{Result, ToDFSchema};
349+
use datafusion_expr::execution_props::ExecutionProps;
349350
use datafusion_expr::{cast, col, lit, Expr};
350-
use datafusion_physical_expr::execution_props::ExecutionProps;
351351
use datafusion_physical_expr::{create_physical_expr, PhysicalExpr};
352352
use parquet::arrow::arrow_to_parquet_schema;
353353
use parquet::arrow::async_reader::ParquetObjectReader;

datafusion/core/src/execution/context/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ use datafusion_common::{
4343
tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
4444
};
4545
use datafusion_execution::registry::SerializerRegistry;
46+
pub use datafusion_expr::execution_props::ExecutionProps;
47+
use datafusion_expr::var_provider::is_system_variables;
4648
use datafusion_expr::{
4749
logical_plan::{DdlStatement, Statement},
4850
Expr, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
4951
};
50-
pub use datafusion_physical_expr::execution_props::ExecutionProps;
51-
use datafusion_physical_expr::var_provider::is_system_variables;
5252
use parking_lot::RwLock;
5353
use std::collections::hash_map::Entry;
5454
use std::string::String;

datafusion/core/src/physical_optimizer/pruning.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1341,10 +1341,10 @@ mod tests {
13411341
datatypes::{DataType, TimeUnit},
13421342
};
13431343
use datafusion_common::{ScalarValue, ToDFSchema};
1344+
use datafusion_expr::execution_props::ExecutionProps;
13441345
use datafusion_expr::expr::InList;
13451346
use datafusion_expr::{cast, is_null, try_cast, Expr};
13461347
use datafusion_physical_expr::create_physical_expr;
1347-
use datafusion_physical_expr::execution_props::ExecutionProps;
13481348
use std::collections::HashMap;
13491349
use std::ops::{Not, Rem};
13501350

datafusion/core/src/test_util/parquet.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ use crate::datasource::listing::{ListingTableUrl, PartitionedFile};
2828
use crate::datasource::object_store::ObjectStoreUrl;
2929
use crate::datasource::physical_plan::{FileScanConfig, ParquetExec};
3030
use crate::error::Result;
31-
use crate::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
31+
use crate::logical_expr::execution_props::ExecutionProps;
32+
use crate::logical_expr::simplify::SimplifyContext;
33+
use crate::optimizer::simplify_expressions::ExprSimplifier;
3234
use crate::physical_expr::create_physical_expr;
33-
use crate::physical_expr::execution_props::ExecutionProps;
3435
use crate::physical_plan::filter::FilterExec;
3536
use crate::physical_plan::metrics::MetricsSet;
3637
use crate::physical_plan::ExecutionPlan;

0 commit comments

Comments
 (0)