From 77155a13770fd5b7407de12a27aa6cddc19ad480 Mon Sep 17 00:00:00 2001 From: Shiv Bhatia Date: Sun, 28 Sep 2025 03:48:50 +0100 Subject: [PATCH 1/2] Extend datatype semantic equality check to include timestamps (#17777) * Extend datatype semantic equality to include timestamps * test * Respond to comments * cargo fmt --------- Co-authored-by: Shiv Bhatia --- datafusion/common/src/dfschema.rs | 36 ++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index f9e3b2cee40d..1bf64f0db9b4 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -747,7 +747,8 @@ impl DFSchema { } /// Returns true of two [`DataType`]s are semantically equal (same - /// name and type), ignoring both metadata and nullability, and decimal precision/scale. + /// name and type), ignoring both metadata and nullability, decimal precision/scale, + /// and timezone time units/timezones. /// /// request to upstream: pub fn datatype_is_semantically_equal(dt1: &DataType, dt2: &DataType) -> bool { @@ -806,6 +807,10 @@ impl DFSchema { DataType::Decimal256(_l_precision, _l_scale), DataType::Decimal256(_r_precision, _r_scale), ) => true, + ( + DataType::Timestamp(_l_time_unit, _l_timezone), + DataType::Timestamp(_r_time_unit, _r_timezone), + ) => true, _ => dt1 == dt2, } } @@ -1596,6 +1601,35 @@ mod tests { &DataType::Int16 )); + // Succeeds if decimal precision and scale are different + assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Decimal32(1, 2), + &DataType::Decimal32(2, 1), + )); + + assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Decimal64(1, 2), + &DataType::Decimal64(2, 1), + )); + + assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Decimal128(1, 2), + &DataType::Decimal128(2, 1), + )); + + assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Decimal256(1, 2), + &DataType::Decimal256(2, 1), + )); + + // Any two timestamp types should match + assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Timestamp( + arrow::datatypes::TimeUnit::Microsecond, + Some("UTC".into()) + ), + &DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None), + )); // Test lists // Succeeds if both have the same element type, disregards names and nullability From c431fec11a4b4d7e729db49affc36cc64da7ac75 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 19 Oct 2025 16:45:41 -0400 Subject: [PATCH 2/2] remove incorectly backported test --- datafusion/common/src/dfschema.rs | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 1bf64f0db9b4..8cb6678e0068 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1601,27 +1601,6 @@ mod tests { &DataType::Int16 )); - // Succeeds if decimal precision and scale are different - assert!(DFSchema::datatype_is_semantically_equal( - &DataType::Decimal32(1, 2), - &DataType::Decimal32(2, 1), - )); - - assert!(DFSchema::datatype_is_semantically_equal( - &DataType::Decimal64(1, 2), - &DataType::Decimal64(2, 1), - )); - - assert!(DFSchema::datatype_is_semantically_equal( - &DataType::Decimal128(1, 2), - &DataType::Decimal128(2, 1), - )); - - assert!(DFSchema::datatype_is_semantically_equal( - &DataType::Decimal256(1, 2), - &DataType::Decimal256(2, 1), - )); - // Any two timestamp types should match assert!(DFSchema::datatype_is_semantically_equal( &DataType::Timestamp(