Skip to content

Commit 1075a01

Browse files
authored
Merge pull request #55 from DataDog/ahmed/cherry-pick-7d294f1dc
Cherry-pick 7d294f1 (dict type coercion fix)
2 parents d95ac0c + aec941a commit 1075a01

File tree

2 files changed

+84
-10
lines changed

2 files changed

+84
-10
lines changed

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,7 @@ pub fn comparison_coercion_numeric(
796796
return Some(lhs_type.clone());
797797
}
798798
binary_numeric_coercion(lhs_type, rhs_type)
799+
.or_else(|| dictionary_comparison_coercion_numeric(lhs_type, rhs_type, true))
799800
.or_else(|| string_coercion(lhs_type, rhs_type))
800801
.or_else(|| null_coercion(lhs_type, rhs_type))
801802
.or_else(|| string_numeric_coercion_as_numeric(lhs_type, rhs_type))
@@ -1146,38 +1147,75 @@ fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) ->
11461147
}
11471148
}
11481149

1149-
/// Coercion rules for Dictionaries: the type that both lhs and rhs
1150+
/// Generic coercion rules for Dictionaries: the type that both lhs and rhs
11501151
/// can be casted to for the purpose of a computation.
11511152
///
11521153
/// Not all operators support dictionaries, if `preserve_dictionaries` is true
1153-
/// dictionaries will be preserved if possible
1154-
fn dictionary_comparison_coercion(
1154+
/// dictionaries will be preserved if possible.
1155+
///
1156+
/// The `coerce_fn` parameter determines which comparison coercion function to use
1157+
/// for comparing the dictionary value types.
1158+
fn dictionary_comparison_coercion_generic(
11551159
lhs_type: &DataType,
11561160
rhs_type: &DataType,
11571161
preserve_dictionaries: bool,
1162+
coerce_fn: fn(&DataType, &DataType) -> Option<DataType>,
11581163
) -> Option<DataType> {
11591164
use arrow::datatypes::DataType::*;
11601165
match (lhs_type, rhs_type) {
11611166
(
11621167
Dictionary(_lhs_index_type, lhs_value_type),
11631168
Dictionary(_rhs_index_type, rhs_value_type),
1164-
) => comparison_coercion(lhs_value_type, rhs_value_type),
1169+
) => coerce_fn(lhs_value_type, rhs_value_type),
11651170
(d @ Dictionary(_, value_type), other_type)
11661171
| (other_type, d @ Dictionary(_, value_type))
11671172
if preserve_dictionaries && value_type.as_ref() == other_type =>
11681173
{
11691174
Some(d.clone())
11701175
}
1171-
(Dictionary(_index_type, value_type), _) => {
1172-
comparison_coercion(value_type, rhs_type)
1173-
}
1174-
(_, Dictionary(_index_type, value_type)) => {
1175-
comparison_coercion(lhs_type, value_type)
1176-
}
1176+
(Dictionary(_index_type, value_type), _) => coerce_fn(value_type, rhs_type),
1177+
(_, Dictionary(_index_type, value_type)) => coerce_fn(lhs_type, value_type),
11771178
_ => None,
11781179
}
11791180
}
11801181

1182+
/// Coercion rules for Dictionaries: the type that both lhs and rhs
1183+
/// can be casted to for the purpose of a computation.
1184+
///
1185+
/// Not all operators support dictionaries, if `preserve_dictionaries` is true
1186+
/// dictionaries will be preserved if possible
1187+
fn dictionary_comparison_coercion(
1188+
lhs_type: &DataType,
1189+
rhs_type: &DataType,
1190+
preserve_dictionaries: bool,
1191+
) -> Option<DataType> {
1192+
dictionary_comparison_coercion_generic(
1193+
lhs_type,
1194+
rhs_type,
1195+
preserve_dictionaries,
1196+
comparison_coercion,
1197+
)
1198+
}
1199+
1200+
/// Coercion rules for Dictionaries with numeric preference: similar to
1201+
/// [`dictionary_comparison_coercion`] but uses [`comparison_coercion_numeric`]
1202+
/// which prefers numeric types over strings when both are present.
1203+
///
1204+
/// This is used by [`comparison_coercion_numeric`] to maintain consistent
1205+
/// numeric-preferring semantics when dealing with dictionary types.
1206+
fn dictionary_comparison_coercion_numeric(
1207+
lhs_type: &DataType,
1208+
rhs_type: &DataType,
1209+
preserve_dictionaries: bool,
1210+
) -> Option<DataType> {
1211+
dictionary_comparison_coercion_generic(
1212+
lhs_type,
1213+
rhs_type,
1214+
preserve_dictionaries,
1215+
comparison_coercion_numeric,
1216+
)
1217+
}
1218+
11811219
/// Coercion rules for string concat.
11821220
/// This is a union of string coercion rules and specified rules:
11831221
/// 1. At least one side of lhs and rhs should be string type (Utf8 / LargeUtf8)

datafusion/sqllogictest/test_files/nullif.slt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,39 @@ query T
174174
SELECT NULLIF(arrow_cast('a', 'Utf8View'), null);
175175
----
176176
a
177+
178+
# Test with dictionary-encoded strings
179+
# This tests the fix for: "Dictionary(UInt32, Utf8) and Utf8 is not comparable"
180+
statement ok
181+
CREATE TABLE dict_test_base(
182+
col1 TEXT,
183+
col2 TEXT
184+
) as VALUES
185+
('foo', 'bar'),
186+
('bar', 'bar'),
187+
('baz', 'bar')
188+
;
189+
190+
# Dictionary cast with string literal
191+
query T rowsort
192+
SELECT NULLIF(arrow_cast(col1, 'Dictionary(Int32, Utf8)'), 'bar') FROM dict_test_base;
193+
----
194+
NULL
195+
baz
196+
foo
197+
198+
# String with dictionary cast
199+
query T rowsort
200+
SELECT NULLIF(col2, arrow_cast(col1, 'Dictionary(Int32, Utf8)')) FROM dict_test_base;
201+
----
202+
NULL
203+
bar
204+
bar
205+
206+
# Both as dictionaries
207+
query T rowsort
208+
SELECT NULLIF(arrow_cast(col1, 'Dictionary(Int32, Utf8)'), arrow_cast('bar', 'Dictionary(Int32, Utf8)')) FROM dict_test_base;
209+
----
210+
NULL
211+
baz
212+
foo

0 commit comments

Comments
 (0)