Skip to content

Commit 507d978

Browse files
authored
feat: Implement equality = and inequality <> support for StringView (#10985)
* feat: Implement equality = and inequality <> support for StringView
* chore: Add tests for the StringView
* chore
* chore: Update tests for NULL
* fix: Used build_array_string!
* chore: Update string_coercion function to handle Utf8View type in binary.rs
* chore: add tests
* chore: ci
1 parent 810cce7 commit 507d978

File tree

6 files changed

+153
-39
lines changed

6 files changed

+153
-39
lines changed

Cargo.toml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,15 @@ unused_imports = "deny"
157157
## Temporary arrow-rs patch until 52.1.0 is released
158158

159159
[patch.crates-io]
160-
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
161-
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
162-
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
163-
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
164-
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
165-
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
166-
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
167-
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
168-
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
169-
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
170-
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
171-
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
160+
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
161+
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
162+
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
163+
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
164+
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
165+
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
166+
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
167+
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
168+
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
169+
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
170+
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
171+
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }

datafusion-cli/Cargo.lock

Lines changed: 15 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/Cargo.toml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@ rstest = "0.17"
6666
## Temporary arrow-rs patch until 52.1.0 is released
6767

6868
[patch.crates-io]
69-
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
70-
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
71-
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
72-
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
73-
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
74-
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
75-
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
76-
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
77-
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
78-
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
79-
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "72467c670f8c38130e4743347407f1a542e59e0c" }
69+
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
70+
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
71+
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
72+
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
73+
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
74+
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
75+
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
76+
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
77+
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
78+
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }
79+
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d0a88c651991b7fc4b970cf94fa77f4ec3def22d" }

datafusion/common/src/scalar/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1570,6 +1570,7 @@ impl ScalarValue {
15701570
DataType::UInt16 => build_array_primitive!(UInt16Array, UInt16),
15711571
DataType::UInt32 => build_array_primitive!(UInt32Array, UInt32),
15721572
DataType::UInt64 => build_array_primitive!(UInt64Array, UInt64),
1573+
DataType::Utf8View => build_array_string!(StringViewArray, Utf8View),
15731574
DataType::Utf8 => build_array_string!(StringArray, Utf8),
15741575
DataType::LargeUtf8 => build_array_string!(LargeStringArray, LargeUtf8),
15751576
DataType::Binary => build_array_string!(BinaryArray, Binary),
@@ -1726,7 +1727,6 @@ impl ScalarValue {
17261727
| DataType::Time64(TimeUnit::Millisecond)
17271728
| DataType::Map(_, _)
17281729
| DataType::RunEndEncoded(_, _)
1729-
| DataType::Utf8View
17301730
| DataType::BinaryView
17311731
| DataType::ListView(_)
17321732
| DataType::LargeListView(_) => {

datafusion/expr/src/type_coercion/binary.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,7 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
932932
(LargeUtf8, Utf8) => Some(LargeUtf8),
933933
(Utf8, LargeUtf8) => Some(LargeUtf8),
934934
(LargeUtf8, LargeUtf8) => Some(LargeUtf8),
935+
(Utf8View, Utf8View) | (Utf8View, Utf8) | (Utf8, Utf8View) => Some(Utf8View),
935936
_ => None,
936937
}
937938
}

datafusion/sqllogictest/test_files/string_view.slt

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
# test StringViewArray with Utf8View columns
20+
statement ok
21+
create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')),
22+
(arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')),
23+
(arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')),
24+
(arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View'));
25+
26+
query B
27+
select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
28+
----
29+
false
30+
31+
query B
32+
select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
33+
----
34+
true
35+
36+
query B
37+
select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
38+
----
39+
true
40+
41+
query B
42+
select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
43+
----
44+
true
45+
46+
query ??
47+
select * from test where column1 = column2;
48+
----
49+
Xiangpeng Xiangpeng
50+
51+
query ??
52+
select * from test where column1 <> column2;
53+
----
54+
Andrew X
55+
Raphael R
56+
57+
query ??
58+
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
59+
----
60+
Andrew X
61+
62+
query ??
63+
select * from test where column1 = 'Andrew';
64+
----
65+
Andrew X
66+
67+
query ??
68+
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
69+
----
70+
Xiangpeng Xiangpeng
71+
Raphael R
72+
73+
query ??
74+
select * from test where column1 <> 'Andrew';
75+
----
76+
Xiangpeng Xiangpeng
77+
Raphael R
78+
79+
statement ok
80+
drop table test;
81+
82+
83+
# test StringViewArray with Utf8 and Utf8View columns
84+
statement ok
85+
create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
86+
('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
87+
('Raphael', arrow_cast('R', 'Utf8View')),
88+
(NULL, arrow_cast('R', 'Utf8View'));
89+
90+
query T?
91+
select * from test where column1 = column2;
92+
----
93+
Xiangpeng Xiangpeng
94+
95+
query T?
96+
select * from test where column1 <> column2;
97+
----
98+
Andrew X
99+
Raphael R
100+
101+
query T?
102+
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
103+
----
104+
Andrew X
105+
106+
query T?
107+
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
108+
----
109+
Xiangpeng Xiangpeng
110+
Raphael R
111+
112+
statement ok
113+
drop table test;

0 commit comments

Comments
 (0)