Skip to content

Commit 5b4c365

Browse files
authored
Add more StringView comparison test coverage (#10997)
* Add more StringView comparison test coverage * add reference * Add another test showing casting on columns works correctly
1 parent 507d978 commit 5b4c365

File tree

1 file changed

+211
-59
lines changed

1 file changed

+211
-59
lines changed

datafusion/sqllogictest/test_files/string_view.slt

Lines changed: 211 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -15,99 +15,251 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18+
########
19+
## Test setup
20+
########
1821

19-
# test StringViewArray with Utf8View columns
2022
statement ok
21-
create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')),
22-
(arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')),
23-
(arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')),
24-
(arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View'));
23+
create table test_source as values
24+
('Andrew', 'X'),
25+
('Xiangpeng', 'Xiangpeng'),
26+
('Raphael', 'R'),
27+
(NULL, 'R')
28+
;
2529

26-
query B
27-
select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
28-
----
29-
false
30+
# Table with the different combinations of column types (Utf8, Utf8View, Dictionary)
31+
statement ok
32+
create table test as
33+
SELECT
34+
arrow_cast(column1, 'Utf8') as column1_utf8,
35+
arrow_cast(column2, 'Utf8') as column2_utf8,
36+
arrow_cast(column1, 'Utf8View') as column1_utf8view,
37+
arrow_cast(column2, 'Utf8View') as column2_utf8view,
38+
arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict,
39+
arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict
40+
FROM test_source;
3041

31-
query B
32-
select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
33-
----
34-
true
42+
statement ok
43+
drop table test_source
3544

36-
query B
37-
select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View');
38-
----
39-
true
45+
########
46+
## StringView to StringView
47+
########
4048

41-
query B
42-
select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
49+
# StringView scalar to StringView scalar
50+
51+
query BBBB
52+
select
53+
arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
54+
arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'),
55+
arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'),
56+
arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View');
4357
----
44-
true
58+
false true true true
59+
60+
61+
# StringView column to StringView column comparison as filters
4562

46-
query ??
47-
select * from test where column1 = column2;
63+
query TT
64+
select column1_utf8, column2_utf8 from test where column1_utf8view = column2_utf8view;
4865
----
4966
Xiangpeng Xiangpeng
5067

51-
query ??
52-
select * from test where column1 <> column2;
68+
query TT
69+
select column1_utf8, column2_utf8 from test where column1_utf8view <> column2_utf8view;
5370
----
5471
Andrew X
5572
Raphael R
5673

57-
query ??
58-
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
74+
# StringView column to StringView column
75+
query TTBB
76+
select
77+
column1_utf8, column2_utf8,
78+
column1_utf8view = column2_utf8view,
79+
column1_utf8view <> column2_utf8view
80+
from test;
5981
----
60-
Andrew X
82+
Andrew X false true
83+
Xiangpeng Xiangpeng true false
84+
Raphael R false true
85+
NULL R NULL NULL
6186

62-
query ??
63-
select * from test where column1 = 'Andrew';
87+
# StringView column to StringView scalar comparison
88+
query TTBBBB
89+
select
90+
column1_utf8, column2_utf8,
91+
column1_utf8view = arrow_cast('Andrew', 'Utf8View'),
92+
arrow_cast('Andrew', 'Utf8View') = column1_utf8view,
93+
column1_utf8view <> arrow_cast('Andrew', 'Utf8View'),
94+
arrow_cast('Andrew', 'Utf8View') <> column1_utf8view
95+
from test;
6496
----
65-
Andrew X
97+
Andrew X true true false false
98+
Xiangpeng Xiangpeng false false true true
99+
Raphael R false false true true
100+
NULL R NULL NULL NULL NULL
66101

67-
query ??
68-
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
102+
########
103+
## StringView to String
104+
########
105+
106+
# StringView column to String column
107+
query TTBBBB
108+
select
109+
column1_utf8, column2_utf8,
110+
column1_utf8view = column2_utf8,
111+
column2_utf8 = column1_utf8view,
112+
column1_utf8view <> column2_utf8,
113+
column2_utf8 <> column1_utf8view
114+
from test;
69115
----
70-
Xiangpeng Xiangpeng
71-
Raphael R
116+
Andrew X false false true true
117+
Xiangpeng Xiangpeng true true false false
118+
Raphael R false false true true
119+
NULL R NULL NULL NULL NULL
72120

73-
query ??
74-
select * from test where column1 <> 'Andrew';
121+
# StringView column to String scalar
122+
query TTBBBB
123+
select
124+
column1_utf8, column2_utf8,
125+
column1_utf8view = arrow_cast('Andrew', 'Utf8'),
126+
arrow_cast('Andrew', 'Utf8') = column1_utf8view,
127+
column1_utf8view <> arrow_cast('Andrew', 'Utf8'),
128+
arrow_cast('Andrew', 'Utf8') <> column1_utf8view
129+
from test;
75130
----
76-
Xiangpeng Xiangpeng
77-
Raphael R
131+
Andrew X true true false false
132+
Xiangpeng Xiangpeng false false true true
133+
Raphael R false false true true
134+
NULL R NULL NULL NULL NULL
78135

79-
statement ok
80-
drop table test;
136+
# String column to StringView scalar
137+
query TTBBBB
138+
select
139+
column1_utf8, column2_utf8,
140+
column1_utf8 = arrow_cast('Andrew', 'Utf8View'),
141+
arrow_cast('Andrew', 'Utf8View') = column1_utf8,
142+
column1_utf8 <> arrow_cast('Andrew', 'Utf8View'),
143+
arrow_cast('Andrew', 'Utf8View') <> column1_utf8
144+
from test;
145+
----
146+
Andrew X true true false false
147+
Xiangpeng Xiangpeng false false true true
148+
Raphael R false false true true
149+
NULL R NULL NULL NULL NULL
150+
151+
152+
########
153+
## StringView to Dictionary
154+
########
155+
156+
# StringView column to Dict column
157+
query TTBBBB
158+
select
159+
column1_utf8, column2_utf8,
160+
column1_utf8view = column2_dict,
161+
column2_dict = column1_utf8view,
162+
column1_utf8view <> column2_dict,
163+
column2_dict <> column1_utf8view
164+
from test;
165+
----
166+
Andrew X false false true true
167+
Xiangpeng Xiangpeng true true false false
168+
Raphael R false false true true
169+
NULL R NULL NULL NULL NULL
170+
171+
# StringView column to Dict scalar
172+
query TTBBBB
173+
select
174+
column1_utf8, column2_utf8,
175+
column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'),
176+
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view,
177+
column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'),
178+
arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view
179+
from test;
180+
----
181+
Andrew X true true false false
182+
Xiangpeng Xiangpeng false false true true
183+
Raphael R false false true true
184+
NULL R NULL NULL NULL NULL
185+
186+
# Dict column to StringView scalar
187+
query TTBBBB
188+
select
189+
column1_utf8, column2_utf8,
190+
column1_dict = arrow_cast('Andrew', 'Utf8View'),
191+
arrow_cast('Andrew', 'Utf8View') = column1_dict,
192+
column1_dict <> arrow_cast('Andrew', 'Utf8View'),
193+
arrow_cast('Andrew', 'Utf8View') <> column1_dict
194+
from test;
195+
----
196+
Andrew X true true false false
197+
Xiangpeng Xiangpeng false false true true
198+
Raphael R false false true true
199+
NULL R NULL NULL NULL NULL
200+
201+
202+
########
203+
## Coercion Rules
204+
########
81205

82206

83-
# test StringViewArray with Utf8 and Utf8View columns
84207
statement ok
85-
create table test as values ('Andrew', arrow_cast('X', 'Utf8View')),
86-
('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')),
87-
('Raphael', arrow_cast('R', 'Utf8View')),
88-
(NULL, arrow_cast('R', 'Utf8View'));
208+
set datafusion.explain.logical_plan_only = true;
89209

90-
query T?
91-
select * from test where column1 = column2;
210+
211+
# Filter should have a StringView literal and no column cast
212+
query TT
213+
explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew';
92214
----
93-
Xiangpeng Xiangpeng
215+
logical_plan
216+
01)Projection: test.column1_utf8
217+
02)--Filter: test.column1_utf8view = Utf8View("Andrew")
218+
03)----TableScan: test projection=[column1_utf8, column1_utf8view]
94219

95-
query T?
96-
select * from test where column1 <> column2;
220+
# Reversing the operand order should produce the same plan
221+
query TT
222+
explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view;
97223
----
98-
Andrew X
99-
Raphael R
224+
logical_plan
225+
01)Projection: test.column1_utf8
226+
02)--Filter: test.column1_utf8view = Utf8View("Andrew")
227+
03)----TableScan: test projection=[column1_utf8, column1_utf8view]
100228

101-
query T?
102-
select * from test where column1 = arrow_cast('Andrew', 'Utf8View');
229+
# The plan below currently casts the column, but it should not: https://github.com/apache/datafusion/issues/10998
230+
query TT
231+
explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View');
103232
----
104-
Andrew X
233+
logical_plan
234+
01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew")
235+
02)--TableScan: test projection=[column1_utf8]
105236

106-
query T?
107-
select * from test where column1 <> arrow_cast('Andrew', 'Utf8View');
237+
query TT
238+
explain SELECT column1_utf8 from test where column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)');
108239
----
109-
Xiangpeng Xiangpeng
110-
Raphael R
240+
logical_plan
241+
01)Projection: test.column1_utf8
242+
02)--Filter: test.column1_utf8view = Utf8View("Andrew")
243+
03)----TableScan: test projection=[column1_utf8, column1_utf8view]
244+
245+
# compare string / stringview
246+
# Should cast string -> stringview (which is cheap), not stringview -> string (which is not)
247+
query TT
248+
explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8;
249+
----
250+
logical_plan
251+
01)Projection: test.column1_utf8
252+
02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View)
253+
03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view]
254+
255+
query TT
256+
explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view;
257+
----
258+
logical_plan
259+
01)Projection: test.column1_utf8
260+
02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view
261+
03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view]
262+
111263

112264
statement ok
113265
drop table test;

0 commit comments

Comments
 (0)