|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 |
|
| 18 | +######## |
| 19 | +## Test setup |
| 20 | +######## |
18 | 21 |
|
19 | | -# test StringViewArray with Utf8View columns |
20 | 22 | statement ok |
21 | | -create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')), |
22 | | - (arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')), |
23 | | - (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')), |
24 | | - (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View')); |
| 23 | +create table test_source as values |
| 24 | + ('Andrew', 'X'), |
| 25 | + ('Xiangpeng', 'Xiangpeng'), |
| 26 | + ('Raphael', 'R'), |
| 27 | + (NULL, 'R') |
| 28 | +; |
25 | 29 |
|
26 | | -query B |
27 | | -select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); |
28 | | ----- |
29 | | -false |
| 30 | +# Table exposing each source column as Utf8, Utf8View, and Dictionary(Int32, Utf8) |
| 31 | +statement ok |
| 32 | +create table test as |
| 33 | +SELECT |
| 34 | + arrow_cast(column1, 'Utf8') as column1_utf8, |
| 35 | + arrow_cast(column2, 'Utf8') as column2_utf8, |
| 36 | + arrow_cast(column1, 'Utf8View') as column1_utf8view, |
| 37 | + arrow_cast(column2, 'Utf8View') as column2_utf8view, |
| 38 | + arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict, |
| 39 | + arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict |
| 40 | +FROM test_source; |
30 | 41 |
|
31 | | -query B |
32 | | -select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); |
33 | | ----- |
34 | | -true |
| 42 | +statement ok |
| 43 | +drop table test_source |
35 | 44 |
|
36 | | -query B |
37 | | -select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); |
38 | | ----- |
39 | | -true |
| 45 | +######## |
| 46 | +## StringView to StringView |
| 47 | +######## |
40 | 48 |
|
41 | | -query B |
42 | | -select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); |
| 49 | +# StringView scalar to StringView scalar (note: 'NULL' below is a string literal, not SQL NULL) |
| 50 | + |
| 51 | +query BBBB |
| 52 | +select |
| 53 | + arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), |
| 54 | + arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'), |
| 55 | + arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), |
| 56 | + arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); |
43 | 57 | ---- |
44 | | -true |
| 58 | +false true true true |
| 59 | + |
| 60 | + |
| 61 | +# StringView column to StringView column comparison as filters |
45 | 62 |
|
46 | | -query ?? |
47 | | -select * from test where column1 = column2; |
| 63 | +query TT |
| 64 | +select column1_utf8, column2_utf8 from test where column1_utf8view = column2_utf8view; |
48 | 65 | ---- |
49 | 66 | Xiangpeng Xiangpeng |
50 | 67 |
|
51 | | -query ?? |
52 | | -select * from test where column1 <> column2; |
| 68 | +query TT |
| 69 | +select column1_utf8, column2_utf8 from test where column1_utf8view <> column2_utf8view; |
53 | 70 | ---- |
54 | 71 | Andrew X |
55 | 72 | Raphael R |
56 | 73 |
|
57 | | -query ?? |
58 | | -select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); |
| 74 | +# StringView column to StringView column |
| 75 | +query TTBB |
| 76 | +select |
| 77 | + column1_utf8, column2_utf8, |
| 78 | + column1_utf8view = column2_utf8view, |
| 79 | + column1_utf8view <> column2_utf8view |
| 80 | +from test; |
59 | 81 | ---- |
60 | | -Andrew X |
| 82 | +Andrew X false true |
| 83 | +Xiangpeng Xiangpeng true false |
| 84 | +Raphael R false true |
| 85 | +NULL R NULL NULL |
61 | 86 |
|
62 | | -query ?? |
63 | | -select * from test where column1 = 'Andrew'; |
| 87 | +# StringView column to StringView scalar comparison |
| 88 | +query TTBBBB |
| 89 | +select |
| 90 | + column1_utf8, column2_utf8, |
| 91 | + column1_utf8view = arrow_cast('Andrew', 'Utf8View'), |
| 92 | + arrow_cast('Andrew', 'Utf8View') = column1_utf8view, |
| 93 | + column1_utf8view <> arrow_cast('Andrew', 'Utf8View'), |
| 94 | + arrow_cast('Andrew', 'Utf8View') <> column1_utf8view |
| 95 | +from test; |
64 | 96 | ---- |
65 | | -Andrew X |
| 97 | +Andrew X true true false false |
| 98 | +Xiangpeng Xiangpeng false false true true |
| 99 | +Raphael R false false true true |
| 100 | +NULL R NULL NULL NULL NULL |
66 | 101 |
|
67 | | -query ?? |
68 | | -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); |
| 102 | +######## |
| 103 | +## StringView to String |
| 104 | +######## |
| 105 | + |
| 106 | +# StringView column to String column |
| 107 | +query TTBBBB |
| 108 | +select |
| 109 | + column1_utf8, column2_utf8, |
| 110 | + column1_utf8view = column2_utf8, |
| 111 | + column2_utf8 = column1_utf8view, |
| 112 | + column1_utf8view <> column2_utf8, |
| 113 | + column2_utf8 <> column1_utf8view |
| 114 | +from test; |
69 | 115 | ---- |
70 | | -Xiangpeng Xiangpeng |
71 | | -Raphael R |
| 116 | +Andrew X false false true true |
| 117 | +Xiangpeng Xiangpeng true true false false |
| 118 | +Raphael R false false true true |
| 119 | +NULL R NULL NULL NULL NULL |
72 | 120 |
|
73 | | -query ?? |
74 | | -select * from test where column1 <> 'Andrew'; |
| 121 | +# StringView column to String scalar |
| 122 | +query TTBBBB |
| 123 | +select |
| 124 | + column1_utf8, column2_utf8, |
| 125 | + column1_utf8view = arrow_cast('Andrew', 'Utf8'), |
| 126 | + arrow_cast('Andrew', 'Utf8') = column1_utf8view, |
| 127 | + column1_utf8view <> arrow_cast('Andrew', 'Utf8'), |
| 128 | + arrow_cast('Andrew', 'Utf8') <> column1_utf8view |
| 129 | +from test; |
75 | 130 | ---- |
76 | | -Xiangpeng Xiangpeng |
77 | | -Raphael R |
| 131 | +Andrew X true true false false |
| 132 | +Xiangpeng Xiangpeng false false true true |
| 133 | +Raphael R false false true true |
| 134 | +NULL R NULL NULL NULL NULL |
78 | 135 |
|
79 | | -statement ok |
80 | | -drop table test; |
| 136 | +# String column to StringView scalar |
| 137 | +query TTBBBB |
| 138 | +select |
| 139 | + column1_utf8, column2_utf8, |
| 140 | + column1_utf8 = arrow_cast('Andrew', 'Utf8View'), |
| 141 | + arrow_cast('Andrew', 'Utf8View') = column1_utf8, |
| 142 | + column1_utf8 <> arrow_cast('Andrew', 'Utf8View'), |
| 143 | + arrow_cast('Andrew', 'Utf8View') <> column1_utf8 |
| 144 | +from test; |
| 145 | +---- |
| 146 | +Andrew X true true false false |
| 147 | +Xiangpeng Xiangpeng false false true true |
| 148 | +Raphael R false false true true |
| 149 | +NULL R NULL NULL NULL NULL |
| 150 | + |
| 151 | + |
| 152 | +######## |
| 153 | +## StringView to Dictionary |
| 154 | +######## |
| 155 | + |
| 156 | +# StringView column to Dict column |
| 157 | +query TTBBBB |
| 158 | +select |
| 159 | + column1_utf8, column2_utf8, |
| 160 | + column1_utf8view = column2_dict, |
| 161 | + column2_dict = column1_utf8view, |
| 162 | + column1_utf8view <> column2_dict, |
| 163 | + column2_dict <> column1_utf8view |
| 164 | +from test; |
| 165 | +---- |
| 166 | +Andrew X false false true true |
| 167 | +Xiangpeng Xiangpeng true true false false |
| 168 | +Raphael R false false true true |
| 169 | +NULL R NULL NULL NULL NULL |
| 170 | + |
| 171 | +# StringView column to Dict scalar |
| 172 | +query TTBBBB |
| 173 | +select |
| 174 | + column1_utf8, column2_utf8, |
| 175 | + column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), |
| 176 | + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view, |
| 177 | + column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), |
| 178 | + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view |
| 179 | +from test; |
| 180 | +---- |
| 181 | +Andrew X true true false false |
| 182 | +Xiangpeng Xiangpeng false false true true |
| 183 | +Raphael R false false true true |
| 184 | +NULL R NULL NULL NULL NULL |
| 185 | + |
| 186 | +# Dict column to StringView scalar |
| 187 | +query TTBBBB |
| 188 | +select |
| 189 | + column1_utf8, column2_utf8, |
| 190 | + column1_dict = arrow_cast('Andrew', 'Utf8View'), |
| 191 | + arrow_cast('Andrew', 'Utf8View') = column1_dict, |
| 192 | + column1_dict <> arrow_cast('Andrew', 'Utf8View'), |
| 193 | + arrow_cast('Andrew', 'Utf8View') <> column1_dict |
| 194 | +from test; |
| 195 | +---- |
| 196 | +Andrew X true true false false |
| 197 | +Xiangpeng Xiangpeng false false true true |
| 198 | +Raphael R false false true true |
| 199 | +NULL R NULL NULL NULL NULL |
| 200 | + |
| 201 | + |
| 202 | +######## |
| 203 | +## Coercion Rules |
| 204 | +######## |
81 | 205 |
|
82 | 206 |
|
83 | | -# test StringViewArray with Utf8 and Utf8View columns |
84 | 207 | statement ok |
85 | | -create table test as values ('Andrew', arrow_cast('X', 'Utf8View')), |
86 | | - ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')), |
87 | | - ('Raphael', arrow_cast('R', 'Utf8View')), |
88 | | - (NULL, arrow_cast('R', 'Utf8View')); |
| 208 | +set datafusion.explain.logical_plan_only = true; |
89 | 209 |
|
90 | | -query T? |
91 | | -select * from test where column1 = column2; |
| 210 | + |
| 211 | +# Filter should have a StringView literal and no column cast |
| 212 | +query TT |
| 213 | +explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew'; |
92 | 214 | ---- |
93 | | -Xiangpeng Xiangpeng |
| 215 | +logical_plan |
| 216 | +01)Projection: test.column1_utf8 |
| 217 | +02)--Filter: test.column1_utf8view = Utf8View("Andrew") |
| 218 | +03)----TableScan: test projection=[column1_utf8, column1_utf8view] |
94 | 219 |
|
95 | | -query T? |
96 | | -select * from test where column1 <> column2; |
| 220 | +# Reversed operand order should produce the same plan |
| 221 | +query TT |
| 222 | +explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view; |
97 | 223 | ---- |
98 | | -Andrew X |
99 | | -Raphael R |
| 224 | +logical_plan |
| 225 | +01)Projection: test.column1_utf8 |
| 226 | +02)--Filter: test.column1_utf8view = Utf8View("Andrew") |
| 227 | +03)----TableScan: test projection=[column1_utf8, column1_utf8view] |
100 | 228 |
|
101 | | -query T? |
102 | | -select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); |
| 229 | +# Known issue: the plan below casts the Utf8 column to Utf8View, but it should not cast the column. See https://github.com/apache/datafusion/issues/10998 |
| 230 | +query TT |
| 231 | +explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View'); |
103 | 232 | ---- |
104 | | -Andrew X |
| 233 | +logical_plan |
| 234 | +01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew") |
| 235 | +02)--TableScan: test projection=[column1_utf8] |
105 | 236 |
|
106 | | -query T? |
107 | | -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); |
| 237 | +query TT |
| 238 | +explain SELECT column1_utf8 from test where column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'); |
108 | 239 | ---- |
109 | | -Xiangpeng Xiangpeng |
110 | | -Raphael R |
| 240 | +logical_plan |
| 241 | +01)Projection: test.column1_utf8 |
| 242 | +02)--Filter: test.column1_utf8view = Utf8View("Andrew") |
| 243 | +03)----TableScan: test projection=[column1_utf8, column1_utf8view] |
| 244 | + |
| 245 | +# compare string / stringview |
| 246 | +# Should cast string -> stringview (which is cheap), not stringview -> string (which is not) |
| 247 | +query TT |
| 248 | +explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8; |
| 249 | +---- |
| 250 | +logical_plan |
| 251 | +01)Projection: test.column1_utf8 |
| 252 | +02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View) |
| 253 | +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] |
| 254 | + |
| 255 | +query TT |
| 256 | +explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view; |
| 257 | +---- |
| 258 | +logical_plan |
| 259 | +01)Projection: test.column1_utf8 |
| 260 | +02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view |
| 261 | +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] |
| 262 | + |
111 | 263 |
|
112 | 264 | statement ok |
113 | 265 | drop table test; |
0 commit comments