Skip to content

Commit 0c9ba15

Browse files
committed
Merge remote-tracking branch 'apache/master' into alamb/simplify_simplify3
2 parents 352a1ba + ecb09d9 commit 0c9ba15

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+307
-5634
lines changed

.github/workflows/python_build.yml

Lines changed: 0 additions & 131 deletions
This file was deleted.

.github/workflows/python_test.yaml

Lines changed: 0 additions & 62 deletions
This file was deleted.

ballista/rust/core/src/serde/logical_plan/to_proto.rs

Lines changed: 1 addition & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -153,115 +153,7 @@ impl TryInto<DataType> for &protobuf::ArrowType {
153153
"Protobuf deserialization error: ArrowType missing required field 'data_type'",
154154
)
155155
})?;
156-
Ok(match pb_arrow_type {
157-
protobuf::arrow_type::ArrowTypeEnum::None(_) => DataType::Null,
158-
protobuf::arrow_type::ArrowTypeEnum::Bool(_) => DataType::Boolean,
159-
protobuf::arrow_type::ArrowTypeEnum::Uint8(_) => DataType::UInt8,
160-
protobuf::arrow_type::ArrowTypeEnum::Int8(_) => DataType::Int8,
161-
protobuf::arrow_type::ArrowTypeEnum::Uint16(_) => DataType::UInt16,
162-
protobuf::arrow_type::ArrowTypeEnum::Int16(_) => DataType::Int16,
163-
protobuf::arrow_type::ArrowTypeEnum::Uint32(_) => DataType::UInt32,
164-
protobuf::arrow_type::ArrowTypeEnum::Int32(_) => DataType::Int32,
165-
protobuf::arrow_type::ArrowTypeEnum::Uint64(_) => DataType::UInt64,
166-
protobuf::arrow_type::ArrowTypeEnum::Int64(_) => DataType::Int64,
167-
protobuf::arrow_type::ArrowTypeEnum::Float16(_) => DataType::Float16,
168-
protobuf::arrow_type::ArrowTypeEnum::Float32(_) => DataType::Float32,
169-
protobuf::arrow_type::ArrowTypeEnum::Float64(_) => DataType::Float64,
170-
protobuf::arrow_type::ArrowTypeEnum::Utf8(_) => DataType::Utf8,
171-
protobuf::arrow_type::ArrowTypeEnum::LargeUtf8(_) => DataType::LargeUtf8,
172-
protobuf::arrow_type::ArrowTypeEnum::Binary(_) => DataType::Binary,
173-
protobuf::arrow_type::ArrowTypeEnum::FixedSizeBinary(size) => {
174-
DataType::FixedSizeBinary(*size)
175-
}
176-
protobuf::arrow_type::ArrowTypeEnum::LargeBinary(_) => DataType::LargeBinary,
177-
protobuf::arrow_type::ArrowTypeEnum::Date32(_) => DataType::Date32,
178-
protobuf::arrow_type::ArrowTypeEnum::Date64(_) => DataType::Date64,
179-
protobuf::arrow_type::ArrowTypeEnum::Duration(time_unit_i32) => {
180-
DataType::Duration(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
181-
}
182-
protobuf::arrow_type::ArrowTypeEnum::Timestamp(timestamp) => {
183-
DataType::Timestamp(
184-
protobuf::TimeUnit::from_i32_to_arrow(timestamp.time_unit)?,
185-
match timestamp.timezone.is_empty() {
186-
true => None,
187-
false => Some(timestamp.timezone.to_owned()),
188-
},
189-
)
190-
}
191-
protobuf::arrow_type::ArrowTypeEnum::Time32(time_unit_i32) => {
192-
DataType::Time32(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
193-
}
194-
protobuf::arrow_type::ArrowTypeEnum::Time64(time_unit_i32) => {
195-
DataType::Time64(protobuf::TimeUnit::from_i32_to_arrow(*time_unit_i32)?)
196-
}
197-
protobuf::arrow_type::ArrowTypeEnum::Interval(interval_unit_i32) => {
198-
DataType::Interval(protobuf::IntervalUnit::from_i32_to_arrow(
199-
*interval_unit_i32,
200-
)?)
201-
}
202-
protobuf::arrow_type::ArrowTypeEnum::Decimal(protobuf::Decimal {
203-
whole,
204-
fractional,
205-
}) => DataType::Decimal(*whole as usize, *fractional as usize),
206-
protobuf::arrow_type::ArrowTypeEnum::List(boxed_list) => {
207-
let field_ref = boxed_list
208-
.field_type
209-
.as_ref()
210-
.ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
211-
.as_ref();
212-
DataType::List(Box::new(field_ref.try_into()?))
213-
}
214-
protobuf::arrow_type::ArrowTypeEnum::LargeList(boxed_list) => {
215-
let field_ref = boxed_list
216-
.field_type
217-
.as_ref()
218-
.ok_or_else(|| proto_error("Protobuf deserialization error: List message was missing required field 'field_type'"))?
219-
.as_ref();
220-
DataType::LargeList(Box::new(field_ref.try_into()?))
221-
}
222-
protobuf::arrow_type::ArrowTypeEnum::FixedSizeList(boxed_list) => {
223-
let fsl_ref = boxed_list.as_ref();
224-
let pb_fieldtype = fsl_ref
225-
.field_type
226-
.as_ref()
227-
.ok_or_else(|| proto_error("Protobuf deserialization error: FixedSizeList message was missing required field 'field_type'"))?;
228-
DataType::FixedSizeList(
229-
Box::new(pb_fieldtype.as_ref().try_into()?),
230-
fsl_ref.list_size,
231-
)
232-
}
233-
protobuf::arrow_type::ArrowTypeEnum::Struct(struct_type) => {
234-
let fields = struct_type
235-
.sub_field_types
236-
.iter()
237-
.map(|field| field.try_into())
238-
.collect::<Result<Vec<_>, _>>()?;
239-
DataType::Struct(fields)
240-
}
241-
protobuf::arrow_type::ArrowTypeEnum::Union(union) => {
242-
let union_types = union
243-
.union_types
244-
.iter()
245-
.map(|field| field.try_into())
246-
.collect::<Result<Vec<_>, _>>()?;
247-
DataType::Union(union_types)
248-
}
249-
protobuf::arrow_type::ArrowTypeEnum::Dictionary(boxed_dict) => {
250-
let dict_ref = boxed_dict.as_ref();
251-
let pb_key = dict_ref
252-
.key
253-
.as_ref()
254-
.ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'key'"))?;
255-
let pb_value = dict_ref
256-
.value
257-
.as_ref()
258-
.ok_or_else(|| proto_error("Protobuf deserialization error: Dictionary message was missing required field 'value'"))?;
259-
DataType::Dictionary(
260-
Box::new(pb_key.as_ref().try_into()?),
261-
Box::new(pb_value.as_ref().try_into()?),
262-
)
263-
}
264-
})
156+
pb_arrow_type.try_into()
265157
}
266158
}
267159

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion::datasource::file_format::parquet::ParquetFormat;
19+
use datafusion::datasource::listing::ListingOptions;
20+
use datafusion::error::Result;
21+
use datafusion::prelude::*;
22+
use std::sync::Arc;
23+
24+
/// This example demonstrates executing a simple query against an Arrow data source (a directory
25+
/// with multiple Parquet files) and fetching results
26+
#[tokio::main]
27+
async fn main() -> Result<()> {
28+
// create local execution context
29+
let mut ctx = ExecutionContext::new();
30+
31+
let testdata = datafusion::arrow::util::test_util::parquet_test_data();
32+
33+
// Configure listing options
34+
let file_format = ParquetFormat::default().with_enable_pruning(true);
35+
let listing_options = ListingOptions {
36+
file_extension: ".parquet".to_owned(),
37+
format: Arc::new(file_format),
38+
table_partition_cols: vec![],
39+
collect_stat: true,
40+
target_partitions: 1,
41+
};
42+
43+
// Register a listing table - this will use all files in the directory as data sources
44+
// for the query
45+
ctx.register_listing_table(
46+
"my_table",
47+
&format!("file://{}", testdata),
48+
listing_options,
49+
None,
50+
)
51+
.await
52+
.unwrap();
53+
54+
// execute the query
55+
let df = ctx
56+
.sql(
57+
"SELECT int_col, double_col, CAST(date_string_col as VARCHAR) \
58+
FROM alltypes_plain \
59+
WHERE id > 1 AND tinyint_col < double_col",
60+
)
61+
.await?;
62+
63+
// print the results
64+
df.show().await?;
65+
66+
Ok(())
67+
}

0 commit comments

Comments
 (0)