-
Notifications
You must be signed in to change notification settings - Fork 1.7k
feat(logical-types): add NativeType and LogicalType #12853
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3827974
17a70d8
3dba963
4217970
01f0089
5b5f4c1
88e1b3c
ab16a2d
0b2ed2d
6150ea9
7ed7891
bdd0155
056a8f1
117f23b
e0923e2
f1ff963
7942f91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use crate::types::{LogicalTypeRef, NativeType}; | ||
| use std::sync::{Arc, OnceLock}; | ||
|
|
||
| macro_rules! singleton { | ||
| ($name:ident, $getter:ident, $ty:ident) => { | ||
| // TODO: Use LazyLock instead of getter function when MSRV gets bumped | ||
| static $name: OnceLock<LogicalTypeRef> = OnceLock::new(); | ||
|
|
||
| #[doc = "Getter for singleton instance of a logical type representing"] | ||
| #[doc = concat!("[`NativeType::", stringify!($ty), "`].")] | ||
| pub fn $getter() -> LogicalTypeRef { | ||
| Arc::clone($name.get_or_init(|| Arc::new(NativeType::$ty))) | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| singleton!(LOGICAL_NULL, logical_null, Null); | ||
| singleton!(LOGICAL_BOOLEAN, logical_boolean, Boolean); | ||
| singleton!(LOGICAL_INT8, logical_int8, Int8); | ||
| singleton!(LOGICAL_INT16, logical_int16, Int16); | ||
| singleton!(LOGICAL_INT32, logical_int32, Int32); | ||
| singleton!(LOGICAL_INT64, logical_int64, Int64); | ||
| singleton!(LOGICAL_UINT8, logical_uint8, UInt8); | ||
| singleton!(LOGICAL_UINT16, logical_uint16, UInt16); | ||
| singleton!(LOGICAL_UINT32, logical_uint32, UInt32); | ||
| singleton!(LOGICAL_UINT64, logical_uint64, UInt64); | ||
| singleton!(LOGICAL_FLOAT16, logical_float16, Float16); | ||
| singleton!(LOGICAL_FLOAT32, logical_float32, Float32); | ||
| singleton!(LOGICAL_FLOAT64, logical_float64, Float64); | ||
| singleton!(LOGICAL_DATE, logical_date, Date); | ||
| singleton!(LOGICAL_BINARY, logical_binary, Binary); | ||
| singleton!(LOGICAL_STRING, logical_string, String); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use arrow_schema::{Field, Fields, UnionFields}; | ||
| use std::hash::{Hash, Hasher}; | ||
| use std::{ops::Deref, sync::Arc}; | ||
|
|
||
| use super::{LogicalTypeRef, NativeType}; | ||
|
|
||
| /// A record of a logical type, its name and its nullability. | ||
| #[derive(Debug, Clone, Eq, PartialOrd, Ord)] | ||
| pub struct LogicalField { | ||
| pub name: String, | ||
| pub logical_type: LogicalTypeRef, | ||
| pub nullable: bool, | ||
| } | ||
|
|
||
| impl PartialEq for LogicalField { | ||
| fn eq(&self, other: &Self) -> bool { | ||
| self.name == other.name | ||
| && self.logical_type.eq(&other.logical_type) | ||
| && self.nullable == other.nullable | ||
| } | ||
| } | ||
|
|
||
| impl Hash for LogicalField { | ||
| fn hash<H: Hasher>(&self, state: &mut H) { | ||
| self.name.hash(state); | ||
| self.logical_type.hash(state); | ||
| self.nullable.hash(state); | ||
| } | ||
| } | ||
|
|
||
| impl From<&Field> for LogicalField { | ||
| fn from(value: &Field) -> Self { | ||
| Self { | ||
| name: value.name().clone(), | ||
| logical_type: Arc::new(NativeType::from(value.data_type().clone())), | ||
| nullable: value.is_nullable(), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// A reference counted [`LogicalField`]. | ||
| pub type LogicalFieldRef = Arc<LogicalField>; | ||
|
|
||
| /// A cheaply cloneable, owned collection of [`LogicalFieldRef`]. | ||
| #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
| pub struct LogicalFields(Arc<[LogicalFieldRef]>); | ||
|
|
||
| impl Deref for LogicalFields { | ||
| type Target = [LogicalFieldRef]; | ||
|
|
||
| fn deref(&self) -> &Self::Target { | ||
| self.0.as_ref() | ||
| } | ||
| } | ||
|
|
||
| impl From<&Fields> for LogicalFields { | ||
| fn from(value: &Fields) -> Self { | ||
| value | ||
| .iter() | ||
| .map(|field| Arc::new(LogicalField::from(field.as_ref()))) | ||
| .collect() | ||
| } | ||
| } | ||
|
|
||
| impl FromIterator<LogicalFieldRef> for LogicalFields { | ||
| fn from_iter<T: IntoIterator<Item = LogicalFieldRef>>(iter: T) -> Self { | ||
| Self(iter.into_iter().collect()) | ||
| } | ||
| } | ||
|
|
||
| /// A cheaply cloneable, owned collection of [`LogicalFieldRef`] and their | ||
| /// corresponding type ids. | ||
| #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
| pub struct LogicalUnionFields(Arc<[(i8, LogicalFieldRef)]>); | ||
|
|
||
| impl Deref for LogicalUnionFields { | ||
| type Target = [(i8, LogicalFieldRef)]; | ||
|
|
||
| fn deref(&self) -> &Self::Target { | ||
| self.0.as_ref() | ||
| } | ||
| } | ||
|
|
||
| impl From<&UnionFields> for LogicalUnionFields { | ||
| fn from(value: &UnionFields) -> Self { | ||
| value | ||
| .iter() | ||
| .map(|(i, field)| (i, Arc::new(LogicalField::from(field.as_ref())))) | ||
| .collect() | ||
| } | ||
| } | ||
|
|
||
| impl FromIterator<(i8, LogicalFieldRef)> for LogicalUnionFields { | ||
| fn from_iter<T: IntoIterator<Item = (i8, LogicalFieldRef)>>(iter: T) -> Self { | ||
| Self(iter.into_iter().collect()) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use super::NativeType; | ||
| use crate::error::Result; | ||
| use arrow_schema::DataType; | ||
| use core::fmt; | ||
| use std::{cmp::Ordering, hash::Hash, sync::Arc}; | ||
|
|
||
| /// Signature that uniquely identifies a type among other types. | ||
| #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
| pub enum TypeSignature<'a> { | ||
| /// Represents a built-in native type. | ||
| Native(&'a NativeType), | ||
| /// Represents an arrow-compatible extension type. | ||
| /// (<https://arrow.apache.org/docs/format/Columnar.html#extension-types>) | ||
| /// | ||
| /// The `name` should contain the same value as 'ARROW:extension:name'. | ||
| Extension { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe we can add the comment why the extension is needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is because arrow has extension type too. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got there is a link to extension Arrow types above, it should be enough |
||
| name: &'a str, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be owned String, so that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TypeSignature uses lifetimes just to make pattern matching easier when dealing with parameters (or lack thereof). pub fn important_stuff(logical_type: &dyn LogicalType) {
match logical_type.signature() {
TypeSignature::Native(NativeType::Utf8) => todo!(),
TypeSignature::Extension {
name: "JSON", // <-- changing from &'a str to String leads to: expected `String`, but found `&str`,
parameters: &[]
} => todo!(),
_ => unimplemented!()
}
} |
||
| parameters: &'a [TypeParameter<'a>], | ||
| }, | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
| pub enum TypeParameter<'a> { | ||
| Type(TypeSignature<'a>), | ||
| Number(i128), | ||
| } | ||
|
Comment on lines
+39
to
+43
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As someone unfamiliar with the entire discussion of this feature, it's not clear to me from reading the above link to the extension-types of arrow how these type parameters map to the metadata described in the link. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally everything could be encoded in the name using |
||
|
|
||
| /// A reference counted [`LogicalType`]. | ||
| pub type LogicalTypeRef = Arc<dyn LogicalType>; | ||
jayzhan211 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /// Representation of a logical type with its signature and its native backing | ||
| /// type. | ||
| /// | ||
| /// The logical type is meant to be used during the DataFusion logical planning | ||
| /// phase in order to reason about logical types without worrying about their | ||
| /// underlying physical implementation. | ||
| /// | ||
| /// ### Extension types | ||
| /// | ||
| /// [`LogicalType`] is a trait in order to allow the possibility of declaring | ||
| /// extension types: | ||
| /// | ||
| /// ``` | ||
| /// use datafusion_common::types::{LogicalType, NativeType, TypeSignature}; | ||
| /// | ||
| /// struct JSON {} | ||
| /// | ||
| /// impl LogicalType for JSON { | ||
| /// fn native(&self) -> &NativeType { | ||
| /// &NativeType::String | ||
| /// } | ||
| /// | ||
| /// fn signature(&self) -> TypeSignature<'_> { | ||
| /// TypeSignature::Extension { | ||
| /// name: "JSON", | ||
| /// parameters: &[], | ||
| /// } | ||
| /// } | ||
| /// } | ||
| /// ``` | ||
| pub trait LogicalType: Sync + Send { | ||
| /// Get the native backing type of this logical type. | ||
| fn native(&self) -> &NativeType; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I previously propose How can we do the equivalent check by the current design? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given say arrow Int64 data, i want to know whether these is numbers, timestamp, time, date or something else (eg user-defined enum). The fact that any of these hypothetical logical types could be stored as Int64 doesn't help me know. Asking logical type "could you please decode this arrow type?" doesn't help me know. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the idea is that we have LogicalType already. In logical level, they are either LogicalNumber, LogicalTimestamp or LogicalDate, and we can differ them in logical level. They can also decode as i64, i32 in physical level. So asking logical type "could you please decode this arrow type?" is to tell the relationship between logical type and physical type. We don't need to know whether the arrow i64 is number or timestamp, because we already know that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I can follow. @jayzhan211 -- can you write a small practical example? I want to make sure I understand the use case. Thanks :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We can figure this out if we meet any practical usage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For any user defined logical type you still know the backing |
||
| /// Get the unique type signature for this logical type. Logical types with identical | ||
| /// signatures are considered equal. | ||
| fn signature(&self) -> TypeSignature<'_>; | ||
|
|
||
| /// Get the default physical type to cast `origin` to in order to obtain a physical type | ||
| /// that is logically compatible with this logical type. | ||
| fn default_cast_for(&self, origin: &DataType) -> Result<DataType> { | ||
| self.native().default_cast_for(origin) | ||
| } | ||
| } | ||
|
|
||
| impl fmt::Debug for dyn LogicalType { | ||
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
| f.debug_tuple("LogicalType") | ||
| .field(&self.signature()) | ||
| .field(&self.native()) | ||
| .finish() | ||
| } | ||
| } | ||
|
|
||
| impl PartialEq for dyn LogicalType { | ||
| fn eq(&self, other: &Self) -> bool { | ||
| self.signature().eq(&other.signature()) | ||
| } | ||
| } | ||
|
|
||
| impl Eq for dyn LogicalType {} | ||
|
|
||
| impl PartialOrd for dyn LogicalType { | ||
| fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||
| Some(self.cmp(other)) | ||
| } | ||
| } | ||
|
|
||
| impl Ord for dyn LogicalType { | ||
notfilippo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| fn cmp(&self, other: &Self) -> Ordering { | ||
| self.signature() | ||
| .cmp(&other.signature()) | ||
| .then(self.native().cmp(other.native())) | ||
| } | ||
| } | ||
|
|
||
| impl Hash for dyn LogicalType { | ||
| fn hash<H: std::hash::Hasher>(&self, state: &mut H) { | ||
| self.signature().hash(state); | ||
| self.native().hash(state); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| mod builtin; | ||
| mod field; | ||
| mod logical; | ||
| mod native; | ||
|
|
||
| pub use builtin::*; | ||
| pub use field::*; | ||
| pub use logical::*; | ||
| pub use native::*; |
Uh oh!
There was an error while loading. Please reload this page.