Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
include = { workspace = true }
edition = { workspace = true }
edition = "2024"
rust-version = { workspace = true }

[lib]
Expand Down
89 changes: 63 additions & 26 deletions arrow-schema/src/datatype_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
type ArrowResult<T> = Result<T, ArrowError>;

/// Builds the [`ArrowError::ParseError`] reported when `val` cannot be parsed
/// as a data type.
///
/// * `val` - the type string that was being parsed; quoted in the message.
/// * `msg` - detail describing what went wrong; appended after `Error`.
fn make_error(val: &str, msg: &str) -> ArrowError {
    // Format exactly once: running the already-formatted text through the same
    // template again would nest the whole "Unsupported type ..." sentence
    // inside itself.
    let msg = format!(
        "Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
    );
    ArrowError::ParseError(msg)
}

Expand Down Expand Up @@ -246,7 +248,7 @@ impl<'a> Parser<'a> {
return Err(make_error(
self.val,
&format!("finding IntervalUnit for Interval, got {tok}"),
))
));
}
};
self.expect_token(Token::RParen)?;
Expand Down Expand Up @@ -328,7 +330,7 @@ impl<'a> Parser<'a> {
return Err(make_error(
self.val,
&format!("Expected a quoted string for a field name; got {tok:?}"),
))
));
}
};
self.expect_token(Token::Colon)?;
Expand All @@ -345,8 +347,10 @@ impl<'a> Parser<'a> {
tok => {
return Err(make_error(
self.val,
&format!("Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"),
))
&format!(
"Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
),
));
}
}
}
Expand Down Expand Up @@ -841,18 +845,12 @@ mod test {
("", "Error finding next token"),
("null", "Unsupported type 'null'"),
("Nu", "Unsupported type 'Nu'"),
(
r#"Timestamp(ns, +00:00)"#,
"Error unknown token: +00",
),
(r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
(
r#"Timestamp(ns, "+00:00)"#,
r#"Unterminated string at: "+00:00)"#,
),
(
r#"Timestamp(ns, "")"#,
r#"empty strings aren't allowed"#,
),
(r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
(
r#"Timestamp(ns, "+00:00"")"#,
r#"Parser error: Unterminated string at: ")"#,
Expand All @@ -864,22 +862,58 @@ mod test {
),
("Int32, ", "trailing content after parsing 'Int32'"),
("Int32(3), ", "trailing content after parsing 'Int32'"),
("FixedSizeBinary(Int32), ", "Error finding i64 for FixedSizeBinary, got 'Int32'"),
("FixedSizeBinary(3.0), ", "Error parsing 3.0 as integer: invalid digit found in string"),
(
"FixedSizeBinary(Int32), ",
"Error finding i64 for FixedSizeBinary, got 'Int32'",
),
(
"FixedSizeBinary(3.0), ",
"Error parsing 3.0 as integer: invalid digit found in string",
),
// too large for i32
("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"),
(
"FixedSizeBinary(4000000000), ",
"Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
),
// can't have negative precision
("Decimal32(-3, 5)", "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted"),
("Decimal64(-3, 5)", "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted"),
("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"),
("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"),
("Decimal32(3, 500)", "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted"),
("Decimal64(3, 500)", "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted"),
("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"),
("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"),
(
"Decimal32(-3, 5)",
"Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
),
(
"Decimal64(-3, 5)",
"Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
),
(
"Decimal128(-3, 5)",
"Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
),
(
"Decimal256(-3, 5)",
"Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
),
(
"Decimal32(3, 500)",
"Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
),
(
"Decimal64(3, 500)",
"Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
),
(
"Decimal128(3, 500)",
"Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
),
(
"Decimal256(3, 500)",
"Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
),
("Struct(f1 Int64)", "Error unknown token: f1"),
("Struct(\"f1\" Int64)", "Expected ':'"),
("Struct(\"f1\": )", "Error finding next type, got unexpected ')'"),
(
"Struct(\"f1\": )",
"Error finding next type, got unexpected ')'",
),
];

for (data_type_string, expected_message) in cases {
Expand All @@ -906,6 +940,9 @@ mod test {
/// Checks that an unknown type name is rejected with `ArrowError::ParseError`
/// and that the rendered message matches the expected text exactly.
fn parse_error_type() {
    let err = parse_data_type("foobar").unwrap_err();
    // The failure must use the ParseError variant.
    assert!(matches!(err, ArrowError::ParseError(_)));
    // One comparison of the rendered message is enough; a second identical
    // assertion adds no coverage.
    assert_eq!(
        err.to_string(),
        "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar"
    );
}
}
4 changes: 2 additions & 2 deletions arrow-schema/src/extension/canonical/bool8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>

use crate::{extension::ExtensionType, ArrowError, DataType};
use crate::{ArrowError, DataType, extension::ExtensionType};

/// The extension type for `8-bit Boolean`.
///
Expand Down Expand Up @@ -75,8 +75,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
4 changes: 2 additions & 2 deletions arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

use serde::{Deserialize, Serialize};

use crate::{extension::ExtensionType, ArrowError, DataType};
use crate::{ArrowError, DataType, extension::ExtensionType};

/// The extension type for fixed shape tensor.
///
Expand Down Expand Up @@ -297,8 +297,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
4 changes: 2 additions & 2 deletions arrow-schema/src/extension/canonical/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

use serde::{Deserialize, Serialize};

use crate::{extension::ExtensionType, ArrowError, DataType};
use crate::{ArrowError, DataType, extension::ExtensionType};

/// The extension type for `JSON`.
///
Expand Down Expand Up @@ -109,8 +109,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
16 changes: 12 additions & 4 deletions arrow-schema/src/extension/canonical/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,20 +87,28 @@ impl TryFrom<&Field> for CanonicalExtensionType {
match value.extension_type_name() {
// An extension type name with an `arrow.` prefix
Some(name) if name.starts_with("arrow.") => match name {
FixedShapeTensor::NAME => value.try_extension_type::<FixedShapeTensor>().map(Into::into),
VariableShapeTensor::NAME => value.try_extension_type::<VariableShapeTensor>().map(Into::into),
FixedShapeTensor::NAME => value
.try_extension_type::<FixedShapeTensor>()
.map(Into::into),
VariableShapeTensor::NAME => value
.try_extension_type::<VariableShapeTensor>()
.map(Into::into),
Json::NAME => value.try_extension_type::<Json>().map(Into::into),
Uuid::NAME => value.try_extension_type::<Uuid>().map(Into::into),
Opaque::NAME => value.try_extension_type::<Opaque>().map(Into::into),
Bool8::NAME => value.try_extension_type::<Bool8>().map(Into::into),
_ => Err(ArrowError::InvalidArgumentError(format!("Unsupported canonical extension type: {name}"))),
_ => Err(ArrowError::InvalidArgumentError(format!(
"Unsupported canonical extension type: {name}"
))),
},
// Name missing the expected prefix
Some(name) => Err(ArrowError::InvalidArgumentError(format!(
"Field extension type name mismatch, expected a name with an `arrow.` prefix, found {name}"
))),
// Name missing
None => Err(ArrowError::InvalidArgumentError("Field extension type name missing".to_owned())),
None => Err(ArrowError::InvalidArgumentError(
"Field extension type name missing".to_owned(),
)),
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions arrow-schema/src/extension/canonical/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

use serde::{Deserialize, Serialize};

use crate::{extension::ExtensionType, ArrowError, DataType};
use crate::{ArrowError, DataType, extension::ExtensionType};

/// The extension type for `Opaque`.
///
Expand Down Expand Up @@ -135,8 +135,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
4 changes: 2 additions & 2 deletions arrow-schema/src/extension/canonical/uuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//!
//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid>

use crate::{extension::ExtensionType, ArrowError, DataType};
use crate::{ArrowError, DataType, extension::ExtensionType};

/// The extension type for `UUID`.
///
Expand Down Expand Up @@ -80,8 +80,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
23 changes: 13 additions & 10 deletions arrow-schema/src/extension/canonical/variable_shape_tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

use serde::{Deserialize, Serialize};

use crate::{extension::ExtensionType, ArrowError, DataType, Field};
use crate::{ArrowError, DataType, Field, extension::ExtensionType};

/// The extension type for `VariableShapeTensor`.
///
Expand Down Expand Up @@ -310,16 +310,19 @@ impl ExtensionType for VariableShapeTensor {
DataType::FixedSizeList(_, list_size) => {
let dimensions = usize::try_from(*list_size).expect("conversion failed");
// Make sure the metadata is valid.
let metadata = VariableShapeTensorMetadata::try_new(dimensions, metadata.dim_names, metadata.permutations, metadata.uniform_shape)?;
let metadata = VariableShapeTensorMetadata::try_new(
dimensions,
metadata.dim_names,
metadata.permutations,
metadata.uniform_shape,
)?;
let data_field = &fields[0];
match data_field.data_type() {
DataType::List(field) => {
Ok(Self {
value_type: field.data_type().clone(),
dimensions,
metadata
})
}
DataType::List(field) => Ok(Self {
value_type: field.data_type().clone(),
dimensions,
metadata,
}),
data_type => Err(ArrowError::InvalidArgumentError(format!(
"VariableShapeTensor data type mismatch, expected List for data field, found {data_type}"
))),
Expand All @@ -342,8 +345,8 @@ mod tests {
#[cfg(feature = "canonical_extension_types")]
use crate::extension::CanonicalExtensionType;
use crate::{
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
Field,
extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
};

use super::*;
Expand Down
Loading
Loading