From 475634bad174fab4281766ef6feeea069ae1bba8 Mon Sep 17 00:00:00 2001 From: Simon Gasse Date: Sun, 28 Nov 2021 23:39:07 +0100 Subject: [PATCH 1/2] Add examples for type conversion - Add `type_conversion.rs` to illustrate some common conversions. - Update the documentation for numpy users. --- examples/type_conversion.rs | 114 +++++++++++++++++++++++++ src/doc/ndarray_for_numpy_users/mod.rs | 95 +++++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 examples/type_conversion.rs diff --git a/examples/type_conversion.rs b/examples/type_conversion.rs new file mode 100644 index 000000000..57c6393de --- /dev/null +++ b/examples/type_conversion.rs @@ -0,0 +1,114 @@ +#[cfg(feature = "approx")] +use {approx::assert_abs_diff_eq, ndarray::prelude::*, std::convert::TryFrom}; + +#[cfg(feature = "approx")] +fn main() { + // Converting an array from one datatype to another is implemented with the + // `ArrayBase::mapv()` function. We pass a closure that is applied to each + // element independently. This allows for more control and flexibility in + // converting types. + // + // Below, we illustrate four different approaches for the actual conversion + // in the closure. + // - `From` ensures lossless conversions known at compile time and is the + // best default choice. + // - `TryFrom` either converts data losslessly or panics, ensuring that the + // rest of the program does not continue with unexpected data. + // - `as` never panics and may silently convert in a lossy way, depending + // on the source and target datatypes. More details can be found in the + // reference: https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast + // - Using custom logic in the closure, e.g. to clip values or for NaN + // handling in floats. 
+ // + // For a brush-up on casting between numeric types in Rust, refer to: + // https://doc.rust-lang.org/rust-by-example/types/cast.html + + // Infallible, lossless conversion with `From` + // The trait `std::convert::From` is only implemented for conversions that + // can be guaranteed to be lossless at compile time. This is the safest + // approach. + let a_u8: Array<u8, _> = array![[1, 2, 3], [4, 5, 6]]; + let a_f32 = a_u8.mapv(|element| f32::from(element)); + assert_abs_diff_eq!(a_f32, array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]); + + // Fallible, lossless conversion with `TryFrom` + // `i8` numbers can be negative; in such a case, there is no perfect + // conversion to `u8` defined. In this example, all numbers are positive and + // in bounds and can be converted at runtime. But for unknown runtime input, + // this would panic with the message provided in `.expect()`. Note that you + // can also use `.unwrap()` to be more concise. + let a_i8: Array<i8, _> = array![120, 8, 0]; + let a_u8 = a_i8.mapv(|element| u8::try_from(element).expect("Could not convert i8 to u8")); + assert_eq!(a_u8, array![120u8, 8u8, 0u8]); + + // Unsigned to signed integer conversion with `as` + // A real-life example of this would be coordinates on a grid. + // A `usize` value can be larger than what fits into an `isize`, therefore, + // it would be safer to use `TryFrom`. Nevertheless, `as` can be used for + // either simplicity or performance. + // The example includes `usize::MAX` to illustrate potentially undesired + // behavior. It will be interpreted as -1 (no-op cast + two's complement), see + // https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast + let a_usize: Array<usize, _> = array![1, 2, 3, usize::MAX]; + let a_isize = a_usize.mapv(|element| element as isize); + assert_eq!(a_isize, array![1_isize, 2_isize, 3_isize, -1_isize]); + + // Simple upcasting with `as` + // Every `u8` fits perfectly into a `u32`, therefore this is a lossless + // conversion. 
+ // Note that it is up to the programmer to ensure the validity of the + // conversion over the lifetime of a program. With type inference, subtle + // bugs can creep in since conversions with `as` will always compile, so a + // programmer might not notice that a prior lossless conversion became a + // lossy conversion. With `From`, this would be noticed at compile-time and + // with `TryFrom`, it would also be either handled or make the program + // panic. + let a_u8: Array<u8, _> = array![[1, 2, 3], [4, 5, 6]]; + let a_u32 = a_u8.mapv(|element| element as u32); + assert_eq!(a_u32, array![[1u32, 2u32, 3u32], [4u32, 5u32, 6u32]]); + + // Saturating cast with `as` + // The `as` keyword performs a *saturating cast* when casting floats to + // ints. This means that numbers which do not fit into the target datatype + // will silently be clipped to the maximum/minimum numbers. Since this is + // not obvious, we discourage the intentional use of casting with `as` with + // silent saturation and recommend a custom logic instead which makes the + // intent clear. + let a_f32: Array<f32, _> = array![ + 256.0, // saturated to 255 + 255.7, // saturated to 255 + 255.1, // saturated to 255 + 254.7, // rounded down to 254 by cutting the decimal part + 254.1, // rounded down to 254 by cutting the decimal part + -1.0, // saturated to 0 on the lower end + f32::INFINITY, // saturated to 255 + f32::NAN, // converted to zero + ]; + let a_u8 = a_f32.mapv(|element| element as u8); + assert_eq!(a_u8, array![255, 255, 255, 254, 254, 0, 255, 0]); + + // Custom mapping logic + // Given that we pass a closure for the conversion, we can also define + // custom logic to e.g. replace NaN values and clip others. This also + // makes the intent clear. 
+ let a_f32: Array<f32, _> = array![ + 270.0, // clipped to 200 + -1.2, // clipped to 0 + 4.7, // rounded up to 5 instead of just stripping decimals + f32::INFINITY, // clipped to 200 + f32::NAN, // replaced with upper bound 200 + ]; + let a_u8_custom = a_f32.mapv(|element| { + if element == f32::INFINITY || element.is_nan() { + return 200; + } + if let Some(std::cmp::Ordering::Less) = element.partial_cmp(&0.0) { + return 0; + } + 200.min(element.round() as u8) + }); + assert_eq!(a_u8_custom, array![200, 0, 5, 200, 200]); +} + +#[cfg(not(feature = "approx"))] +fn main() {} diff --git a/src/doc/ndarray_for_numpy_users/mod.rs b/src/doc/ndarray_for_numpy_users/mod.rs index f9c05c612..eff60cbea 100644 --- a/src/doc/ndarray_for_numpy_users/mod.rs +++ b/src/doc/ndarray_for_numpy_users/mod.rs @@ -524,6 +524,101 @@ //! //! //! +//! ## Type conversions +//! +//! In `ndarray`, conversions between datatypes are done with `mapv()` by +//! passing a closure to convert every element independently. +//! For the conversion itself, we have several options: +//! - `std::convert::From` ensures lossless, safe conversions at compile-time +//! and is generally recommended. +//! - `std::convert::TryFrom` can be used for potentially unsafe conversions. It +//! will return a `Result` which can be handled or `unwrap()`ed to panic if +//! any value at runtime cannot be converted losslessly. +//! - The `as` keyword compiles to lossless/lossy conversions depending on the +//! source and target datatypes. It can be useful when `TryFrom` is a +//! performance issue or does not apply. A notable difference to NumPy is that +//! `as` performs a [*saturating* cast][sat_conv] when casting +//! from floats to integers. Further information can be found in the +//! [reference on type cast expressions][as_typecast]. +//! +//! For details, be sure to check out the type conversion examples. +//! + +//! +//! +//! +//! +//! +//! +//! +//! +//! +//! +//! +//! 
[as_conv]: https://doc.rust-lang.org/rust-by-example/types/cast.html +//! [sat_conv]: https://blog.rust-lang.org/2020/07/16/Rust-1.45.0.html#fixing-unsoundness-in-casts +//! [as_typecast]: https://doc.rust-lang.org/reference/expressions/operator-expr.html#type-cast-expressions +//! //! ## Array manipulation //! //! NumPy | `ndarray` | Notes From bbf0c4a10ae350bb539b454b95d28ecc08e8e85d Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 4 Dec 2021 14:23:55 +0100 Subject: [PATCH 2/2] Fix style of imports in type_conversion example --- examples/type_conversion.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/type_conversion.rs b/examples/type_conversion.rs index 57c6393de..7bec2542f 100644 --- a/examples/type_conversion.rs +++ b/examples/type_conversion.rs @@ -1,5 +1,10 @@ #[cfg(feature = "approx")] -use {approx::assert_abs_diff_eq, ndarray::prelude::*, std::convert::TryFrom}; +use std::convert::TryFrom; + +#[cfg(feature = "approx")] +use approx::assert_abs_diff_eq; +#[cfg(feature = "approx")] +use ndarray::prelude::*; #[cfg(feature = "approx")] fn main() {
+//! +//! NumPy +//! +//! +//! +//! `ndarray` +//! +//! +//! +//! Notes +//! +//!
+//! +//! `a.astype(np.float32)` +//! +//! +//! +//! `a.mapv(|x| f32::from(x))` +//! +//! +//! +//! convert `u8` array infallibly to `f32` array with `std::convert::From`, generally recommended +//! +//!
+//! +//! `a.astype(np.int32)` +//! +//! +//! +//! `a.mapv(|x| i32::from(x))` +//! +//! +//! +//! upcast `u8` array to `i32` array with `std::convert::From`, preferable over `as` because it ensures at compile-time that the conversion is lossless +//! +//!
+//! +//! `a.astype(np.uint8)` +//! +//! +//! +//! `a.mapv(|x| u8::try_from(x).unwrap())` +//! +//! +//! +//! try to convert `i8` array to `u8` array, panic if any value cannot be converted losslessly at runtime (e.g. negative value) +//! +//!
+//! +//! `a.astype(np.int32)` +//! +//! +//! +//! `a.mapv(|x| x as i32)` +//! +//! +//! +//! convert `f32` array to `i32` array with ["saturating" conversion][sat_conv]; care needed because the conversion can be lossy: fractional parts are truncated, out-of-range values (including infinities) saturate, and NaN becomes 0. See [the reference for information][as_typecast]. +//! +//!