@@ -437,13 +437,84 @@ impl<'a, T: Array> Array for &'a T {
437437
438438/// A generic trait for accessing the values of an [`Array`]
439439///
440+ /// This trait helps write specialized implementations of algorithms for
441+ /// different array types. Specialized implementations allow the compiler
442+ /// to optimize the code for the specific array type, which can lead to
443+ /// significant performance improvements.
444+ ///
445+ /// # Example
446+ /// For example, to write three different implementations of a string length function
447+ /// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
448+ ///
449+ /// ```
450+ /// # use std::sync::Arc;
451+ /// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
452+ /// # use arrow_buffer::ArrowNativeType;
453+ /// # use arrow_array::cast::AsArray;
454+ /// # use arrow_array::iterator::ArrayIter;
455+ /// # use arrow_array::types::{Int32Type, Int64Type};
456+ /// # use arrow_schema::{ArrowError, DataType};
457+ /// /// This function takes a dynamically typed `ArrayRef` and calls
458+ /// /// one of three specialized implementations
459+ /// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
460+ /// match arg.data_type() {
461+ /// DataType::Utf8 => {
462+ /// // downcast the ArrayRef to a StringArray and call the specialized implementation
463+ /// let string_array = arg.as_string::<i32>();
464+ /// character_length_general::<Int32Type, _>(string_array)
465+ /// }
466+ /// DataType::LargeUtf8 => {
467+ /// character_length_general::<Int64Type, _>(arg.as_string::<i64>())
468+ /// }
469+ /// DataType::Utf8View => {
470+ /// character_length_general::<Int32Type, _>(arg.as_string_view())
471+ /// }
472+ /// _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
473+ /// }
474+ /// }
475+ ///
476+ /// /// A generic implementation of the character_length function
477+ /// /// This function uses the `ArrayAccessor` trait to access the values of the array
478+ /// /// so the compiler can generate specialized implementations for different array types
479+ /// ///
480+ /// /// Returns a new array with the length of each string in the input array
481+ /// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
482+ /// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
483+ /// ///
484+ /// /// This is generic on the type of the primitive array (different string arrays use
485+ /// /// different integer types for their lengths) and the type of the array accessor
486+ /// /// (different string arrays have different ways to access the values)
487+ /// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
488+ /// array: V,
489+ /// ) -> Result<ArrayRef, ArrowError>
490+ /// where
491+ /// T::Native: OffsetSizeTrait,
492+ /// {
493+ /// let iter = ArrayIter::new(array);
494+ /// // Create an Int32Array / Int64Array with the length of each string
495+ /// let result = iter
496+ /// .map(|string| {
497+ /// string.map(|string: &str| {
498+ /// T::Native::from_usize(string.chars().count())
499+ /// .expect("should not fail as string.chars will always return integer")
500+ /// })
501+ /// })
502+ /// .collect::<PrimitiveArray<T>>();
503+ ///
504+ /// // Return the result as a new ArrayRef (dynamically typed)
505+ /// Ok(Arc::new(result) as ArrayRef)
506+ /// }
507+ /// ```
508+ ///
440509/// # Validity
441510///
442- /// An [`ArrayAccessor`] must always return a well-defined value for an index that is
443- /// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true.
511+ /// An [`ArrayAccessor`] must always return a well-defined value for an index
512+ /// that is within the bounds `0..Array::len`, including for null indexes where
513+ /// [`Array::is_null`] is true.
444514///
445- /// The value at null indexes is unspecified, and implementations must not rely on a specific
446- /// value such as [`Default::default`] being returned, however, it must not be undefined
515+ /// The value at null indexes is unspecified, and implementations must not rely
516+ /// on a specific value such as [`Default::default`] being returned. However, the
517+ /// value must not be undefined.
447518pub trait ArrayAccessor : Array {
448519 /// The Arrow type of the element being accessed.
449520 type Item : Send + Sync ;
0 commit comments