@@ -20,6 +20,8 @@ use arrow_array::{BooleanArray, RecordBatch};
2020use arrow_schema:: ArrowError ;
2121
2222/// A predicate operating on [`RecordBatch`]
23+ ///
24+ /// See [`RowFilter`] for more information on the use of this trait.
2325pub trait ArrowPredicate : Send + ' static {
2426 /// Returns the [`ProjectionMask`] that describes the columns required
2527 /// to evaluate this predicate. All projected columns will be provided in the `batch`
@@ -29,7 +31,7 @@ pub trait ArrowPredicate: Send + 'static {
2931 /// Evaluate this predicate for the given [`RecordBatch`] containing the columns
3032 /// identified by [`Self::projection`]
3133 ///
32- /// Must return a [`BooleanArray`] that has the same length as the input
34+ /// Must return a [`BooleanArray`] that has the same length as the input
3335 /// `batch` where each row indicates whether the row should be returned:
3436 /// * `true`: the row should be returned
3537 /// * `false` or `null`: the row should not be returned
@@ -68,12 +70,17 @@ where
6870 }
6971}
7072
71- /// A [`RowFilter`] allows pushing down a filter predicate to skip IO and decode
73+ /// Filter applied *during* the parquet read process
74+ ///
75+ /// [`RowFilter`] applies predicates in order, after decoding only the columns
76+ /// required. As predicates eliminate rows, fewer rows from subsequent columns
77+ /// may be required, thus potentially reducing IO and decode.
7278///
73- /// This consists of a list of [`ArrowPredicate`] where only the rows that satisfy all
74- /// of the predicates will be returned. Any [`RowSelection`] will be applied prior
79+ /// A `RowFilter` consists of a list of [`ArrowPredicate`]s. Only the rows for which
80+ /// all the predicates evaluate to `true` will be returned.
81+ /// Any [`RowSelection`] provided to the reader will be applied prior
7582/// to the first predicate, and each predicate in turn will then be used to compute
76- /// a more refined [`RowSelection`] to use when evaluating the subsequent predicates.
83+ /// a more refined [`RowSelection`] used when evaluating the subsequent predicates.
7784///
7885/// Once all predicates have been evaluated, the final [`RowSelection`] is applied
7986/// to the top-level [`ProjectionMask`] to produce the final output [`RecordBatch`].
0 commit comments