@@ -118,9 +118,7 @@ impl ArrowPredicate for DatafusionArrowPredicate {
118118 Ok ( array) => {
119119 if let Some ( mask) = array. as_any ( ) . downcast_ref :: < BooleanArray > ( ) {
120120 let bool_arr = BooleanArray :: from ( mask. data ( ) . clone ( ) ) ;
121- // TODO is there a more efficient way to count the rows that are filtered?
122- let num_filtered =
123- bool_arr. iter ( ) . filter ( |p| !matches ! ( p, Some ( true ) ) ) . count ( ) ;
121+ let num_filtered = bool_arr. len ( ) - true_count ( & bool_arr) ;
124122 self . rows_filtered . add ( num_filtered) ;
125123 timer. stop ( ) ;
126124 Ok ( bool_arr)
@@ -138,6 +136,27 @@ impl ArrowPredicate for DatafusionArrowPredicate {
138136 }
139137}
140138
139+ /// Return the number of non-null `true` values in a `BooleanArray`
140+ // TODO remove when https://github.com/apache/arrow-rs/issues/2963 is released
141+ fn true_count ( arr : & BooleanArray ) -> usize {
142+ match arr. data ( ) . null_buffer ( ) {
143+ Some ( nulls) => {
144+ let null_chunks = nulls. bit_chunks ( arr. offset ( ) , arr. len ( ) ) ;
145+ let value_chunks = arr. values ( ) . bit_chunks ( arr. offset ( ) , arr. len ( ) ) ;
146+ null_chunks
147+ . iter ( )
148+ . zip ( value_chunks. iter ( ) )
149+ . chain ( std:: iter:: once ( (
150+ null_chunks. remainder_bits ( ) ,
151+ value_chunks. remainder_bits ( ) ,
152+ ) ) )
153+ . map ( |( a, b) | ( a & b) . count_ones ( ) as usize )
154+ . sum ( )
155+ }
156+ None => arr. values ( ) . count_set_bits_offset ( arr. offset ( ) , arr. len ( ) ) ,
157+ }
158+ }
159+
141160/// A candidate expression for creating a `RowFilter` contains the
142161/// expression as well as data to estimate the cost of evaluating
143162/// the resulting expression.
0 commit comments