@@ -91,30 +91,23 @@ impl Query {
91
91
let mut config = SessionConfig :: default ( )
92
92
. with_parquet_pruning ( true )
93
93
. with_prefer_existing_sort ( true )
94
- . with_round_robin_repartition ( true )
95
- . with_batch_size ( 8192 ) ;
94
+ . with_information_schema ( true )
95
+ . with_batch_size ( 1000000 )
96
+ . with_coalesce_batches ( true ) ;
96
97
97
98
// For more details refer https://datafusion.apache.org/user-guide/configs.html
98
99
99
- // Reduce the number of rows read (if possible)
100
- config. options_mut ( ) . execution . parquet . enable_page_index = true ;
101
-
102
100
// Pushdown filters allows DF to push the filters as far down in the plan as possible
103
101
// and thus, reducing the number of rows decoded
104
102
config. options_mut ( ) . execution . parquet . pushdown_filters = true ;
105
103
106
104
// Reorder filters allows DF to decide the order of filters minimizing the cost of filter evaluation
107
105
config. options_mut ( ) . execution . parquet . reorder_filters = true ;
108
-
109
- // Enable StringViewArray
110
- // https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/
106
+ config. options_mut ( ) . execution . parquet . binary_as_string = true ;
111
107
config
112
108
. options_mut ( )
113
109
. execution
114
- . parquet
115
- . schema_force_view_types = true ;
116
-
117
- config. options_mut ( ) . execution . parquet . binary_as_string = true ;
110
+ . use_row_number_estimates_to_optimize_partitioning = true ;
118
111
119
112
let state = SessionStateBuilder :: new ( )
120
113
. with_default_features ( )
@@ -149,7 +142,9 @@ impl Query {
149
142
. execute_logical_plan ( self . final_logical_plan ( & time_partition) )
150
143
. await ?;
151
144
152
- let fields = df
145
+ let optimised_df = df. repartition ( Partitioning :: RoundRobinBatch ( 16 ) ) ?;
146
+
147
+ let fields = optimised_df
153
148
. schema ( )
154
149
. fields ( )
155
150
. iter ( )
@@ -161,7 +156,7 @@ impl Query {
161
156
return Ok ( ( vec ! [ ] , fields) ) ;
162
157
}
163
158
164
- let results = df . collect ( ) . await ?;
159
+ let results = optimised_df . collect ( ) . await ?;
165
160
Ok ( ( results, fields) )
166
161
}
167
162
0 commit comments