Skip to content

Commit 475d1a7

Browse files
config options updated
1 parent fe492e7 commit 475d1a7

File tree

1 file changed

+9
-14
lines changed

1 file changed

+9
-14
lines changed

src/query/mod.rs

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -91,30 +91,23 @@ impl Query {
9191
let mut config = SessionConfig::default()
9292
.with_parquet_pruning(true)
9393
.with_prefer_existing_sort(true)
94-
.with_round_robin_repartition(true)
95-
.with_batch_size(8192);
94+
.with_information_schema(true)
95+
.with_batch_size(1000000)
96+
.with_coalesce_batches(true);
9697

9798
// For more details, refer to https://datafusion.apache.org/user-guide/configs.html
9899

99-
// Reduce the number of rows read (if possible)
100-
config.options_mut().execution.parquet.enable_page_index = true;
101-
102100
// Pushdown filters allows DF to push the filters as far down in the plan as possible
103101
// and thus reduce the number of rows decoded
104102
config.options_mut().execution.parquet.pushdown_filters = true;
105103

106104
// Reorder filters allows DF to choose the filter order that minimizes the cost of filter evaluation
107105
config.options_mut().execution.parquet.reorder_filters = true;
108-
109-
// Enable StringViewArray
110-
// https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/
106+
config.options_mut().execution.parquet.binary_as_string = true;
111107
config
112108
.options_mut()
113109
.execution
114-
.parquet
115-
.schema_force_view_types = true;
116-
117-
config.options_mut().execution.parquet.binary_as_string = true;
110+
.use_row_number_estimates_to_optimize_partitioning = true;
118111

119112
let state = SessionStateBuilder::new()
120113
.with_default_features()
@@ -149,7 +142,9 @@ impl Query {
149142
.execute_logical_plan(self.final_logical_plan(&time_partition))
150143
.await?;
151144

152-
let fields = df
145+
let optimised_df = df.repartition(Partitioning::RoundRobinBatch(16))?;
146+
147+
let fields = optimised_df
153148
.schema()
154149
.fields()
155150
.iter()
@@ -161,7 +156,7 @@ impl Query {
161156
return Ok((vec![], fields));
162157
}
163158

164-
let results = df.collect().await?;
159+
let results = optimised_df.collect().await?;
165160
Ok((results, fields))
166161
}
167162

0 commit comments

Comments
 (0)