I have found these related issues/pull requests
I could not find any issues in the sqlx repo related to parallel workers not being launched. However, this seems somewhat related to brianc/node-postgres#3344: that issue shows logs very similar to the ones I'm seeing and mentions that a streaming query does not use parallel workers for some reason.
Description
A sqlx query is not taking advantage of the parallel workers chosen by the query plan. The same query takes roughly double the time under sqlx compared to other tools, which do launch the planned parallel workers.
Reproduction steps
use eyre::Result;
use sqlx::{postgres::PgPoolOptions, Executor};

#[tokio::main]
async fn main() -> Result<()> {
    // Connect to database
    let url = "<TODO>";
    let pool = PgPoolOptions::new()
        .max_connections(5)
        .after_connect(|conn, _| {
            Box::pin(async move {
                tracing::info!("New connection established");
                // Enable statement logging and auto_explain for every connection
                conn.execute("SET log_statement = 'all'").await?;
                conn.execute("LOAD 'auto_explain'").await?;
                conn.execute("SET auto_explain.log_min_duration = '0ms'")
                    .await?;
                conn.execute("SET auto_explain.log_analyze = 'on'").await?;
                conn.execute("SET auto_explain.log_buffers = 'on'").await?;
                conn.execute("SET auto_explain.log_timing = 'on'").await?;
                let settings = conn.fetch_all("SHOW ALL").await?;
                tracing::info!("PostgreSQL settings: {:?}", settings);
                Ok(())
            })
        })
        .connect(url)
        .await
        .map_err(|e| eyre::eyre!("Failed to create database pool: {}", e))?;

    // Create a wide table with many columns
    sqlx::query(
        r#"
        CREATE TABLE IF NOT EXISTS wide_table (
            id SERIAL PRIMARY KEY,
            col1 TEXT, col2 TEXT, col3 TEXT, col4 TEXT, col5 TEXT,
            col6 TEXT, col7 TEXT, col8 TEXT, col9 TEXT, col10 TEXT,
            value INTEGER
        )"#,
    )
    .execute(&pool)
    .await?;

    // Insert dummy data (1M rows to force a parallel scan)
    sqlx::query(
        r#"
        INSERT INTO wide_table (col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, value)
        SELECT
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), floor(random() * 1000000)::int
        FROM generate_series(1, 1000000)"#,
    )
    .execute(&pool)
    .await?;

    // Query that should trigger a parallel scan
    println!("Executing query with sqlx...");
    let start = std::time::Instant::now();
    let result = sqlx::query(
        r#"
        SELECT sum(value)
        FROM wide_table
        WHERE value > 50000000"#,
    )
    .fetch_optional(&pool)
    .await?;
    println!("Query took: {:?}", start.elapsed());
    println!("Result: {}", result.is_some());

    // Compare with direct psql
    println!("\nTo compare, run this in psql:");
    println!(
        r#"
        EXPLAIN (ANALYZE, BUFFERS)
        SELECT sum(value) FROM wide_table WHERE value > 50000000;
        "#
    );
    Ok(())
}
DB logs from running the query with sqlx via the above script
2025-01-09 10:42:13.363 GMT [24484] LOG: execute sqlx_s_2:
    SELECT sum(value)
    FROM wide_table
    WHERE value > 50000000
2025-01-09 10:42:13.612 GMT [24484] LOG: duration: 249.180 ms plan:
    Query Text:
    SELECT sum(value)
    FROM wide_table
    WHERE value > 50000000
    Finalize Aggregate  (cost=54440.44..54440.45 rows=1 width=8) (actual time=249.176..249.177 rows=1 loops=1)
      Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
      ->  Gather  (cost=54440.22..54440.43 rows=2 width=8) (actual time=249.174..249.174 rows=1 loops=1)
            Workers Planned: 2
            Workers Launched: 0
            Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
            ->  Partial Aggregate  (cost=53440.22..53440.23 rows=1 width=8) (actual time=249.173..249.174 rows=1 loops=1)
                  Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
                  ->  Parallel Seq Scan on wide_table  (cost=0.00..53076.46 rows=145505 width=4) (actual time=249.171..249.171 rows=0 loops=1)
                        Filter: (value > 50000000)
                        Rows Removed by Filter: 1000000
                        Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
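A note on the Workers Launched: 0 above: the PostgreSQL documentation says a parallel plan falls back to serial execution when the client sends an Execute message with a non-zero fetch count. I don't know whether fetch_optional actually puts such a row limit on the portal (that part is my assumption about sqlx internals), but a rough sketch like the one below could compare the same statement across different fetch paths; the helper name, and the assumption that a plain &str query goes through the unprepared path, are mine:

use sqlx::{postgres::PgPool, Executor};

// Illustrative helper (name is mine): run the same aggregate through three
// sqlx paths and compare wall-clock time / the auto_explain output in the log.
async fn compare_fetch_paths(pool: &PgPool) -> Result<(), sqlx::Error> {
    let sql = "SELECT sum(value) FROM wide_table WHERE value > 50000000";

    // 1) Prepared statement, single-row fetch (what the reproduction uses).
    let t = std::time::Instant::now();
    let _ = sqlx::query(sql).fetch_optional(pool).await?;
    println!("fetch_optional (prepared): {:?}", t.elapsed());

    // 2) Prepared statement, fetching the full result set.
    let t = std::time::Instant::now();
    let _ = sqlx::query(sql).fetch_all(pool).await?;
    println!("fetch_all (prepared): {:?}", t.elapsed());

    // 3) Plain &str execution (assumed here to take the unprepared path).
    let t = std::time::Instant::now();
    let rows = pool.fetch_all(sql).await?;
    println!("fetch_all (&str): {:?} ({} rows)", t.elapsed(), rows.len());

    Ok(())
}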
DB logs from running the query with psql (Workers Launched: 2)
    Query Text: LOAD 'auto_explain';
    SET auto_explain.log_min_duration = '50ms';
    SET auto_explain.log_analyze = 'on';
    SET auto_explain.log_buffers = 'on';
    SET auto_explain.log_timing = 'on';
    EXPLAIN (ANALYZE, BUFFERS) SELECT sum(value) FROM wide_table WHERE value > 50000000
    Finalize Aggregate  (cost=53828.59..53828.60 rows=1 width=8) (actual time=231.420..232.170 rows=1 loops=1)
      Buffers: shared hit=732 read=46888
      ->  Gather  (cost=53828.38..53828.59 rows=2 width=8) (actual time=231.352..232.164 rows=3 loops=1)
            Workers Planned: 2
            Workers Launched: 2
            Buffers: shared hit=732 read=46888
            ->  Partial Aggregate  (cost=52828.38..52828.39 rows=1 width=8) (actual time=227.663..227.663 rows=1 loops=3)
                  Buffers: shared hit=732 read=46888
                  ->  Parallel Seq Scan on wide_table  (cost=0.00..52828.27 rows=42 width=4) (actual time=227.659..227.660 rows=0 loops=3)
                        Filter: (value > 50000000)
                        Rows Removed by Filter: 333333
                        Buffers: shared hit=732 read=46888
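To rule out per-session settings as the difference between the two logs, the plan could also be pulled through the same sqlx pool used in the reproduction rather than a separate psql session; a rough sketch (helper name is mine):

use sqlx::{postgres::PgPool, Row};

// Illustrative helper (name is mine): fetch the EXPLAIN ANALYZE output through
// the same pool, so the plan comes from the exact connection and settings the
// reproduction uses rather than from a separate psql session.
async fn explain_via_sqlx(pool: &PgPool) -> Result<(), sqlx::Error> {
    let rows = sqlx::query(
        "EXPLAIN (ANALYZE, BUFFERS) SELECT sum(value) FROM wide_table WHERE value > 50000000",
    )
    .fetch_all(pool)
    .await?;

    // EXPLAIN returns one text row per plan line, in the "QUERY PLAN" column.
    for row in rows {
        let line: String = row.try_get("QUERY PLAN")?;
        println!("{line}");
    }
    Ok(())
}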
SQLx version
"0.8.2"
Enabled SQLx features
"runtime-tokio-native-tls", "postgres", "time"
Database server and version
{ "version": "PostgreSQL 14.5 on aarch64-apple-darwin21.3.0, compiled by clang version 11.1.0, 64-bit" }
Operating system
MacOS 15.11
Rust version
rustc 1.82.0 (f6e511eec 2024-10-15) (built from a source tarball)