Skip to content

Parallel workers not used on Postgres #3673

Closed
@twitu

Description

@twitu

I have found these related issues/pull requests

I could not find any issues in the sqlx repo related to parallel workers not being launched. However, this issue seems somewhat related to brianc/node-postgres#3344. That issue shows logs very similar to the ones I'm seeing and mentions that a streaming query does not use parallel workers for some reason.

Description

The sqlx query is not taking advantage of the parallel workers chosen by the query plan. It takes twice as long to execute the query as other tools that do use parallel workers.

Reproduction steps

use eyre::Result;
use sqlx::{postgres::PgPoolOptions, Executor, Row};
use tracing;

/// Reproduction for "parallel workers planned but not launched" under sqlx.
///
/// Steps performed:
/// 1. Open a pool whose connections enable statement logging and
///    `auto_explain`, so the server log shows the executed plan.
/// 2. Create `wide_table` if it does not exist and insert 1M rows of dummy data.
/// 3. Run an aggregate query through sqlx and print how long it took.
/// 4. Print the equivalent statement to run manually in psql for comparison.
///
/// Note: the `INSERT` is unconditional, so every run appends another 1M rows
/// to `wide_table`.
///
/// # Errors
///
/// Returns an error if the pool cannot be created or any statement fails.
#[tokio::main]
async fn main() -> Result<()> {
    // Connect to database
    // Placeholder: replace with your PostgreSQL connection URL before running.
    let url = <TODO>;

    let pool = PgPoolOptions::new()
        .max_connections(5)
        .after_connect(|conn, _| {
            Box::pin(async move {
                tracing::info!("New connection established");
                // Enable statement logging
                conn.execute("SET log_statement = 'all'").await?;
                // Log the plan (with ANALYZE, buffers and timing) of every statement.
                conn.execute("LOAD 'auto_explain'").await?;
                conn.execute("SET auto_explain.log_min_duration = '0ms'")
                    .await?;
                conn.execute("SET auto_explain.log_analyze = 'on'").await?;
                conn.execute("SET auto_explain.log_buffers = 'on'").await?;
                conn.execute("SET auto_explain.log_timing = 'on'").await?;

                // Dump the effective server settings for this session.
                let settings = conn.fetch_all("SHOW ALL").await?;
                tracing::info!("PostgreSQL settings: {:?}", settings);
                Ok(())
            })
        })
        .connect(url)
        .await
        .map_err(|e| eyre::eyre!("Failed to create database pool: {}", e))?;

    // Create a wide table with many columns
    sqlx::query(
        r#"
        CREATE TABLE IF NOT EXISTS wide_table (
            id SERIAL PRIMARY KEY,
            col1 TEXT, col2 TEXT, col3 TEXT, col4 TEXT, col5 TEXT,
            col6 TEXT, col7 TEXT, col8 TEXT, col9 TEXT, col10 TEXT,
            value INTEGER
        )"#,
    )
    .execute(&pool)
    .await?;

    // Insert dummy data (1M rows to force parallel scan)
    sqlx::query(
        r#"
        INSERT INTO wide_table (col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, value)
        SELECT 
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), md5(random()::text), md5(random()::text),
            md5(random()::text), floor(random() * 1000000)::int
        FROM generate_series(1, 1000000)"#,
    )
    .execute(&pool)
    .await?;

    // Query that should trigger parallel scan
    // `value` is generated below 1_000_000, so this predicate filters out every
    // row; the cost is dominated by the sequential scan itself.
    // NOTE(review): `fetch_optional` may send a row limit with the Execute
    // message; PostgreSQL documents that a non-zero fetch count prevents
    // parallel workers from being launched — confirm against the sqlx version
    // in use (try `fetch_all` / `fetch_one` for comparison).
    println!("Executing query with sqlx...");
    let start = std::time::Instant::now();
    let result = sqlx::query(
        r#"
        SELECT sum(value) 
        FROM wide_table 
        WHERE value > 50000000"#,
    )
    .fetch_optional(&pool)
    .await?;

    println!("Query took: {:?}", start.elapsed());
    println!("Result: {}", result.is_some());

    // Compare with direct psql
    // The predicate must match the sqlx query above so both plans are comparable.
    println!("\nTo compare, run this in psql:");
    println!(
        r#"
    EXPLAIN (ANALYZE, BUFFERS)
    SELECT sum(value) FROM wide_table WHERE value > 50000000;
    "#
    );

    Ok(())
}

DB logs from running the query with sqlx using the above script

2025-01-09 10:42:13.363 GMT [24484] LOG:  execute sqlx_s_2: 
	        SELECT sum(value) 
	        FROM wide_table 
	        WHERE value > 50000000
2025-01-09 10:42:13.612 GMT [24484] LOG:  duration: 249.180 ms  plan:
	Query Text: 
	        SELECT sum(value) 
	        FROM wide_table 
	        WHERE value > 50000000
	Finalize Aggregate  (cost=54440.44..54440.45 rows=1 width=8) (actual time=249.176..249.177 rows=1 loops=1)
	  Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
	  ->  Gather  (cost=54440.22..54440.43 rows=2 width=8) (actual time=249.174..249.174 rows=1 loops=1)
	        Workers Planned: 2
	        Workers Launched: 0
	        Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
	        ->  Partial Aggregate  (cost=53440.22..53440.23 rows=1 width=8) (actual time=249.173..249.174 rows=1 loops=1)
	              Buffers: shared hit=887 read=46733 dirtied=46748 written=46733
	              ->  Parallel Seq Scan on wide_table  (cost=0.00..53076.46 rows=145505 width=4) (actual time=249.171..249.171 rows=0 loops=1)
	                    Filter: (value > 50000000)
	                    Rows Removed by Filter: 1000000
	                    Buffers: shared hit=887 read=46733 dirtied=46748 written=46733

DB logs from running the query with psql (note Workers Launched: 2)

	Query Text: LOAD 'auto_explain';
	SET auto_explain.log_min_duration = '50ms';
	SET auto_explain.log_analyze = 'on';
	SET auto_explain.log_buffers = 'on';
	SET auto_explain.log_timing = 'on';
	EXPLAIN (ANALYZE, BUFFERS) SELECT sum(value) FROM wide_table WHERE value > 50000000
	
	Finalize Aggregate  (cost=53828.59..53828.60 rows=1 width=8) (actual time=231.420..232.170 rows=1 loops=1)
	  Buffers: shared hit=732 read=46888
	  ->  Gather  (cost=53828.38..53828.59 rows=2 width=8) (actual time=231.352..232.164 rows=3 loops=1)
	        Workers Planned: 2
	        Workers Launched: 2
	        Buffers: shared hit=732 read=46888
	        ->  Partial Aggregate  (cost=52828.38..52828.39 rows=1 width=8) (actual time=227.663..227.663 rows=1 loops=3)
	              Buffers: shared hit=732 read=46888
	              ->  Parallel Seq Scan on wide_table  (cost=0.00..52828.27 rows=42 width=4) (actual time=227.659..227.660 rows=0 loops=3)
	                    Filter: (value > 50000000)
	                    Rows Removed by Filter: 333333
	                    Buffers: shared hit=732 read=46888

SQLx version

"0.8.2"

Enabled SQLx features

"runtime-tokio-native-tls", "postgres", "time"

Database server and version

{ "version": "PostgreSQL 14.5 on aarch64-apple-darwin21.3.0, compiled by clang version 11.1.0, 64-bit" }

Operating system

MacOS 15.11

Rust version

rustc 1.82.0 (f6e511eec 2024-10-15) (built from a source tarball)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions