1
1
import type { ClickHouse , RawTaskRunPayloadV1 , TaskRunV2 } from "@internal/clickhouse" ;
2
- import { RedisOptions } from "@internal/redis" ;
2
+ import { type RedisOptions } from "@internal/redis" ;
3
3
import {
4
4
LogicalReplicationClient ,
5
5
type MessageDelete ,
@@ -8,14 +8,13 @@ import {
8
8
type PgoutputMessage ,
9
9
} from "@internal/replication" ;
10
10
import { recordSpanError , startSpan , trace , type Tracer } from "@internal/tracing" ;
11
- import { Logger , LogLevel } from "@trigger.dev/core/logger" ;
11
+ import { Logger , type LogLevel } from "@trigger.dev/core/logger" ;
12
12
import { tryCatch } from "@trigger.dev/core/utils" ;
13
13
import { parsePacketAsJson } from "@trigger.dev/core/v3/utils/ioSerialization" ;
14
- import { TaskRun } from "@trigger.dev/database" ;
14
+ import { type TaskRun } from "@trigger.dev/database" ;
15
15
import { nanoid } from "nanoid" ;
16
16
import EventEmitter from "node:events" ;
17
17
import pLimit from "p-limit" ;
18
- import { logger } from "./logger.server" ;
19
18
import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings" ;
20
19
21
20
interface TransactionEvent < T = any > {
@@ -130,6 +129,7 @@ export class RunsReplicationService {
130
129
flushInterval : options . flushIntervalMs ?? 100 ,
131
130
maxConcurrency : options . maxFlushConcurrency ?? 100 ,
132
131
callback : this . #flushBatch. bind ( this ) ,
132
+ // we can do some pre-merging to reduce the amount of data we need to send to clickhouse
133
133
mergeBatch : ( existingBatch : TaskRunInsert [ ] , newBatch : TaskRunInsert [ ] ) => {
134
134
const merged = new Map < string , TaskRunInsert > ( ) ;
135
135
@@ -487,11 +487,33 @@ export class RunsReplicationService {
487
487
488
488
const taskRunInserts = preparedInserts
489
489
. map ( ( { taskRunInsert } ) => taskRunInsert )
490
- . filter ( Boolean ) ;
490
+ . filter ( Boolean )
491
+ // batch inserts in clickhouse are more performant if the items
492
+ // are pre-sorted by the primary key
493
+ . sort ( ( a , b ) => {
494
+ if ( a . organization_id !== b . organization_id ) {
495
+ return a . organization_id < b . organization_id ? - 1 : 1 ;
496
+ }
497
+ if ( a . project_id !== b . project_id ) {
498
+ return a . project_id < b . project_id ? - 1 : 1 ;
499
+ }
500
+ if ( a . environment_id !== b . environment_id ) {
501
+ return a . environment_id < b . environment_id ? - 1 : 1 ;
502
+ }
503
+ if ( a . created_at !== b . created_at ) {
504
+ return a . created_at - b . created_at ;
505
+ }
506
+ return a . run_id < b . run_id ? - 1 : 1 ;
507
+ } ) ;
491
508
492
509
const payloadInserts = preparedInserts
493
510
. map ( ( { payloadInsert } ) => payloadInsert )
494
- . filter ( Boolean ) ;
511
+ . filter ( Boolean )
512
+ // batch inserts in clickhouse are more performant if the items
513
+ // are pre-sorted by the primary key
514
+ . sort ( ( a , b ) => {
515
+ return a . run_id < b . run_id ? - 1 : 1 ;
516
+ } ) ;
495
517
496
518
span . setAttribute ( "task_run_inserts" , taskRunInserts . length ) ;
497
519
span . setAttribute ( "payload_inserts" , payloadInserts . length ) ;
0 commit comments