Skip to content

Commit d724494

Browse files
committed
feat: add custom delimiter to csv_agg
Addresses the problem on PostgREST/postgrest#1102 Adds an overloaded `csv_agg` function: ```sql SELECT csv_agg(x, '|') AS body FROM projects x; ```
1 parent 1748da4 commit d724494

File tree

10 files changed

+168
-33
lines changed

10 files changed

+168
-33
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
loadtest:
3838
strategy:
3939
matrix:
40-
kind: ['csv_agg', 'postgrest']
40+
kind: ['csv_agg', 'csv_agg_delim', 'postgrest']
4141
name: Loadtest
4242
runs-on: ubuntu-24.04
4343
steps:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ results/
77
*.diffs
88
pgbench_log.*
99
.history
10+
pg_csv--*.sql

README.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,25 @@
44
[![Coverage Status](https://coveralls.io/repos/github/PostgREST/pg_csv/badge.svg)](https://coveralls.io/github/PostgREST/pg_csv)
55
[![Tests](https://github.com/PostgREST/pg_csv/actions/workflows/ci.yaml/badge.svg)](https://github.com/PostgREST/pg_csv/actions)
66

7+
## Installation
8+
9+
Clone this repo and run:
10+
11+
```bash
12+
make && make install
13+
```
14+
15+
To install the extension:
16+
17+
```psql
18+
create extension pg_csv;
19+
```
20+
721
## csv_agg
822

923
Aggregate that builds a CSV as per [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), quoting as required.
1024

11-
```
25+
```psql
1226
select csv_agg(x) from projects x;
1327
csv_agg
1428
-------------------
@@ -20,3 +34,21 @@ select csv_agg(x) from projects x;
2034
5,Orphan,
2135
(1 row)
2236
```
37+
38+
It also supports adding a custom delimiter.
39+
40+
```psql
41+
select csv_agg(x, '|') from projects x;
42+
csv_agg
43+
-------------------
44+
id|name|client_id+
45+
1|Windows 7|1 +
46+
2|Windows 10|1 +
47+
3|IOS|2 +
48+
4|OSX|2 +
49+
5|Orphan|
50+
(1 row)
51+
```
52+
53+
> [!IMPORTANT]
54+
> Newline, carriage return and double quotes are not supported as delimiters to maintain the integrity of the separated values format.

bench/csv_agg_delim.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
\set lim random(1000, 2000)
2+
3+
select csv_agg(t,'|') from (
4+
select * from student_emotion_assessments limit :lim
5+
) as t;

sql/pg_csv--0.1.sql

Lines changed: 0 additions & 15 deletions
This file was deleted.

sql/pg_csv.sql

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,26 @@ create function csv_agg_transfn(internal, anyelement)
33
language c
44
as 'pg_csv';
55

6+
create function csv_agg_transfn(internal, anyelement, "char")
7+
returns internal
8+
language c
9+
as 'pg_csv';
10+
611
create function csv_agg_finalfn(internal)
712
returns text
813
language c
914
as 'pg_csv';
1015

16+
create aggregate csv_agg(anyelement, "char") (
17+
sfunc = csv_agg_transfn,
18+
stype = internal,
19+
finalfunc = csv_agg_finalfn,
20+
parallel = safe
21+
);
22+
1123
create aggregate csv_agg(anyelement) (
12-
sfunc = csv_agg_transfn,
13-
stype = internal,
14-
finalfunc = csv_agg_finalfn
24+
sfunc = csv_agg_transfn,
25+
stype = internal,
26+
finalfunc = csv_agg_finalfn,
27+
parallel = safe
1528
);

src/pg_csv.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22

33
PG_MODULE_MAGIC;
44

5-
static const char NEWLINE = '\n';
6-
static const char DELIMITER = ',';
7-
static const char DQUOTE = '"';
8-
static const char CR = '\r';
5+
static const char NEWLINE = '\n';
6+
static const char DQUOTE = '"';
7+
static const char CR = '\r';
98

109
typedef struct {
1110
StringInfoData accum_buf;
@@ -14,17 +13,21 @@ typedef struct {
1413
TupleDesc tupdesc;
1514
} CsvAggState;
1615

16+
static inline bool is_reserved(char c) {
17+
return c == DQUOTE || c == NEWLINE || c == CR;
18+
}
19+
1720
// Any comma, quote, CR, LF requires quoting as per RFC https://www.ietf.org/rfc/rfc4180.txt
18-
static inline bool needs_quote(const char *s, size_t n) {
21+
static inline bool needs_quote(const char *s, size_t n, char delim) {
1922
while (n--) {
2023
char c = *s++;
21-
if (c == DELIMITER || c == DQUOTE || c == NEWLINE || c == CR) return true;
24+
if (c == delim || is_reserved(c)) return true;
2225
}
2326
return false;
2427
}
2528

26-
static inline void csv_append_field(StringInfo buf, const char *s, size_t n) {
27-
if (!needs_quote(s, n)) {
29+
static inline void csv_append_field(StringInfo buf, const char *s, size_t n, char delim) {
30+
if (!needs_quote(s, n, delim)) {
2831
appendBinaryStringInfo(buf, s, n);
2932
} else {
3033
appendStringInfoChar(buf, DQUOTE);
@@ -72,6 +75,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
7275

7376
HeapTupleHeader next = PG_GETARG_HEAPTUPLEHEADER(1);
7477

78+
char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_CHAR(2) : ',';
79+
80+
if (is_reserved(delim)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("delimiter cannot be newline, carriage return or double quote")));
81+
7582
// build header and cache tupdesc once
7683
if (!state->header_done) {
7784
TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));
@@ -83,10 +90,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
8390
continue;
8491

8592
if (i > 0) // only append delimiter after the first value
86-
appendStringInfoChar(&state->accum_buf, DELIMITER);
93+
appendStringInfoChar(&state->accum_buf, delim);
8794

8895
char *cstr = NameStr(att->attname);
89-
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
96+
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
9097
}
9198

9299
appendStringInfoChar(&state->accum_buf, NEWLINE);
@@ -119,12 +126,12 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
119126
if (att->attisdropped) // pg always keeps dropped columns, guard against this
120127
continue;
121128

122-
if (i > 0) appendStringInfoChar(&state->accum_buf, DELIMITER);
129+
if (i > 0) appendStringInfoChar(&state->accum_buf, delim);
123130

124131
if (nulls[i]) continue; // empty field for NULL
125132

126133
char *cstr = datum_to_cstring(datums[i], att->atttypid);
127-
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
134+
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
128135
}
129136

130137
PG_RETURN_POINTER(state);

src/pg_prelude.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@
4141
#include <utils/json.h>
4242
#include <utils/jsonb.h>
4343
#include <utils/lsyscache.h>
44-
#include <utils/typcache.h>
4544
#include <utils/memutils.h>
4645
#include <utils/regproc.h>
4746
#include <utils/snapmgr.h>
47+
#include <utils/typcache.h>
4848
#include <utils/varlena.h>
4949

5050
#pragma GCC diagnostic pop

test/expected/delimiters.out

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- semicolon delimiter
2+
SELECT csv_agg(x, ';') AS body
3+
FROM projects x;
4+
body
5+
-------------------------------
6+
id;name;client_id +
7+
1;Windows 7;1 +
8+
2;has,comma;1 +
9+
;; +
10+
4;OSX;2 +
11+
;"has""quote"; +
12+
5;"has,comma and ""quote""";7+
13+
6;"has +
14+
LF";7 +
15+
7;"has \r CR";8 +
16+
8;"has \r +
17+
CRLF""";8
18+
(1 row)
19+
20+
-- pipe delimiter
21+
SELECT csv_agg(x, '|') AS body
22+
FROM projects x;
23+
body
24+
-------------------------------
25+
id|name|client_id +
26+
1|Windows 7|1 +
27+
2|has,comma|1 +
28+
|| +
29+
4|OSX|2 +
30+
|"has""quote"| +
31+
5|"has,comma and ""quote"""|7+
32+
6|"has +
33+
LF"|7 +
34+
7|"has \r CR"|8 +
35+
8|"has \r +
36+
CRLF"""|8
37+
(1 row)
38+
39+
-- tab delimiter
40+
SELECT csv_agg(x, E'\t') AS body
41+
FROM projects x;
42+
body
43+
-------------------------------------------
44+
id name client_id +
45+
1 Windows 7 1 +
46+
2 has,comma 1 +
47+
+
48+
4 OSX 2 +
49+
"has""quote" +
50+
5 "has,comma and ""quote""" 7+
51+
6 "has +
52+
LF" 7 +
53+
7 "has \r CR" 8 +
54+
8 "has \r +
55+
CRLF""" 8
56+
(1 row)
57+
58+
-- newline is forbidden as delimiter
59+
SELECT csv_agg(x, E'\n') AS body
60+
FROM projects x;
61+
ERROR: delimiter cannot be newline, carriage return or double quote
62+
-- double quote is forbidden as delimiter
63+
SELECT csv_agg(x, '"') AS body
64+
FROM projects x;
65+
ERROR: delimiter cannot be newline, carriage return or double quote
66+
-- carriage return is forbidden as delimiter
67+
SELECT csv_agg(x, E'\r') AS body
68+
FROM projects x;
69+
ERROR: delimiter cannot be newline, carriage return or double quote

test/sql/delimiters.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- semicolon delimiter
2+
SELECT csv_agg(x, ';') AS body
3+
FROM projects x;
4+
5+
-- pipe delimiter
6+
SELECT csv_agg(x, '|') AS body
7+
FROM projects x;
8+
9+
-- tab delimiter
10+
SELECT csv_agg(x, E'\t') AS body
11+
FROM projects x;
12+
13+
-- newline is forbidden as delimiter
14+
SELECT csv_agg(x, E'\n') AS body
15+
FROM projects x;
16+
17+
-- double quote is forbidden as delimiter
18+
SELECT csv_agg(x, '"') AS body
19+
FROM projects x;
20+
21+
-- carriage return is forbidden as delimiter
22+
SELECT csv_agg(x, E'\r') AS body
23+
FROM projects x;

0 commit comments

Comments
 (0)