Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
loadtest:
strategy:
matrix:
kind: ['csv_agg', 'postgrest']
kind: ['csv_agg', 'csv_agg_delim', 'postgrest']
name: Loadtest
runs-on: ubuntu-24.04
steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ results/
*.diffs
pgbench_log.*
.history
pg_csv--*.sql
34 changes: 33 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,25 @@
[![Coverage Status](https://coveralls.io/repos/github/PostgREST/pg_csv/badge.svg)](https://coveralls.io/github/PostgREST/pg_csv)
[![Tests](https://github.com/PostgREST/pg_csv/actions/workflows/ci.yaml/badge.svg)](https://github.com/PostgREST/pg_csv/actions)

## Installation

Clone this repo and run:

```bash
make && make install
```

Then enable the extension in your database:

```psql
create extension pg_csv;
```

## csv_agg

Aggregate that builds a CSV as per [RFC 4180](https://www.ietf.org/rfc/rfc4180.txt), quoting as required.

```
```psql
select csv_agg(x) from projects x;
csv_agg
-------------------
Expand All @@ -20,3 +34,21 @@ select csv_agg(x) from projects x;
5,Orphan,
(1 row)
```

It also accepts an optional second argument that sets a custom single-character delimiter.

```psql
select csv_agg(x, '|') from projects x;
csv_agg
-------------------
id|name|client_id+
1|Windows 7|1 +
2|Windows 10|1 +
3|IOS|2 +
4|OSX|2 +
5|Orphan|
(1 row)
```

> [!IMPORTANT]
> Newline, carriage return and double quote are not supported as delimiters: passing any of them raises an error, to maintain the integrity of the separated values format.
5 changes: 5 additions & 0 deletions bench/csv_agg_delim.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- pgbench script: runs csv_agg with a '|' delimiter over a varying number of rows.
-- Pick a row limit between 1000 and 2000 for this transaction.
\set lim random(1000, 2000)

-- Aggregate the limited subset of student_emotion_assessments into one pipe-delimited CSV value.
select csv_agg(t,'|') from (
select * from student_emotion_assessments limit :lim
) as t;
15 changes: 0 additions & 15 deletions sql/pg_csv--0.1.sql

This file was deleted.

19 changes: 16 additions & 3 deletions sql/pg_csv.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,26 @@ create function csv_agg_transfn(internal, anyelement)
language c
as 'pg_csv';

-- Transition function overload that takes the delimiter as a third "char" argument.
-- Implemented in C in the pg_csv shared library.
create function csv_agg_transfn(internal, anyelement, "char")
returns internal
language c
as 'pg_csv';

-- Final function: takes the internal aggregate state and returns the result as text.
-- Implemented in C in the pg_csv shared library.
create function csv_agg_finalfn(internal)
returns text
language c
as 'pg_csv';

-- Two-argument csv_agg: the second ("char") argument is the field delimiter.
-- Uses the three-argument csv_agg_transfn overload and is declared parallel safe.
create aggregate csv_agg(anyelement, "char") (
sfunc = csv_agg_transfn,
stype = internal,
finalfunc = csv_agg_finalfn,
parallel = safe
);

create aggregate csv_agg(anyelement) (
sfunc = csv_agg_transfn,
stype = internal,
finalfunc = csv_agg_finalfn
sfunc = csv_agg_transfn,
stype = internal,
finalfunc = csv_agg_finalfn,
parallel = safe
);
31 changes: 19 additions & 12 deletions src/pg_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

PG_MODULE_MAGIC;

static const char NEWLINE = '\n';
static const char DELIMITER = ',';
static const char DQUOTE = '"';
static const char CR = '\r';
static const char NEWLINE = '\n';
static const char DQUOTE = '"';
static const char CR = '\r';

typedef struct {
StringInfoData accum_buf;
Expand All @@ -14,17 +13,21 @@ typedef struct {
TupleDesc tupdesc;
} CsvAggState;

// Tells whether `c` is one of the structural characters (double quote, LF, CR).
// These always force quoting of a field and are rejected as delimiters.
static inline bool is_reserved(char c) {
  if (c == DQUOTE) return true;
  if (c == NEWLINE) return true;
  return c == CR;
}

// Any delimiter (comma by default), quote, CR, LF requires quoting, following RFC https://www.ietf.org/rfc/rfc4180.txt
static inline bool needs_quote(const char *s, size_t n) {
static inline bool needs_quote(const char *s, size_t n, char delim) {
while (n--) {
char c = *s++;
if (c == DELIMITER || c == DQUOTE || c == NEWLINE || c == CR) return true;
if (c == delim || is_reserved(c)) return true;
}
return false;
}

static inline void csv_append_field(StringInfo buf, const char *s, size_t n) {
if (!needs_quote(s, n)) {
static inline void csv_append_field(StringInfo buf, const char *s, size_t n, char delim) {
if (!needs_quote(s, n, delim)) {
appendBinaryStringInfo(buf, s, n);
} else {
appendStringInfoChar(buf, DQUOTE);
Expand Down Expand Up @@ -72,6 +75,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {

HeapTupleHeader next = PG_GETARG_HEAPTUPLEHEADER(1);

char delim = PG_NARGS() >= 3 && !PG_ARGISNULL(2) ? PG_GETARG_CHAR(2) : ',';

if (is_reserved(delim)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("delimiter cannot be newline, carriage return or double quote")));

// build header and cache tupdesc once
if (!state->header_done) {
TupleDesc tdesc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));
Expand All @@ -83,10 +90,10 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
continue;

if (i > 0) // only append delimiter after the first value
appendStringInfoChar(&state->accum_buf, DELIMITER);
appendStringInfoChar(&state->accum_buf, delim);

char *cstr = NameStr(att->attname);
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
}

appendStringInfoChar(&state->accum_buf, NEWLINE);
Expand Down Expand Up @@ -119,12 +126,12 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
if (att->attisdropped) // pg always keeps dropped columns, guard against this
continue;

if (i > 0) appendStringInfoChar(&state->accum_buf, DELIMITER);
if (i > 0) appendStringInfoChar(&state->accum_buf, delim);

if (nulls[i]) continue; // empty field for NULL

char *cstr = datum_to_cstring(datums[i], att->atttypid);
csv_append_field(&state->accum_buf, cstr, strlen(cstr));
csv_append_field(&state->accum_buf, cstr, strlen(cstr), delim);
}

PG_RETURN_POINTER(state);
Expand Down
2 changes: 1 addition & 1 deletion src/pg_prelude.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
#include <utils/json.h>
#include <utils/jsonb.h>
#include <utils/lsyscache.h>
#include <utils/typcache.h>
#include <utils/memutils.h>
#include <utils/regproc.h>
#include <utils/snapmgr.h>
#include <utils/typcache.h>
#include <utils/varlena.h>

#pragma GCC diagnostic pop
Expand Down
69 changes: 69 additions & 0 deletions test/expected/delimiters.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
-- semicolon delimiter
SELECT csv_agg(x, ';') AS body
FROM projects x;
body
-------------------------------
id;name;client_id +
1;Windows 7;1 +
2;has,comma;1 +
;; +
4;OSX;2 +
;"has""quote"; +
5;"has,comma and ""quote""";7+
6;"has +
LF";7 +
7;"has \r CR";8 +
8;"has \r +
CRLF""";8
(1 row)

-- pipe delimiter
SELECT csv_agg(x, '|') AS body
FROM projects x;
body
-------------------------------
id|name|client_id +
1|Windows 7|1 +
2|has,comma|1 +
|| +
4|OSX|2 +
|"has""quote"| +
5|"has,comma and ""quote"""|7+
6|"has +
LF"|7 +
7|"has \r CR"|8 +
8|"has \r +
CRLF"""|8
(1 row)

-- tab delimiter
SELECT csv_agg(x, E'\t') AS body
FROM projects x;
body
-------------------------------------------
id name client_id +
1 Windows 7 1 +
2 has,comma 1 +
+
4 OSX 2 +
"has""quote" +
5 "has,comma and ""quote""" 7+
6 "has +
LF" 7 +
7 "has \r CR" 8 +
8 "has \r +
CRLF""" 8
(1 row)

-- newline is forbidden as delimiter
SELECT csv_agg(x, E'\n') AS body
FROM projects x;
ERROR: delimiter cannot be newline, carriage return or double quote
-- double quote is forbidden as delimiter
SELECT csv_agg(x, '"') AS body
FROM projects x;
ERROR: delimiter cannot be newline, carriage return or double quote
-- carriage return is forbidden as delimiter
SELECT csv_agg(x, E'\r') AS body
FROM projects x;
ERROR: delimiter cannot be newline, carriage return or double quote
23 changes: 23 additions & 0 deletions test/sql/delimiters.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- semicolon delimiter
SELECT csv_agg(x, ';') AS body
FROM projects x;

-- pipe delimiter
SELECT csv_agg(x, '|') AS body
FROM projects x;

-- tab delimiter
SELECT csv_agg(x, E'\t') AS body
FROM projects x;

-- newline is forbidden as delimiter
SELECT csv_agg(x, E'\n') AS body
FROM projects x;

-- double quote is forbidden as delimiter
SELECT csv_agg(x, '"') AS body
FROM projects x;

-- carriage return is forbidden as delimiter
SELECT csv_agg(x, E'\r') AS body
FROM projects x;
Loading