From 0c1904170589545a534f24d79dfe4500f2427dcd Mon Sep 17 00:00:00 2001 From: Arthur Nascimento Date: Tue, 6 Jan 2026 13:36:49 -0300 Subject: [PATCH 01/11] Makefile: fix race condition on cypher_gram_def.h (#2273) The file cypher_gram.c generates cypher_gram_def.h, which is directly necessary for cypher_parser.o and cypher_keywords.o and their respective .bc files. But that direct dependency is not reflected in the Makefile, which only had the indirect dependency of .o on .c. So on high parallel builds, the .h may not have been generated by bison yet. Additionally, the .bc files should have the same dependencies as the .o files, but those are lacking. Here is an example output where the .bc file fails to build, as it was running concurrently with the bison instance that was about to finalize cypher_gram_def.h: In file included from src/backend/parser/cypher_parser.c:24: clang-17 -Wno-ignored-attributes -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-command-line-argument -Wno-compound-token-split-by-macro -O2 -I.//src/include -I.//src/include/parser -I. -I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -flto=thin -emit-llvm -c -o src/backend/parser/cypher_parser.bc src/backend/parser/cypher_parser.c .//src/include/parser/cypher_gram.h:65:10: fatal error: 'parser/cypher_gram_def.h' file not found 65 | #include "parser/cypher_gram_def.h" | ^~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. make: *** [/usr/pgsql-17/lib/pgxs/src/makefiles/../../src/Makefile.global:1085: src/backend/parser/cypher_parser.bc] Error 1 make: *** Waiting for unfinished jobs.... 
gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wshadow=compatible-local -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-format-truncation -O2 -g -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables -fPIC -fvisibility=hidden -I.//src/include -I.//src/include/parser -I. -I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -c -o src/backend/catalog/ag_label.o src/backend/catalog/ag_label.c /usr/bin/bison -Wno-deprecated --defines=src/include/parser/cypher_gram_def.h -o src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.y Previously, cypher_parser.o was missing the dependency, so it could start before cypher_gram_def.h was available: Considering target file 'src/backend/parser/cypher_parser.o'. File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. As well as cypher_parser.bc, missing the dependency on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. Now cypher_parser.o correctly depends on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.o'. 
File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. And cypher_parser.bc correctly depends on cypher_gram_def.h as well: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. 
--- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e73f3e68..0392d8b85 100644 --- a/Makefile +++ b/Makefile @@ -147,8 +147,10 @@ src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c +src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h $(age_sql): @cat $(SQLS) > $@ From 5d4f13de64bbe4a1a982797274d51f45126843a8 Mon Sep 17 00:00:00 2001 From: M15terHyde <59905806+M15terHyde@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:52:33 -0600 Subject: [PATCH 02/11] Revise README for Python driver updates (#2298) Updated README from psycopg2 to psycopg3 (psycopg) --- drivers/python/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/python/README.md b/drivers/python/README.md index 749b44bfb..e64f9de67 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -28,11 +28,11 @@ AGType parser and driver support for [Apache AGE](https://age.apache.org/), grap ### Features * Unmarshal AGE result data(AGType) to Vertex, Edge, Path -* Cypher query support for Psycopg2 PostgreSQL driver (enables to use cypher queries directly) +* Cypher query support for Psycopg3 PostgreSQL driver (enables to use cypher queries directly) ### Prerequisites * over Python 3.9 -* This module runs on [psycopg2](https://www.psycopg.org/) and 
[antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) +* This module runs on [psycopg3](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) ``` sudo apt-get update sudo apt-get install python3-dev libpq-dev @@ -80,7 +80,7 @@ SET search_path = ag_catalog, "$user", public; ``` ### Usage -* If you are not familiar with Psycopg2 driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) +* If you are not familiar with Psycopg driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) * Simpler way to access Apache AGE [AGE Sample](samples/apache-age-note.ipynb) in Samples. * Agtype converting samples: [Agtype Sample](samples/apache-age-agtypes.ipynb) in Samples. @@ -119,7 +119,7 @@ Here the following value required Insert From networkx directed graph into an AGE database. #### Parameters -- `connection` (psycopg2.connect): Connection object to the AGE database. +- `connection` (psycopg.connect): Connection object to the AGE database. - `G` (networkx.DiGraph): Networkx directed graph to be converted and inserted. @@ -152,7 +152,7 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. #### Parameters -- `connection` (psycopg2.connect): Connection object to the PostgreSQL database. +- `connection` (psycopg.connect): Connection object to the PostgreSQL database. - `graphName` (str): Name of the graph. - `G` (None | nx.DiGraph): Optional Networkx directed graph. If provided, the data will be added to this graph. - `query` (str | None): Optional Cypher query to retrieve data from the database. @@ -167,3 +167,4 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. 
# Call the function to convert data into a Networkx graph graph = age_to_networkx(connection, graphName="MyGraph" ) ``` + From a9fbdaeb60f2c151f203d8650a91b0636d967366 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:27:47 -0800 Subject: [PATCH 03/11] Fix Issue 2289: handle empty list in IN expression (#2294) NOTE: This PR was created with AI tools and a human. When evaluating 'x IN []' with an empty list, the transform_AEXPR_IN function would return NULL because no expressions were processed. This caused a 'cache lookup failed for type 0' error downstream. This fix adds an early check for the empty list case: - 'x IN []' returns false (nothing can be in an empty list) Additional NOTE: Cypher does not have 'NOT IN' syntax. To check if a value is NOT in a list, use 'NOT (x IN list)'. The NOT operator will invert the false from an empty list to true as expected. The fix returns a boolean constant directly, avoiding the NULL result that caused the type lookup failure. Added regression tests. 
modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/parser/cypher_expr.c --- regress/expected/expr.out | 72 ++++++++++++++++++++++++++++++++ regress/sql/expr.sql | 23 ++++++++++ src/backend/parser/cypher_expr.c | 30 ++++++++++++- 3 files changed, 123 insertions(+), 2 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 926a958d6..6d9341451 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -319,6 +319,50 @@ $$RETURN 1 IN [[null]]$$) AS r(c boolean); f (1 row) +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -9155,9 +9199,37 @@ ERROR: could not find rte for x LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... 
^ HINT: variable x does not exist within scope of usage +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +NOTICE: graph "issue_2289" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + v +------- + false +(1 row) + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2289._ag_label_vertex +drop cascades to table issue_2289._ag_label_edge +NOTICE: graph "issue_2289" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_2263', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table issue_2263._ag_label_vertex diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 7bf1f26b2..445e2d237 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -157,6 +157,20 @@ SELECT * FROM cypher('expr', $$RETURN 1 in [[1]]$$) AS r(c boolean); SELECT * FROM cypher('expr', $$RETURN 1 IN [[null]]$$) AS r(c boolean); +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -3690,9 +3704,18 @@ SELECT * FROM cypher('issue_2263', $$ CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) 
$$) AS (out agtype); +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 5f4de86b9..fc0335def 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -600,6 +600,34 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) Assert(is_ag_node(a->rexpr, cypher_list)); + rexpr = (cypher_list *)a->rexpr; + + /* + * Handle empty list case: x IN [] is always false, x NOT IN [] is always true. + * We need to check this before processing to avoid returning NULL result + * which causes "cache lookup failed for type 0" error. + */ + if (rexpr->elems == NIL || list_length((List *)rexpr->elems) == 0) + { + Datum bool_value; + Const *const_result; + + /* If operator is <> (NOT IN), result is true; otherwise (IN) result is false */ + if (strcmp(strVal(linitial(a->name)), "<>") == 0) + { + bool_value = BoolGetDatum(true); + } + else + { + bool_value = BoolGetDatum(false); + } + + const_result = makeConst(BOOLOID, -1, InvalidOid, sizeof(bool), + bool_value, false, true); + + return (Node *)const_result; + } + /* If the operator is <>, combine with AND not OR. 
*/ if (strcmp(strVal(linitial(a->name)), "<>") == 0) { @@ -614,8 +642,6 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) rexprs = rvars = rnonvars = NIL; - rexpr = (cypher_list *)a->rexpr; - foreach(l, (List *) rexpr->elems) { Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l)); From 41cb5ca4e6c9ea45f087606b24163e293c7f45d8 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:55:36 -0800 Subject: [PATCH 04/11] Fix and improve index.sql regression test coverage (#2300) NOTE: This PR was created with AI tools and a human. - Remove unused copy command (leftover from deleted agload_test_graph test) - Replace broken Section 4 that referenced non-existent graph with comprehensive WHERE clause tests covering string, int, bool, and float properties with AND/OR/NOT operators - Add EXPLAIN tests to verify index usage: - Section 3: Validate GIN indices (load_city_gin_idx, load_country_gin_idx) show Bitmap Index Scan for property matching - Section 4: Validate all expression indices (city_country_code_idx, city_id_idx, city_west_coast_idx, country_life_exp_idx) show Index Scan for WHERE clause filtering All indices now have EXPLAIN verification confirming they are used as expected. modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 290 ++++++++++++++++++++++++++++++++++--- regress/sql/index.sql | 174 ++++++++++++++++++++-- 2 files changed, 436 insertions(+), 28 deletions(-) diff --git a/regress/expected/index.out b/regress/expected/index.out index 3ed7b1c33..9faead660 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; SET enable_mergejoin = ON; @@ -385,6 +384,19 @@ CREATE INDEX load_city_gin_idx ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on "City" c + Recheck Cond: (properties @> '{"city_id": 1}'::agtype) + -> Bitmap Index Scan on load_city_gin_idx + Index Cond: (properties @> '{"city_id": 1}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) RETURN c @@ -418,6 +430,19 @@ $$) as (n agtype); {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex (4 rows) +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------------------- + Bitmap Heap Scan on "Country" c + Recheck Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) + -> Bitmap Index Scan on load_country_gin_idx + Index Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -441,26 +466,259 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- 
Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Scan using city_country_code_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(2 rows) + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" + "Los Angeles" + "Seattle" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + name +------ +(0 rows) + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); - count -------- - 0 +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Index Scan using city_id_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"city_id"'::agtype]) = '1'::agtype) +(2 rows) + +-- Test WHERE with indexed integer property +SELECT * FROM 
cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" (1 row) -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + name +------------- + "Vancouver" +(1 row) + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" +(2 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Monterrey" + "Tijuana" +(2 rows) + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); -ERROR: graph "agload_test_graph" does not exist -LINE 1: SELECT COUNT(*) FROM cypher('agload_test_graph', $$ - ^ +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) +(2 rows) + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as 
(name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" + "Vancouver" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +--------------- + "New York" + "Toronto" + "Montreal" + "Mexico City" + "Monterrey" + "Tijuana" +(6 rows) + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" +(3 rows) + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "New York" + "Vancouver" +(2 rows) + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" +(1 row) + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Index Scan using country_life_exp_idx on "Country" c + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"life_expectancy"'::agtype]) > '80.0'::agtype) +(2 rows) + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + 
RETURN c.name +$$) as (name agtype); + name +---------- + "Canada" +(1 row) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Mexico" +(1 row) + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- @@ -478,5 +736,3 @@ NOTICE: graph "cypher_index" has been dropped (1 row) -SELECT drop_graph('agload_test_graph', true); -ERROR: graph "agload_test_graph" does not exist diff --git a/regress/sql/index.sql b/regress/sql/index.sql index d9a4331a4..96e7dd81a 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -17,8 +17,6 @@ * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load - LOAD 'age'; SET search_path TO ag_catalog; @@ -219,6 +217,11 @@ ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) @@ -235,6 +238,12 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c $$) as (n agtype); +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -250,23 +259,166 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); + +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name 
agtype); + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); + +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); +$$) as (plan agtype); -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); + +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); +$$) as (plan agtype); + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + 
+-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); + +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- SELECT drop_graph('cypher_index', true); -SELECT drop_graph('agload_test_graph', true); From 989526e3ff297fc45a427c7893fbb456bee409b2 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 10 Jan 2026 08:37:54 -0800 
Subject: [PATCH 05/11] Fix and improve index.sql addendum (#2301) NOTE: This PR was created with the help of AI tools and a human. Added additional requested regression tests - *EXPLAIN for pattern with WHERE clause *EXPLAIN for pattern with filters on both country and city modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 34 ++++++++++++++++++++++++++++++++++ regress/sql/index.sql | 14 ++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/regress/expected/index.out b/regress/expected/index.out index 9faead660..745cab269 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -626,6 +626,19 @@ $$) as (name agtype); "Tijuana" (6 rows) +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(3 rows) + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -700,6 +713,27 @@ $$) as (name agtype); "Mexico" (1 row) +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Index Scan using city_west_coast_idx on "City" city + 
Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + -> Bitmap Heap Scan on has_city _age_default_alias_0 + Recheck Cond: (start_id = city.id) + -> Bitmap Index Scan on has_city_start_id_idx + Index Cond: (start_id = city.id) + -> Index Scan using "Country_pkey" on "Country" country + Index Cond: (id = _age_default_alias_0.end_id) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"CA"'::agtype) +(11 rows) + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) diff --git a/regress/sql/index.sql b/regress/sql/index.sql index 96e7dd81a..a6e075c70 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -357,6 +357,13 @@ SELECT * FROM cypher('cypher_index', $$ ORDER BY a.city_id $$) as (name agtype); +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -404,6 +411,13 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c.name $$) as (name agtype); +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) From 0f0abc5bf47878457a3f01aaa0c8620855cf4329 Mon Sep 17 00:00:00 2001 From: Jean-Paul Abbuehl Date: Mon, 12 Jan 2026 21:10:08 +0100 Subject: [PATCH 06/11] feat: Add 32-bit platform support for graphid type (#2286) * feat: Add 32-bit platform support for graphid type This 
enables AGE to work on 32-bit platforms including WebAssembly (WASM). Problem: - graphid is int64 (8 bytes) with PASSEDBYVALUE - On 32-bit systems, Datum is only 4 bytes - PostgreSQL rejects pass-by-value types larger than Datum Solution: - Makefile-only change (no C code modifications) - When SIZEOF_DATUM=4 is passed to make, strip PASSEDBYVALUE from the generated SQL - If not specified, normal 64-bit behavior is preserved (PASSEDBYVALUE kept) This change is backward compatible: - 64-bit systems continue using pass-by-value - 32-bit systems now work with pass-by-reference Motivation: PGlite (PostgreSQL compiled to WebAssembly) uses 32-bit pointers and requires this patch to run AGE. Tested on: - 64-bit Linux (regression tests pass) - 32-bit WebAssembly via PGlite (all operations work) Co-authored-by: abbuehlj --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0392d8b85..17f2ed653 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,10 @@ PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4) +# When SIZEOF_DATUM=4, PASSEDBYVALUE is stripped from graphid type for pass-by-reference. +# If not specified, normal 64-bit behavior is used (PASSEDBYVALUE preserved). 
+ src/backend/parser/cypher_keywords.o: src/include/parser/cypher_kwlist_d.h src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_KEYWORDLIST_DEPS) @@ -152,8 +156,14 @@ src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/includ src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h -$(age_sql): +# Strip PASSEDBYVALUE on 32-bit (SIZEOF_DATUM=4) for graphid pass-by-reference +$(age_sql): $(SQLS) @cat $(SQLS) > $@ +ifeq ($(SIZEOF_DATUM),4) + @echo "32-bit build: removing PASSEDBYVALUE from graphid type" + @sed 's/^ PASSEDBYVALUE,$$/ -- PASSEDBYVALUE removed for 32-bit (see Makefile)/' $@ > $@.tmp && mv $@.tmp $@ + @grep -q 'PASSEDBYVALUE removed for 32-bit' $@ || { echo "Error: PASSEDBYVALUE replacement failed in $@"; exit 1; } +endif src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes From 22dc725c59ec574d470b79e32acdb921f6697520 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 16 Jan 2026 15:12:22 -0800 Subject: [PATCH 07/11] Optimize vertex/edge field access with direct array indexing (#2302) NOTE: This PR was created using AI tools and a human. Leverage deterministic key ordering from uniqueify_agtype_object() to access vertex/edge fields in O(1) instead of O(log n) binary search. 
Fields are sorted by key length, giving fixed positions: - Vertex: id(0), label(1), properties(2) - Edge: id(0), label(1), end_id(2), start_id(3), properties(4) Changes: - Add field index constants and accessor macros to agtype.h - Update age_id(), age_start_id(), age_end_id(), age_label(), age_properties() to use direct field access - Add fill_agtype_value_no_copy() for read-only scalar extraction without memory allocation - Add compare_agtype_scalar_containers() fast path for scalar comparison - Update hash_agtype_value(), equals_agtype_scalar_value(), and compare_agtype_scalar_values() to use direct field access macros - Add fast path in get_one_agtype_from_variadic_args() bypassing extract_variadic_args() for single argument case - Add comprehensive regression test (30 tests) Performance impact: Improves ORDER BY, hash joins, aggregations, and Cypher functions (id, start_id, end_id, label, properties) on vertices and edges. No existing regression tests were impacted. Additional regression test added to enhance coverage. 
modified: Makefile new file: regress/expected/direct_field_access.out new file: regress/sql/direct_field_access.sql modified: src/backend/utils/adt/agtype.c modified: src/backend/utils/adt/agtype_util.c modified: src/include/utils/agtype.h --- Makefile | 3 +- regress/expected/direct_field_access.out | 535 +++++++++++++++++++++++ regress/sql/direct_field_access.sql | 319 ++++++++++++++ src/backend/utils/adt/agtype.c | 136 +++++- src/backend/utils/adt/agtype_util.c | 237 +++++++++- src/include/utils/agtype.h | 103 +++++ 6 files changed, 1304 insertions(+), 29 deletions(-) create mode 100644 regress/expected/direct_field_access.out create mode 100644 regress/sql/direct_field_access.sql diff --git a/Makefile b/Makefile index 17f2ed653..ffad7d6af 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ - map_projection + map_projection \ + direct_field_access ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/direct_field_access.out b/regress/expected/direct_field_access.out new file mode 100644 index 000000000..0a059cdd9 --- /dev/null +++ b/regress/expected/direct_field_access.out @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('direct_access'); +NOTICE: graph "direct_access" has been created + create_graph +-------------- + +(1 row) + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +-------+------+------+------- + false | true | true | false +(1 row) + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 
row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + lt | gt +------+------ + true | true +(1 row) + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + eq | ne +----+---- + | +(1 row) + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. 
+-- +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + n +--- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + n +--- + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1 +(9 rows) + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + s +---------- + "apple" + "banana" + "cherry" + "date" +(4 rows) + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + f +------ + 1.41 + 1.73 + 2.71 + 3.14 +(4 rows) + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + b +------- + false + false + true + true +(4 rows) + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. 
+-- +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + max_vertex +---------------------------------------------------------------------------------------------- + {"id": 844424930131973, "label": "Person", "properties": {"age": 32, "name": "Eve"}}::vertex +(1 row) + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + min_vertex +------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex +(1 row) + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + max_edge +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131973, "properties": {"since": 2022}}::edge +(1 row) + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + min_edge 
+----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge +(1 row) + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" + "Eve" +(5 rows) + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + name +----------- + "Eve" + "Diana" + "Charlie" + "Bob" + "Alice" +(5 rows) + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + since +------- + 2020 + 2019 + 2021 + 2018 + 2022 +(5 rows) + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + a_name | b_name +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Alice" | "Diana" + "Alice" | "Eve" + "Bob" | "Charlie" + "Bob" | "Diana" + "Bob" | "Eve" + "Charlie" | "Diana" + "Charlie" | "Eve" + "Diana" | "Eve" +(10 rows) + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. 
+-- +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + eq | ne | lt | gt +------+------+------+------ + true | true | true | true +(1 row) + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + add | sub | mul | div +-----+-----+-----+----- + 15 | 5 | 50 | 2 +(1 row) + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + upper | lower | sz +---------+---------+---- + "HELLO" | "world" | 4 +(1 row) + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + int_val | float_val | str_val +---------+-----------+--------- + 42 | 3.14 | "42" +(1 row) + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. 
+-- +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + vertex_id +----------------- + 844424930131969 +(1 row) + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + edge_id +------------------ + 1125899906842625 +(1 row) + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + start_id | alice_id +-----------------+----------------- + 844424930131969 | 844424930131969 +(1 row) + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + end_id | bob_id +-----------------+----------------- + 844424930131970 | 844424930131970 +(1 row) + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + vertex_label +-------------- + "Person" +(1 row) + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + edge_label +------------ + "KNOWS" +(1 row) + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + vertex_props 
+------------------------------ + {"age": 30, "name": "Alice"} +(1 row) + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + edge_props +----------------- + {"since": 2020} +(1 row) + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + a_id | a_label | a_name | r_id | r_start | r_end | r_label | r_since | b_id | b_label | b_name +-----------------+----------+---------+------------------+-----------------+-----------------+---------+---------+-----------------+----------+-------- + 844424930131969 | "Person" | "Alice" | 1125899906842625 | 844424930131969 | 844424930131970 | "KNOWS" | 2020 | 844424930131970 | "Person" | "Bob" +(1 row) + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + eq | lt +------+------ + true | true +(1 row) + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + eq +------ + true +(1 row) + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + big_gt | neg_lt +--------+-------- + true | 
true +(1 row) + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + lt | eq +------+------ + true | true +(1 row) + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + zero_eq +--------- + true +(1 row) + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table direct_access._ag_label_vertex +drop cascades to table direct_access._ag_label_edge +drop cascades to table direct_access."Person" +drop cascades to table direct_access."KNOWS" +NOTICE: graph "direct_access" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/direct_field_access.sql b/regress/sql/direct_field_access.sql new file mode 100644 index 000000000..c8060be4a --- /dev/null +++ b/regress/sql/direct_field_access.sql @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. 
fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('direct_access'); + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- + +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 
1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. +-- + +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. 
+-- + +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. 
+-- + +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. +-- + +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * 
FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- + +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq 
agtype); + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 02fc3221c..f2458a30b 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -5409,10 +5409,24 @@ Datum age_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("id() argument must be a vertex, an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: id is at a fixed index for both + * vertex and edge objects due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_ID(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_ID(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("id() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5447,10 +5461,11 @@ Datum age_start_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("start_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "start_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: start_id is at index 3 for edge + * objects due to key length sorting (id=0, label=1, end_id=2, start_id=3). 
+ */ + agtv_result = AGTYPE_EDGE_GET_START_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5485,10 +5500,11 @@ Datum age_end_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("end_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "end_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: end_id is at index 2 for edge + * objects due to key length sorting (id=0, label=1, end_id=2). + */ + agtv_result = AGTYPE_EDGE_GET_END_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -6038,10 +6054,25 @@ Datum age_properties(PG_FUNCTION_ARGS) errmsg("properties() argument must be a vertex, an edge or null"))); } - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "properties"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_OBJECT); + /* + * Direct field access optimization: properties is at index 2 for vertex + * (id=0, label=1, properties=2) and index 4 for edge (id=0, label=1, + * end_id=2, start_id=3, properties=4) due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_PROPERTIES(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_PROPERTIES(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("properties() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -7170,8 +7201,24 @@ Datum age_label(PG_FUNCTION_ARGS) } - /* extract the label agtype value from the vertex or edge */ - label = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_value, "label"); + /* + * Direct field access optimization: label is at a fixed index for both + * vertex and edge objects due to key length sorting. 
+ */ + if (agtv_value->type == AGTV_VERTEX) + { + label = AGTYPE_VERTEX_GET_LABEL(agtv_value); + } + else if (agtv_value->type == AGTV_EDGE) + { + label = AGTYPE_EDGE_GET_LABEL(agtv_value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(label)); } @@ -10507,6 +10554,59 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, Oid *types = NULL; agtype *agtype_result = NULL; + /* + * Fast path optimization: For non-variadic calls where the argument + * is already an agtype, we can avoid the overhead of extract_variadic_args + * which allocates three arrays. This is the common case for most agtype + * comparison and arithmetic operators. + */ + if (!get_fn_expr_variadic(fcinfo->flinfo)) + { + int total_args = PG_NARGS(); + int actual_nargs = total_args - variadic_offset; + + /* Verify expected number of arguments */ + if (actual_nargs != expected_nargs) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of args %d does not match expected %d", + actual_nargs, expected_nargs))); + } + + /* Check for SQL NULL */ + if (PG_ARGISNULL(variadic_offset)) + { + return NULL; + } + + /* Check if the argument is already an agtype */ + if (get_fn_expr_argtype(fcinfo->flinfo, variadic_offset) == AGTYPEOID) + { + agtype_container *agtc; + + agtype_result = DATUM_GET_AGTYPE_P(PG_GETARG_DATUM(variadic_offset)); + agtc = &agtype_result->root; + + /* + * Is this a scalar (scalars are stored as one element arrays)? + * If so, test for agtype NULL. + */ + if (AGTYPE_CONTAINER_IS_SCALAR(agtc) && + AGTE_IS_NULL(agtc->children[0])) + { + return NULL; + } + + return agtype_result; + } + + /* + * Not an agtype, need to convert. Fall through to use + * extract_variadic_args for type conversion handling. 
+ */ + } + + /* Standard path using extract_variadic_args */ nargs = extract_variadic_args(fcinfo, variadic_offset, false, &args, &types, &nulls); /* throw an error if the number of args is not the expected number */ diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c index 01a965cdd..b39723413 100644 --- a/src/backend/utils/adt/agtype_util.c +++ b/src/backend/utils/adt/agtype_util.c @@ -41,6 +41,14 @@ #include "utils/agtype_ext.h" +/* + * Extended type header macros - must match definitions in agtype_ext.c. + * These are used for deserializing extended agtype values (INTEGER, FLOAT, + * VERTEX, EDGE, PATH) from their binary representation. + */ +#define AGT_HEADER_TYPE uint32 +#define AGT_HEADER_SIZE sizeof(AGT_HEADER_TYPE) + /* * Maximum number of elements in an array (or key/value pairs in an object). * This is limited by two things: the size of the agtentry array must fit @@ -56,6 +64,11 @@ static void fill_agtype_value(agtype_container *container, int index, char *base_addr, uint32 offset, agtype_value *result); +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result); +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b); static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b); static agtype *convert_to_agtype(agtype_value *val); static void convert_agtype_value(StringInfo buffer, agtentry *header, @@ -264,6 +277,24 @@ int compare_agtype_containers_orderability(agtype_container *a, agtype_iterator *itb; int res = 0; + /* + * Fast path optimization for scalar values. + * + * The most common case in ORDER BY and comparison operations is comparing + * scalar values (integers, strings, floats, etc.). For these cases, we can + * avoid the overhead of the full iterator machinery by directly extracting + * and comparing the scalar values. 
+ * + * This provides significant performance improvement because: + * 1. We avoid allocating two agtype_iterator structures + * 2. We avoid the iterator state machine overhead + * 3. We use no-copy extraction where possible + */ + if (AGTYPE_CONTAINER_IS_SCALAR(a) && AGTYPE_CONTAINER_IS_SCALAR(b)) + { + return compare_agtype_scalar_containers(a, b); + } + ita = agtype_iterator_init(a); itb = agtype_iterator_init(b); @@ -751,6 +782,173 @@ static void fill_agtype_value(agtype_container *container, int index, } } +/* + * A helper function to fill in an agtype_value WITHOUT making deep copies. + * This is used for read-only comparison operations where the agtype_value + * will not outlive the container data. The caller MUST NOT free the + * agtype_value content or use it after the container is freed. + * + * This function provides significant performance improvements for comparison + * operations by avoiding palloc/memcpy for strings and numerics. + * + * Note: For AGTV_STRING, val.string.val points directly into container data. + * Note: For AGTV_NUMERIC, val.numeric points directly into container data. + * Note: Extended types (VERTEX, EDGE, PATH) still require deserialization, + * so they use the standard fill_agtype_value path. 
+ */ +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result) +{ + agtentry entry = container->children[index]; + + if (AGTE_IS_NULL(entry)) + { + result->type = AGTV_NULL; + } + else if (AGTE_IS_STRING(entry)) + { + result->type = AGTV_STRING; + /* Point directly into the container data - no copy */ + result->val.string.val = base_addr + offset; + result->val.string.len = get_agtype_length(container, index); + } + else if (AGTE_IS_NUMERIC(entry)) + { + result->type = AGTV_NUMERIC; + /* Point directly into the container data - no copy */ + result->val.numeric = (Numeric)(base_addr + INTALIGN(offset)); + } + else if (AGTE_IS_AGTYPE(entry)) + { + /* + * For extended types (INTEGER, FLOAT, VERTEX, EDGE, PATH), we need + * to deserialize. INTEGER and FLOAT don't allocate, but composite + * types (VERTEX, EDGE, PATH) do. For simple scalar comparisons, + * we handle INTEGER and FLOAT directly here. + */ + char *base = base_addr + INTALIGN(offset); + AGT_HEADER_TYPE agt_header = *((AGT_HEADER_TYPE *)base); + + switch (agt_header) + { + case AGT_HEADER_INTEGER: + result->type = AGTV_INTEGER; + result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE)); + break; + + case AGT_HEADER_FLOAT: + result->type = AGTV_FLOAT; + result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE)); + break; + + default: + /* + * For VERTEX, EDGE, PATH - use standard deserialization. + * These are composite types that require full parsing. 
+ */ + ag_deserialize_extended_type(base_addr, offset, result); + break; + } + } + else if (AGTE_IS_BOOL_TRUE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = true; + } + else if (AGTE_IS_BOOL_FALSE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = false; + } + else + { + Assert(AGTE_IS_CONTAINER(entry)); + result->type = AGTV_BINARY; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = + (agtype_container *)(base_addr + INTALIGN(offset)); + result->val.binary.len = get_agtype_length(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Fast path comparison for scalar agtype containers. + * + * This function compares two scalar containers directly without the overhead + * of the full iterator machinery. It extracts the scalar values using no-copy + * fill and compares them directly. + * + * Returns: negative if a < b, 0 if a == b, positive if a > b + */ +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b) +{ + agtype_value va; + agtype_value vb; + char *base_addr_a; + char *base_addr_b; + int result; + bool need_free_a = false; + bool need_free_b = false; + + Assert(AGTYPE_CONTAINER_IS_SCALAR(a)); + Assert(AGTYPE_CONTAINER_IS_SCALAR(b)); + + /* Scalars are stored as single-element arrays */ + base_addr_a = (char *)&a->children[1]; + base_addr_b = (char *)&b->children[1]; + + /* Use no-copy fill to avoid allocations for simple types */ + fill_agtype_value_no_copy(a, 0, base_addr_a, 0, &va); + fill_agtype_value_no_copy(b, 0, base_addr_b, 0, &vb); + + /* + * Check if we need to free the values after comparison. + * Only VERTEX, EDGE, and PATH types allocate memory in no-copy mode. + */ + if (va.type == AGTV_VERTEX || va.type == AGTV_EDGE || va.type == AGTV_PATH) + { + need_free_a = true; + } + if (vb.type == AGTV_VERTEX || vb.type == AGTV_EDGE || vb.type == AGTV_PATH) + { + need_free_b = true; + } + + /* + * Compare the scalar values. 
If types match or are numeric compatible, + * use scalar comparison. Otherwise, use type-based ordering. + */ + if ((va.type == vb.type) || + ((va.type == AGTV_INTEGER || va.type == AGTV_FLOAT || + va.type == AGTV_NUMERIC) && + (vb.type == AGTV_INTEGER || vb.type == AGTV_FLOAT || + vb.type == AGTV_NUMERIC))) + { + result = compare_agtype_scalar_values(&va, &vb); + } + else + { + /* Type-defined order */ + result = (get_type_sort_priority(va.type) < + get_type_sort_priority(vb.type)) ? -1 : 1; + } + + /* Free any allocated memory from composite types */ + if (need_free_a) + { + pfree_agtype_value_content(&va); + } + if (need_free_b) + { + pfree_agtype_value_content(&vb); + } + + return result; +} + /* * Push agtype_value into agtype_parse_state. * @@ -1597,7 +1795,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_VERTEX: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_VERTEX_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1606,7 +1805,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_EDGE: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_EDGE_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1704,8 +1904,8 @@ static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b) case AGTV_VERTEX: { graphid a_graphid, b_graphid; - a_graphid = a->val.object.pairs[0].value.val.int_value; - b_graphid = b->val.object.pairs[0].value.val.int_value; + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; return a_graphid == b_graphid; } @@ -1790,16 +1990,33 @@ int 
compare_agtype_scalar_values(agtype_value *a, agtype_value *b) return compare_two_floats_orderability(a->val.float_value, b->val.float_value); case AGTV_VERTEX: - case AGTV_EDGE: { - agtype_value *a_id, *b_id; graphid a_graphid, b_graphid; - a_id = GET_AGTYPE_VALUE_OBJECT_VALUE(a, "id"); - b_id = GET_AGTYPE_VALUE_OBJECT_VALUE(b, "id"); + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; + + if (a_graphid == b_graphid) + { + return 0; + } + else if (a_graphid > b_graphid) + { + return 1; + } + else + { + return -1; + } + } + case AGTV_EDGE: + { + graphid a_graphid, b_graphid; - a_graphid = a_id->val.int_value; - b_graphid = b_id->val.int_value; + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_EDGE_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_EDGE_GET_ID(b)->val.int_value; if (a_graphid == b_graphid) { diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index ab2ba08cc..ec9125073 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -322,6 +322,109 @@ enum agtype_value_type AGTV_BINARY }; +/* + * Direct field access indices for vertex and edge objects. + * + * Vertex and edge objects are serialized with keys sorted by length first, + * then lexicographically (via uniqueify_agtype_object). This means field + * positions are deterministic and can be accessed directly without binary + * search, providing O(1) access instead of O(log n). 
+ * + * Vertex keys by length: "id"(2), "label"(5), "properties"(10) + * Edge keys by length: "id"(2), "label"(5), "end_id"(6), "start_id"(8), "properties"(10) + */ +#define VERTEX_FIELD_ID 0 +#define VERTEX_FIELD_LABEL 1 +#define VERTEX_FIELD_PROPERTIES 2 +#define VERTEX_NUM_FIELDS 3 + +#define EDGE_FIELD_ID 0 +#define EDGE_FIELD_LABEL 1 +#define EDGE_FIELD_END_ID 2 +#define EDGE_FIELD_START_ID 3 +#define EDGE_FIELD_PROPERTIES 4 +#define EDGE_NUM_FIELDS 5 + +/* + * Macros for direct field access from vertex/edge agtype_value objects. + * These avoid the binary search overhead of GET_AGTYPE_VALUE_OBJECT_VALUE. + * Validation is integrated - macros will error if field count is incorrect. + * Uses GCC statement expressions to allow validation within expressions. + */ +#define AGTYPE_VERTEX_GET_ID(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_ID].value; \ + }) +#define AGTYPE_VERTEX_GET_LABEL(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_LABEL].value; \ + }) +#define AGTYPE_VERTEX_GET_PROPERTIES(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_PROPERTIES].value; \ + }) + +#define AGTYPE_EDGE_GET_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d 
fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_ID].value; \ + }) +#define AGTYPE_EDGE_GET_LABEL(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_LABEL].value; \ + }) +#define AGTYPE_EDGE_GET_END_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_END_ID].value; \ + }) +#define AGTYPE_EDGE_GET_START_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_START_ID].value; \ + }) +#define AGTYPE_EDGE_GET_PROPERTIES(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_PROPERTIES].value; \ + }) + /* * agtype_value: In-memory representation of agtype. This is a convenient * deserialized representation, that can easily support using the "val" From f6b1eb6d8821ecf764dfd2ff71006e4fb897a4a2 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 17 Jan 2026 03:18:10 -0800 Subject: [PATCH 08/11] Upgrade Jest to v29 for node: protocol compatibility (#2307) Note: This PR was created with AI tools and a human. 
The pg-connection-string module (dependency of pg) now uses the node: protocol prefix for built-in modules (e.g., require('node:process')). Jest 26 does not support this syntax, causing test failures. Changes: - Upgrade jest from ^26.6.3 to ^29.7.0 - Upgrade ts-jest from ^26.5.1 to ^29.4.6 - Upgrade @types/jest from ^26.0.20 to ^29.5.14 - Update typescript to ^4.9.5 This also resolves 19 npm audit vulnerabilities (17 moderate, 2 high) that existed in the older Jest 26 dependency tree. modified: drivers/nodejs/package.json --- drivers/nodejs/package.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nodejs/package.json b/drivers/nodejs/package.json index 9f88bc2ba..6be11c780 100644 --- a/drivers/nodejs/package.json +++ b/drivers/nodejs/package.json @@ -33,7 +33,7 @@ "pg": ">=6.0.0" }, "devDependencies": { - "@types/jest": "^26.0.20", + "@types/jest": "^29.5.14", "@types/pg": "^7.14.10", "@typescript-eslint/eslint-plugin": "^4.22.1", "@typescript-eslint/parser": "^4.22.1", @@ -44,8 +44,8 @@ "eslint-plugin-jest": "^24.3.6", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.3.1", - "jest": "^26.6.3", - "ts-jest": "^26.5.1", - "typescript": "^4.1.5" + "jest": "^29.7.0", + "ts-jest": "^29.4.6", + "typescript": "^4.9.5" } } From 86f24911bce8dc19244c58ab52081c28efa512f4 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 18 Jan 2026 10:02:55 -0800 Subject: [PATCH 09/11] Fix Issue 1884: Ambiguous column reference (#2306) Fix Issue 1884: Ambiguous column reference and invalid AGT header errors. Note: This PR was created with AI tools and a human, or 2. 
This commit addresses two related bugs that occur when using SET to store graph elements (vertices, edges, paths) as property values: Issue 1884 - "column reference is ambiguous" error: When a Cypher query uses the same variable in both the SET expression RHS and the RETURN clause (e.g., SET n.prop = n RETURN n), PostgreSQL would report "column reference is ambiguous" because the variable appeared in multiple subqueries without proper qualification. Solution: The fix for this issue was already in place through the target entry naming scheme that qualifies column references. "Invalid AGT header value" offset error: When deserializing nested VERTEX, EDGE, or PATH values stored in properties, the system would fail with errors like "Invalid AGT header value: 0x00000041". This occurred because ag_serialize_extended_type() did not include alignment padding (padlen) in the agtentry length calculation for these types, while fill_agtype_value() uses INTALIGN() when reading, causing offset mismatch. Solution: Modified ag_serialize_extended_type() in agtype_ext.c to include padlen in the agtentry length for VERTEX, EDGE, and PATH cases, matching the existing pattern used for INTEGER, FLOAT, and NUMERIC types: *agtentry = AGTENTRY_IS_AGTYPE | (padlen + (AGTENTRY_OFFLENMASK & ...)); This ensures the serialized length accounts for alignment padding, allowing correct deserialization of nested graph elements. Appropriate regression tests were added to verify the fixes. 
Co-authored-by: Zainab Saad <105385638+Zainab-Saad@users.noreply.github.com> modified: regress/expected/cypher_set.out modified: regress/sql/cypher_set.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/utils/adt/agtype_ext.c --- regress/expected/cypher_set.out | 266 +++++++++++++++++++++++++++++ regress/sql/cypher_set.sql | 164 ++++++++++++++++++ src/backend/parser/cypher_clause.c | 19 ++- src/backend/utils/adt/agtype_ext.c | 8 +- 4 files changed, 451 insertions(+), 6 deletions(-) diff --git a/regress/expected/cypher_set.out b/regress/expected/cypher_set.out index 1d24a7f9b..239234ed6 100644 --- a/regress/expected/cypher_set.out +++ b/regress/expected/cypher_set.out @@ -988,6 +988,245 @@ SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); --- (0 rows) +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. 
"Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); +NOTICE: graph "issue_1884" has been created + create_graph +-------------- + +(1 row) + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1", "self": {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1"}}::vertex}}::vertex +(1 row) + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + a | e | b 
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "A"}}::vertex | {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge | {"id": 1125899906842626, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "B"}}::vertex +(1 row) + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + e +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1970324836974593, "label": "REL", "end_id": 1688849860263938, "start_id": 1688849860263937, "properties": {"dst": {"id": 1688849860263938, "label": "TestA3", "properties": {"name": "Y"}}::vertex, "src": {"id": 1688849860263937, "label": "TestA3", "properties": {"name": "X"}}::vertex}}::edge +(1 row) + +-- 
============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1, "copy": {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex}}::vertex +(1 row) + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + copy +------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex +(1 row) + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + a +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2533274790395905, "label": "TestB2", "properties": {"name": "start", "myEdge": {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge}}::vertex +(1 row) + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + edge 
+-------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge +(1 row) + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex +(1 row) + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {"myPath": [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path}}::vertex +(1 row) + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + path 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path +(1 row) + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"arr": [{"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex], "tag": "arrtest"}}::vertex +(1 row) + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + elem +--------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex +(1 row) + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + result 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"obj": {"node": {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex}, "tag": "maptest"}}::vertex +(1 row) + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + node +--------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4222124650659841, "label": "TestD1", "properties": {"ref": {"id": 4222124650659841, "label": "TestD1", "properties": {"name": "merged"}}::vertex, "name": "merged"}}::vertex +(1 row) + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4503599627370497, "label": "TestD2", "properties": {"ref": {"id": 4503599627370497, "label": "TestD2", "properties": {"name": 
"created"}}::vertex, "name": "created"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4785074604081153, "label": "TestE1", "properties": {"myId": 4785074604081153, "name": "functest", "myLabel": "TestE1"}}::vertex +(1 row) + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 5066549580791809, "label": "TestE2", "properties": {}}::vertex +(1 row) + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 5066549580791809, "label": "TestE2", "properties": {"pathRels": [{"id": 5348024557502465, "label": "E2REL", "end_id": 5066549580791810, "start_id": 5066549580791809, "properties": {}}::edge], "pathNodes": [{"id": 5066549580791809, 
"label": "TestE2", "properties": {}}::vertex, {"id": 5066549580791810, "label": "TestE2", "properties": {}}::vertex]}}::vertex +(1 row) + -- -- Clean up -- @@ -1038,6 +1277,33 @@ NOTICE: graph "issue_1634" has been dropped (1 row) +SELECT drop_graph('issue_1884', true); +NOTICE: drop cascades to 19 other objects +DETAIL: drop cascades to table issue_1884._ag_label_vertex +drop cascades to table issue_1884._ag_label_edge +drop cascades to table issue_1884."TestA1" +drop cascades to table issue_1884."TestA2" +drop cascades to table issue_1884."LINK" +drop cascades to table issue_1884."TestA3" +drop cascades to table issue_1884."REL" +drop cascades to table issue_1884."TestB1" +drop cascades to table issue_1884."TestB2" +drop cascades to table issue_1884."B2REL" +drop cascades to table issue_1884."TestB3" +drop cascades to table issue_1884."B3REL" +drop cascades to table issue_1884."TestC1" +drop cascades to table issue_1884."TestC2" +drop cascades to table issue_1884."TestD1" +drop cascades to table issue_1884."TestD2" +drop cascades to table issue_1884."TestE1" +drop cascades to table issue_1884."TestE2" +drop cascades to table issue_1884."E2REL" +NOTICE: graph "issue_1884" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/sql/cypher_set.sql b/regress/sql/cypher_set.sql index a2667153d..e745d5d6e 100644 --- a/regress/sql/cypher_set.sql +++ b/regress/sql/cypher_set.sql @@ -379,6 +379,169 @@ SELECT * FROM cypher('issue_1634', $$ MERGE (v:PERSION {id: '1'}) SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. 
"Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ + +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ + +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + +-- 
Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ + +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ + +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- Test D2: CREATE with SET self-reference 
+SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ + +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + -- -- Clean up -- @@ -387,6 +550,7 @@ DROP FUNCTION set_test; SELECT drop_graph('cypher_set', true); SELECT drop_graph('cypher_set_1', true); SELECT drop_graph('issue_1634', true); +SELECT drop_graph('issue_1884', true); -- -- End diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index acc52349d..9960acd7b 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -71,6 +71,7 @@ #define AGE_VARNAME_MERGE_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"merge_clause" #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id" #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause" +#define AGE_VARNAME_SET_VALUE AGE_DEFAULT_VARNAME_PREFIX"set_value" /* * In the transformation stage, we need to track @@ -1911,10 +1912,24 @@ cypher_update_information *transform_cypher_set_item_list( ((cypher_map*)set_item->expr)->keep_null = set_item->is_add; } - /* create target entry for the new property value */ + /* + * Create target 
entry for the new property value. + * + * We use a hidden variable name (AGE_VARNAME_SET_VALUE) for the + * SET expression value to prevent column name conflicts. This is + * necessary when the same variable is used on both the LHS and RHS + * of a SET clause (e.g., SET n.prop = n). Without this, the column + * name derived from the expression (e.g., "n") would duplicate the + * existing column name from the MATCH clause, causing a "column + * reference is ambiguous" error in subsequent clauses like RETURN. + * + * The hidden variable name will be filtered out by expand_pnsi_attrs + * when the targetlist is expanded for subsequent clauses. + */ item->prop_position = (AttrNumber)pstate->p_next_resno; target_item = transform_cypher_item(cpstate, set_item->expr, NULL, - EXPR_KIND_SELECT_TARGET, NULL, + EXPR_KIND_SELECT_TARGET, + AGE_VARNAME_SET_VALUE, false); if (nodeTag(target_item->expr) == T_Aggref) diff --git a/src/backend/utils/adt/agtype_ext.c b/src/backend/utils/adt/agtype_ext.c index 8fc6600d1..7a0ea991d 100644 --- a/src/backend/utils/adt/agtype_ext.c +++ b/src/backend/utils/adt/agtype_ext.c @@ -89,7 +89,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -109,7 +109,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -129,7 +129,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & 
(int)object_ae) + AGT_HEADER_SIZE); break; } @@ -175,7 +175,7 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset, break; default: - elog(ERROR, "Invalid AGT header value."); + ereport(ERROR, (errmsg("Invalid AGT header value: 0x%08x", agt_header))); } } From 1bb378077b6e4db5f4a6f5b084e7a472f1360a33 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Mon, 19 Jan 2026 22:21:02 +0500 Subject: [PATCH 10/11] Replace libcsv with pg COPY for csv loading (#2310) - Commit also adds permission checks - Resolves a critical memory spike issue on loading large file - Use pg's COPY infrastructure (BeginCopyFrom, NextCopyFromRawFields) for 64KB buffered CSV parsing instead of libcsv - Add byte based flush threshold (64KB) matching COPY behavior for memory safety - Use heap_multi_insert with BulkInsertState for optimized batch inserts - Add per batch memory context to prevent memory growth during large loads - Remove libcsv dependency (libcsv.c, csv.h) - Improves loading performance by 15-20% - No previous regression tests were impacted - Added regression tests for permissions/rls Assisted-by AI Resolved conflict with ExecInitRangeTable --- Makefile | 1 - regress/expected/age_load.out | 189 ++++++++ regress/expected/index.out | 12 +- regress/sql/age_load.sql | 125 ++++++ regress/sql/index.sql | 2 +- src/backend/utils/load/ag_load_edges.c | 388 +++++++++-------- src/backend/utils/load/ag_load_labels.c | 381 ++++++++-------- src/backend/utils/load/age_load.c | 248 ++++++++++- src/backend/utils/load/libcsv.c | 549 ------------------------ src/include/utils/load/ag_load_edges.h | 52 +-- src/include/utils/load/ag_load_labels.h | 50 +-- src/include/utils/load/age_load.h | 27 +- src/include/utils/load/csv.h | 108 ----- 13 files changed, 1000 insertions(+), 1132 deletions(-) delete mode 100644 src/backend/utils/load/libcsv.c delete mode 100644 src/include/utils/load/csv.h diff --git a/Makefile b/Makefile index ffad7d6af..a8faa2bb8 100644 --- a/Makefile +++ 
b/Makefile @@ -69,7 +69,6 @@ OBJS = src/backend/age.o \ src/backend/utils/load/ag_load_labels.o \ src/backend/utils/load/ag_load_edges.o \ src/backend/utils/load/age_load.o \ - src/backend/utils/load/libcsv.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 55d1ff1d6..1f76c31ce 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -454,6 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Test security and permissions +-- +SELECT create_graph('agload_security'); +NOTICE: graph "agload_security" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_security', 'SecEdge'); +NOTICE: ELabel "SecEdge" has been created + create_elabel +--------------- + +(1 row) + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. 
+RESET ROLE; +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied for table Person1 +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied for table SecEdge +RESET ROLE; +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +RESET ROLE; +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 3: Row-Level Security (RLS) +-- +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; +-- Switch to load_test_user +SET ROLE 
load_test_user; +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +RESET ROLE; +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 4: Constraint checking (CHECK constraint) +-- +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: new row for relation "Person1" violates check constraint "check_bool_true" +DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...). 
+-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true" +DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}). +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_security._ag_label_vertex +drop cascades to table agload_security._ag_label_edge +drop cascades to table agload_security."Person1" +drop cascades to table agload_security."Person2" +drop cascades to table agload_security."SecEdge" +NOTICE: graph "agload_security" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/expected/index.out b/regress/expected/index.out index 745cab269..ec62bf57d 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -264,19 +264,19 @@ $$) as (n agtype); (0 rows) -- 
Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; indexname | indexdef -----------------------------+------------------------------------------------------------------------------------------------ + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) - _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) - idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) - Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) - has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) - City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) (10 rows) SET enable_mergejoin = ON; diff --git a/regress/sql/age_load.sql 
b/regress/sql/age_load.sql index cefcfb4ca..976f050af 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -194,6 +194,131 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t -- SELECT drop_graph('agload_conversion', true); +-- +-- Test security and permissions +-- + +SELECT create_graph('agload_security'); +SELECT create_vlabel('agload_security', 'Person1'); +SELECT create_vlabel('agload_security', 'Person2'); +SELECT create_elabel('agload_security', 'SecEdge'); + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; + +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; + +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT 
SELECT ON ag_catalog.ag_graph TO load_test_user; + +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 3: Row-Level Security (RLS) +-- + +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; + +-- Switch to load_test_user +SET ROLE load_test_user; + +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +RESET ROLE; + +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; + +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 4: Constraint checking (CHECK constraint) +-- + +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 
'Person1', 'age_load/conversion_vertices.csv', true); + +-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; + +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); + -- -- End -- diff --git a/regress/sql/index.sql b/regress/sql/index.sql index a6e075c70..d4a4b24a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -165,7 +165,7 @@ SELECT * FROM cypher('cypher_index', $$ $$) as (n agtype); -- Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 931c6e0dc..c05bf3352 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -16,50 +16,30 @@ * specific language governing permissions 
and limitations * under the License. */ - #include "postgres.h" -#include "utils/load/ag_load_edges.h" -#include "utils/load/csv.h" +#include "access/heapam.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" -void edge_field_cb(void *field, size_t field_len, void *data) -{ - - csv_edge_reader *cr = (csv_edge_reader*)data; - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char*)field, field_len); - cr->cur_field += 1; -} +#include "utils/load/ag_load_edges.h" -/* Parser calls this function when it detects end of a row */ -void edge_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single edge row from COPY's raw fields. + * Edge CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] 
+ */ +static void process_edge_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + Oid graph_oid, bool load_as_agtype, + batch_insert_state *batch_state) { - - csv_edge_reader *cr = (csv_edge_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - - size_t i, n_fields; int64 start_id_int; graphid start_vertex_graph_id; int start_vertex_type_id; @@ -72,104 +52,92 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) int64 entry_id; TupleTableSlot *slot; - n_fields = cr->cur_field; + char *start_vertex_type; + char *end_vertex_type; + agtype *edge_properties; - if (cr->row == 0) - { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); + /* Generate edge ID */ + entry_id = nextval_internal(label_seq_relid, true); + edge_id = make_graphid(label_id, entry_id); - for (i = 0; icur_field; i++) - { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - edge_id = make_graphid(cr->label_id, entry_id); - - start_id_int = strtol(cr->fields[0], NULL, 10); - start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); - end_id_int = strtol(cr->fields[2], NULL, 10); - end_vertex_type_id = get_label_id(cr->fields[3], cr->graph_oid); - - start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); - end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - - /* Clear the slots contents */ - ExecClearTuple(slot); - - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); - slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); - slot->tts_values[2] = 
GRAPHID_GET_DATUM(end_vertex_graph_id); - slot->tts_values[3] = AGTYPE_P_GET_DATUM( - create_agtype_from_list_i( - cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; - slot->tts_isnull[2] = false; - slot->tts_isnull[3] = false; - - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - batch_state->num_tuples++; - - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + /* Trim whitespace from vertex type names */ + start_vertex_type = trim_whitespace(fields[1]); + end_vertex_type = trim_whitespace(fields[3]); - for (i = 0; i < n_fields; ++i) - { - pfree_if_not_null(cr->fields[i]); - } + /* Parse start vertex info */ + start_id_int = strtol(fields[0], NULL, 10); + start_vertex_type_id = get_label_id(start_vertex_type, graph_oid); - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } + /* Parse end vertex info */ + end_id_int = strtol(fields[2], NULL, 10); + end_vertex_type_id = get_label_id(end_vertex_type, graph_oid); - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; -} + /* Create graphids for start and end vertices */ + start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); + end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); -static int is_space(unsigned char c) -{ - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Build the agtype properties */ + edge_properties = create_agtype_from_list_i(header, fields, + nfields, 4, load_as_agtype); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = 
GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM(edge_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + + batch_state->buffered_bytes += VARSIZE(edge_properties); + batch_state->num_tuples++; + + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - return 0; + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } } -static int is_term(unsigned char c) +/* + * Create COPY options for CSV parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; + + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load edges from CSV file using pg's COPY infrastructure. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -177,79 +145,133 @@ int create_edges_from_csv_file(char *file_path, int label_id, bool load_as_agtype) { + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Edge Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); + + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); + + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_edge_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + PG_TRY(); { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. 
+ */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) + { + if (is_first_row) + { + int i; - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } - PG_TRY(); - { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_edge_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + graph_oid, 
load_as_agtype, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 1e86bbda4..5b11f68b8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,155 +17,114 @@ * under the License. 
*/ #include "postgres.h" -#include "executor/spi.h" + +#include "access/heapam.h" +#include "access/table.h" #include "catalog/namespace.h" +#include "commands/copy.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/csv.h" - -void vertex_field_cb(void *field, size_t field_len, void *data) -{ - - csv_vertex_reader *cr = (csv_vertex_reader *) data; - - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); - cr->cur_field += 1; -} -void vertex_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single vertex row from COPY's raw fields. + * Vertex CSV format: [id,] [properties...] 
+ */ +static void process_vertex_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + bool id_field_exists, bool load_as_agtype, + int64 *curr_seq_num, + batch_insert_state *batch_state) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - size_t i, n_fields; graphid vertex_id; int64 entry_id; TupleTableSlot *slot; + agtype *vertex_properties; - n_fields = cr->cur_field; - - if (cr->row == 0) + /* Generate or use provided entry_id */ + if (id_field_exists) { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); - - for (i = 0; icur_field; i++) + entry_id = strtol(fields[0], NULL, 10); + if (entry_id > *curr_seq_num) { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); + /* This is needed to ensure the sequence is up-to-date */ + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(label_seq_relid), + Int64GetDatum(entry_id)); + *curr_seq_num = entry_id; } } else { - if (cr->id_field_exists) - { - entry_id = strtol(cr->fields[0], NULL, 10); - if (entry_id > cr->curr_seq_num) - { - DirectFunctionCall2(setval_oid, - ObjectIdGetDatum(cr->label_seq_relid), - Int64GetDatum(entry_id)); - cr->curr_seq_num = entry_id; - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - } + entry_id = nextval_internal(label_seq_relid, true); + } - vertex_id = make_graphid(cr->label_id, entry_id); + vertex_id = make_graphid(label_id, entry_id); - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; - /* Clear the slots contents */ - ExecClearTuple(slot); + /* Clear the slots contents */ + 
ExecClearTuple(slot); - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - slot->tts_values[1] = AGTYPE_P_GET_DATUM( - create_agtype_from_list(cr->header, cr->fields, - n_fields, entry_id, - cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; + /* Build the agtype properties */ + vertex_properties = create_agtype_from_list(header, fields, + nfields, entry_id, + load_as_agtype); - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM(vertex_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; - batch_state->num_tuples++; + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + batch_state->buffered_bytes += VARSIZE(vertex_properties); + batch_state->num_tuples++; - for (i = 0; i < n_fields; ++i) + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - pfree_if_not_null(cr->fields[i]); + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } - - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } - - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; } -static int is_space(unsigned char c) +/* + * Create COPY options for csv parsing. + * Returns a List of DefElem nodes. 
+ */ +static List *create_copy_options(void) { - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; -} -static int is_term(unsigned char c) -{ - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load vertex labels from csv file using pg's COPY infrastructure. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -174,96 +133,146 @@ int create_labels_from_csv_file(char *file_path, bool id_field_exists, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_vertex_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + int64 curr_seq_num = 0; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + if (id_field_exists) { - ereport(ERROR, - (errmsg("Failed to initialize 
csv parser\n"))); + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + */ + curr_seq_num = nextval_internal(label_seq_relid, true); } - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); PG_TRY(); { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. + */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist - NULL means all columns */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. 
- * - * We cant use currval because it will error out if nextval was - * not called before in the session. - */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + if (is_first_row) + { + int i; - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } + + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_vertex_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + id_field_exists, load_as_agtype, + &curr_seq_num, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + 
table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index c7cf0677f..e4f10d7e4 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,24 +18,81 @@ */ #include "postgres.h" + +#include "access/heapam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" #include "catalog/indexing.h" +#include "catalog/pg_authid.h" #include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "parser/parse_relation.h" +#include "utils/acl.h" #include "utils/json.h" +#include "utils/rel.h" +#include "utils/rls.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" -#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); static char *build_safe_filename(char *name); +static void check_file_read_permission(void); +static void check_table_permissions(Oid relid); +static void check_rls_for_load(Oid relid); #define AGE_BASE_CSV_DIRECTORY "/tmp/age/" #define AGE_CSV_FILE_EXTENSION ".csv" +/* + * Trim leading and trailing whitespace from a string. + * Returns a newly allocated string with whitespace removed. + * Returns empty string for NULL input. 
+ */ +char *trim_whitespace(const char *str) +{ + const char *start; + const char *end; + size_t len; + + if (str == NULL) + { + return pstrdup(""); + } + + /* Find first non-whitespace character */ + start = str; + while (*start && (*start == ' ' || *start == '\t' || + *start == '\n' || *start == '\r')) + { + start++; + } + + /* If string is all whitespace, return empty string */ + if (*start == '\0') + { + return pstrdup(""); + } + + /* Find last non-whitespace character */ + end = str + strlen(str) - 1; + while (end > start && (*end == ' ' || *end == '\t' || + *end == '\n' || *end == '\r')) + { + end--; + } + + /* Copy the trimmed string */ + len = end - start + 1; + return pnstrdup(start, len); +} + static char *build_safe_filename(char *name) { int length; @@ -88,6 +145,51 @@ static char *build_safe_filename(char *name) return resolved; } +/* + * Check if the current user has permission to read server files. + * Only users with the pg_read_server_files role can load from files. + */ +static void check_file_read_permission(void) +{ + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to LOAD from a file"), + errdetail("Only roles with privileges of the \"%s\" role may LOAD from a file.", + "pg_read_server_files"))); + } +} + +/* + * Check if the current user has INSERT permission on the target table. + */ +static void check_table_permissions(Oid relid) +{ + AclResult aclresult; + + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_INSERT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, get_rel_name(relid)); + } +} + +/* + * Check if RLS is enabled on the target table. + * CSV loading is not supported with row-level security. 
+ */ +static void check_rls_for_load(Oid relid) +{ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOAD from file is not supported with row-level security"), + errhint("Use Cypher CREATE clause instead."))); + } +} + agtype *create_empty_agtype(void) { agtype* out; @@ -118,6 +220,14 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) char *new_csv_val; agtype_value *res; + /* Handle NULL or empty input - return null agtype value */ + if (csv_val == NULL || csv_val[0] == '\0') + { + res = palloc(sizeof(agtype_value)); + res->type = AGTV_NULL; + return res; + } + if (!json_validate(cstring_to_text(csv_val), false, false)) { /* wrap the string with double-quote */ @@ -175,18 +285,40 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, for (i = 0; itype = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -228,18 +360,40 @@ agtype* create_agtype_from_list_i(char **header, char **fields, for (i = start_index; i < fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + 
value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -362,11 +516,24 @@ void insert_batch(batch_insert_state *batch_state) List *result; int i; + /* Check constraints for each tuple before inserting */ + if (batch_state->resultRelInfo->ri_RelationDesc->rd_att->constr) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + ExecConstraints(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate); + } + } + /* Insert the tuples */ heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, batch_state->slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - + GetCurrentCommandId(true), + TABLE_INSERT_SKIP_FSM, /* Skip free space map for bulk */ + batch_state->bistate); /* Use bulk insert state */ + /* Insert index entries for the tuples */ if (batch_state->resultRelInfo->ri_NumIndices > 0) { @@ -405,6 +572,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool id_field_exists; bool load_as_agtype; @@ -427,6 +595,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -447,6 +618,11 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_VERTEX); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); @@ -459,7 +635,6 @@ Datum 
load_labels_from_file(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(load_edges_from_file); Datum load_edges_from_file(PG_FUNCTION_ARGS) { - Name graph_name; Name label_name; text* file_name; @@ -467,6 +642,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool load_as_agtype; @@ -488,6 +664,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -507,6 +686,11 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_EDGE); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); @@ -597,19 +781,42 @@ void init_batch_insert(batch_insert_state **batch_state, Oid relid; EState *estate; ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + RTEPermissionInfo *perminfo; + List *range_table = NIL; + List *perminfos = NIL; int i; - /* Open the relation */ + /* Get the relation OID */ relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, RowExclusiveLock); /* Initialize executor state */ estate = CreateExecutorState(); - /* Initialize resultRelInfo */ + /* Create range table entry for ExecConstraints */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; + rte->rellockmode = RowExclusiveLock; + rte->perminfoindex = 1; + range_table = list_make1(rte); + + /* Create permission info */ + perminfo = makeNode(RTEPermissionInfo); + perminfo->relid = relid; + 
perminfo->requiredPerms = ACL_INSERT; + perminfos = list_make1(perminfo); + + /* Initialize range table in executor state */ + ExecInitRangeTable(estate, range_table, perminfos, NULL); + + /* Initialize resultRelInfo - this opens the relation */ resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; + ExecInitResultRelation(estate, resultRelInfo, 1); + + /* Get relation from resultRelInfo (opened by ExecInitResultRelation) */ + relation = resultRelInfo->ri_RelationDesc; /* Open the indices */ ExecOpenIndices(resultRelInfo, false); @@ -619,8 +826,9 @@ void init_batch_insert(batch_insert_state **batch_state, (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); (*batch_state)->estate = estate; (*batch_state)->resultRelInfo = resultRelInfo; - (*batch_state)->max_tuples = BATCH_SIZE; (*batch_state)->num_tuples = 0; + (*batch_state)->buffered_bytes = 0; + (*batch_state)->bistate = GetBulkInsertState(); /* Create slots */ for (i = 0; i < BATCH_SIZE; i++) @@ -651,12 +859,14 @@ void finish_batch_insert(batch_insert_state **batch_state) ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); } - /* Clean up, close the indices and relation */ - ExecCloseIndices((*batch_state)->resultRelInfo); - table_close((*batch_state)->resultRelInfo->ri_RelationDesc, - RowExclusiveLock); + /* Free BulkInsertState */ + FreeBulkInsertState((*batch_state)->bistate); + + /* Close result relations and range table relations */ + ExecCloseResultRelations((*batch_state)->estate); + ExecCloseRangeTableRelations((*batch_state)->estate); - /* Clean up batch state */ + /* Clean up executor state */ FreeExecutorState((*batch_state)->estate); pfree((*batch_state)->slots); pfree(*batch_state); diff --git a/src/backend/utils/load/libcsv.c b/src/backend/utils/load/libcsv.c deleted file mode 100644 index f0e8b46be..000000000 --- a/src/backend/utils/load/libcsv.c +++ /dev/null @@ 
-1,549 +0,0 @@ -/* -libcsv - parse and write csv data -Copyright (C) 2008 Robert Gamble - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include - -#if __STDC_VERSION__ >= 199901L -# include -#else - /* C89 doesn't have stdint.h or SIZE_MAX */ -# define SIZE_MAX ((size_t)-1) -#endif - -#include "utils/load/csv.h" - -#define VERSION "3.0.3" - -#define ROW_NOT_BEGUN 0 -#define FIELD_NOT_BEGUN 1 -#define FIELD_BEGUN 2 -#define FIELD_MIGHT_HAVE_ENDED 3 - -/* - Explanation of states - ROW_NOT_BEGUN There have not been any fields encountered for this row - FIELD_NOT_BEGUN There have been fields but we are currently not in one - FIELD_BEGUN We are in a field - FIELD_MIGHT_HAVE_ENDED - We encountered a double quote inside a quoted field, the - field is either ended or the quote is literal -*/ - -#define MEM_BLK_SIZE 128 - -#define SUBMIT_FIELD(p) \ - do { \ - if (!quoted) \ - entry_pos -= spaces; \ - if (p->options & CSV_APPEND_NULL) \ - ((p)->entry_buf[entry_pos]) = '\0'; \ - if (cb1 && (p->options & CSV_EMPTY_IS_NULL) && !quoted && entry_pos == 0) \ - cb1(NULL, entry_pos, data); \ - else if (cb1) \ - cb1(p->entry_buf, entry_pos, data); \ - pstate = FIELD_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_ROW(p, c) \ - do { \ - if (cb2) \ - cb2(c, data); \ - pstate = 
ROW_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) - -static const char *csv_errors[] = {"success", - "error parsing data while strict checking enabled", - "memory exhausted while increasing buffer size", - "data size too large", - "invalid status code"}; - -int -csv_error(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Return the current status of the parser */ - return p->status; -} - -const char * -csv_strerror(int status) -{ - /* Return a textual description of status */ - if (status >= CSV_EINVALID || status < 0) - return csv_errors[CSV_EINVALID]; - else - return csv_errors[status]; -} - -int -csv_get_opts(const struct csv_parser *p) -{ - /* Return the currently set options of parser */ - if (p == NULL) - return -1; - - return p->options; -} - -int -csv_set_opts(struct csv_parser *p, unsigned char options) -{ - /* Set the options */ - if (p == NULL) - return -1; - - p->options = options; - return 0; -} - -int -csv_init(struct csv_parser *p, unsigned char options) -{ - /* Initialize a csv_parser object returns 0 on success, -1 on error */ - if (p == NULL) - return -1; - - p->entry_buf = NULL; - p->pstate = ROW_NOT_BEGUN; - p->quoted = 0; - p->spaces = 0; - p->entry_pos = 0; - p->entry_size = 0; - p->status = 0; - p->options = options; - p->quote_char = CSV_QUOTE; - p->delim_char = CSV_COMMA; - p->is_space = NULL; - p->is_term = NULL; - p->blk_size = MEM_BLK_SIZE; - p->malloc_func = NULL; - p->realloc_func = realloc; - p->free_func = free; - - return 0; -} - -void -csv_free(struct csv_parser *p) -{ - /* Free the entry_buffer of csv_parser object */ - if (p == NULL) - return; - - if (p->entry_buf && p->free_func) - p->free_func(p->entry_buf); - - p->entry_buf = NULL; - p->entry_size = 0; - - return; -} - -int -csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - int quoted; - int pstate; - 
size_t spaces; - size_t entry_pos; - - if (p == NULL) - return -1; - - /* Finalize parsing. Needed, for example, when file does not end in a newline */ - quoted = p->quoted; - pstate = p->pstate; - spaces = p->spaces; - entry_pos = p->entry_pos; - - if ((pstate == FIELD_BEGUN) && p->quoted && (p->options & CSV_STRICT) && (p->options & CSV_STRICT_FINI)) { - /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ - p->status = CSV_EPARSE; - return -1; - } - - switch (pstate) { - case FIELD_MIGHT_HAVE_ENDED: - p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ - entry_pos = p->entry_pos; - /*lint -fallthrough */ - case FIELD_NOT_BEGUN: - case FIELD_BEGUN: - /* Unnecessary: - quoted = p->quoted, pstate = p->pstate; - spaces = p->spaces, entry_pos = p->entry_pos; - */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, -1); - break; - case ROW_NOT_BEGUN: /* Already ended properly */ - ; - } - - /* Reset parser */ - p->spaces = p->quoted = p->entry_pos = p->status = 0; - p->pstate = ROW_NOT_BEGUN; - - return 0; -} - -void -csv_set_delim(struct csv_parser *p, unsigned char c) -{ - /* Set the delimiter */ - if (p) p->delim_char = c; -} - -void -csv_set_quote(struct csv_parser *p, unsigned char c) -{ - /* Set the quote character */ - if (p) p->quote_char = c; -} - -unsigned char -csv_get_delim(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the delimiter */ - return p->delim_char; -} - -unsigned char -csv_get_quote(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the quote character */ - return p->quote_char; -} - -void -csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the space function */ - if (p) p->is_space = f; -} - -void -csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the term function */ - if (p) p->is_term = f; -} - -void -csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) -{ - /* Set 
the realloc function used to increase buffer size */ - if (p && f) p->realloc_func = f; -} - -void -csv_set_free_func(struct csv_parser *p, void (*f)(void *)) -{ - /* Set the free function used to free the buffer */ - if (p && f) p->free_func = f; -} - -void -csv_set_blk_size(struct csv_parser *p, size_t size) -{ - /* Set the block size used to increment buffer size */ - if (p) p->blk_size = size; -} - -size_t -csv_get_buffer_size(const struct csv_parser *p) -{ - /* Get the size of the entry buffer */ - if (p) - return p->entry_size; - return 0; -} - -static int -csv_increase_buffer(struct csv_parser *p) -{ - size_t to_add; - void *vp; - - if (p == NULL) return 0; - if (p->realloc_func == NULL) return 0; - - /* Increase the size of the entry buffer. Attempt to increase size by - * p->blk_size, if this is larger than SIZE_MAX try to increase current - * buffer size to SIZE_MAX. If allocation fails, try to allocate halve - * the size and try again until successful or increment size is zero. 
- */ - - to_add = p->blk_size; - - if ( p->entry_size >= SIZE_MAX - to_add ) - to_add = SIZE_MAX - p->entry_size; - - if (!to_add) { - p->status = CSV_ETOOBIG; - return -1; - } - - while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) { - to_add /= 2; - if (!to_add) { - p->status = CSV_ENOMEM; - return -1; - } - } - - /* Update entry buffer pointer and entry_size if successful */ - p->entry_buf = vp; - p->entry_size += to_add; - return 0; -} - -size_t -csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - unsigned const char *us = s; /* Access input data as array of unsigned char */ - unsigned char c; /* The character we are currently processing */ - size_t pos = 0; /* The number of characters we have processed in this call */ - - /* Store key fields into local variables for performance */ - unsigned char delim = p->delim_char; - unsigned char quote = p->quote_char; - int (*is_space)(unsigned char) = p->is_space; - int (*is_term)(unsigned char) = p->is_term; - int quoted = p->quoted; - int pstate = p->pstate; - size_t spaces = p->spaces; - size_t entry_pos = p->entry_pos; - - - if (!p->entry_buf && pos < len) { - /* Buffer hasn't been allocated yet and len > 0 */ - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - while (pos < len) { - /* Check memory usage, increase buffer if necessary */ - if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - c = us[pos++]; - - switch (pstate) { - case ROW_NOT_BEGUN: - case FIELD_NOT_BEGUN: - if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ - continue; - } else if (is_term ? 
is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (pstate == FIELD_NOT_BEGUN) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { /* ROW_NOT_BEGUN */ - /* Don't submit empty rows by default */ - if (p->options & CSV_REPALL_NL) { - SUBMIT_ROW(p, c); - } - } - continue; - } else if (c == delim) { /* Comma */ - SUBMIT_FIELD(p); - break; - } else if (c == quote) { /* Quote */ - pstate = FIELD_BEGUN; - quoted = 1; - } else { /* Anything else */ - pstate = FIELD_BEGUN; - quoted = 0; - SUBMIT_CHAR(p, c); - } - break; - case FIELD_BEGUN: - if (c == quote) { /* Quote */ - if (quoted) { - SUBMIT_CHAR(p, c); - pstate = FIELD_MIGHT_HAVE_ENDED; - } else { - /* STRICT ERROR - double quote inside non-quoted field */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - SUBMIT_CHAR(p, c); - spaces = 0; - } - } else if (c == delim) { /* Comma */ - if (quoted) { - SUBMIT_CHAR(p, c); - } else { - SUBMIT_FIELD(p); - } - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (!quoted) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { - SUBMIT_CHAR(p, c); - } - } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ - SUBMIT_CHAR(p, c); - spaces++; - } else { /* Anything else */ - SUBMIT_CHAR(p, c); - spaces = 0; - } - break; - case FIELD_MIGHT_HAVE_ENDED: - /* This only happens when a quote character is encountered in a quoted field */ - if (c == delim) { /* Comma */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else if (is_space ? 
is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ - SUBMIT_CHAR(p, c); - spaces++; - } else if (c == quote) { /* Quote */ - if (spaces) { - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - spaces = 0; - SUBMIT_CHAR(p, c); - } else { - /* Two quotes in a row */ - pstate = FIELD_BEGUN; - } - } else { /* Anything else */ - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - pstate = FIELD_BEGUN; - spaces = 0; - SUBMIT_CHAR(p, c); - } - break; - default: - break; - } - } - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; -} - -size_t -csv_write (void *dest, size_t dest_size, const void *src, size_t src_size) -{ - return csv_write2(dest, dest_size, src, src_size, CSV_QUOTE); -} - -int -csv_fwrite (FILE *fp, const void *src, size_t src_size) -{ - return csv_fwrite2(fp, src, src_size, CSV_QUOTE); -} - -size_t -csv_write2 (void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) -{ - unsigned char *cdest = dest; - const unsigned char *csrc = src; - size_t chars = 0; - - if (src == NULL) - return 0; - - if (dest == NULL) - dest_size = 0; - - if (dest_size > 0) - *cdest++ = quote; - chars++; - - while (src_size) { - if (*csrc == quote) { - if (dest_size > chars) - *cdest++ = quote; - if (chars < SIZE_MAX) chars++; - } - if (dest_size > chars) - *cdest++ = *csrc; - if (chars < SIZE_MAX) chars++; - src_size--; - csrc++; - } - - if (dest_size > chars) - *cdest = quote; - if (chars < SIZE_MAX) chars++; - - return chars; -} - -int -csv_fwrite2 (FILE *fp, const void *src, size_t src_size, unsigned char quote) -{ - const unsigned char *csrc = src; - - if (fp == NULL || 
src == NULL) - return 0; - - if (fputc(quote, fp) == EOF) - return EOF; - - while (src_size) { - if (*csrc == quote) { - if (fputc(quote, fp) == EOF) - return EOF; - } - if (fputc(*csrc, fp) == EOF) - return EOF; - src_size--; - csrc++; - } - - if (fputc(quote, fp) == EOF) { - return EOF; - } - - return 0; -} diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index df663b1dd..4db00d93a 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,42 +17,28 @@ * under the License. */ -#include "access/heapam.h" -#include "utils/load/age_load.h" - #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - char *start_vertex; - char *end_vertex; - bool load_as_agtype; - batch_insert_state *batch_state; -} csv_edge_reader; - - -void edge_field_cb(void *field, size_t field_len, void *data); -void edge_row_cb(int delim __attribute__((unused)), void *data); +#include "utils/load/age_load.h" +/* + * Load edges from a CSV file using pg's COPY infrastructure. + * + * CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the edge label + * label_id - ID of the label + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *label_name, int label_id, - bool load_as_agtype); - -#endif /*AG_LOAD_EDGES_H */ + char *label_name, int label_id, + bool load_as_agtype); +#endif /* AG_LOAD_EDGES_H */ diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index b8ed1572e..c3d517f30 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -17,46 +17,26 @@ * under the License. */ - #ifndef AG_LOAD_LABELS_H #define AG_LOAD_LABELS_H -#include "access/heapam.h" #include "utils/load/age_load.h" -struct counts { - long unsigned fields; - long unsigned allvalues; - long unsigned rows; -}; - -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - bool id_field_exists; - bool load_as_agtype; - int curr_seq_num; - batch_insert_state *batch_state; -} csv_vertex_reader; - - -void vertex_field_cb(void *field, size_t field_len, void *data); -void vertex_row_cb(int delim __attribute__((unused)), void *data); - +/* + * Load vertex labels from a CSV file using pg's COPY infrastructure. + * CSV format: [id,] [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the vertex label + * label_id - ID of the label + * id_field_exists - If true, first CSV column contains the vertex ID + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. 
+ */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 72f11493d..6573c79f3 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -17,6 +17,10 @@ * under the License. */ +#ifndef AG_LOAD_H +#define AG_LOAD_H + +#include "access/heapam.h" #include "commands/sequence.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -27,10 +31,8 @@ #include "commands/graph_commands.h" #include "utils/ag_cache.h" -#ifndef AGE_ENTITY_CREATOR_H -#define AGE_ENTITY_CREATOR_H - #define BATCH_SIZE 1000 +#define MAX_BUFFERED_BYTES 65535 /* 64KB, same as pg COPY */ typedef struct batch_insert_state { @@ -38,26 +40,29 @@ typedef struct batch_insert_state ResultRelInfo *resultRelInfo; TupleTableSlot **slots; int num_tuples; - int max_tuples; + size_t buffered_bytes; + BulkInsertState bistate; } batch_insert_state; -agtype* create_empty_agtype(void); - -agtype* create_agtype_from_list(char **header, char **fields, +agtype *create_empty_agtype(void); +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id, bool load_as_agtype); -agtype* create_agtype_from_list_i(char **header, char **fields, +agtype *create_agtype_from_list_i(char **header, char **fields, size_t fields_len, size_t start_index, bool load_as_agtype); + void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype *vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, - agtype* end_properties); -void insert_batch(batch_insert_state *batch_state); + agtype *edge_properties); void init_batch_insert(batch_insert_state **batch_state, char *label_name, Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); void 
finish_batch_insert(batch_insert_state **batch_state); -#endif /* AGE_ENTITY_CREATOR_H */ +char *trim_whitespace(const char *str); + +#endif /* AG_LOAD_H */ diff --git a/src/include/utils/load/csv.h b/src/include/utils/load/csv.h deleted file mode 100644 index 062536977..000000000 --- a/src/include/utils/load/csv.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Created by Shoaib on 12/5/2021. -*/ - -/* -libcsv - parse and write csv data -Copyright (C) 2008-2021 Robert Gamble -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifndef LIBCSV_H__ -#define LIBCSV_H__ -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define CSV_MAJOR 3 -#define CSV_MINOR 0 -#define CSV_RELEASE 3 - -/* Error Codes */ -#define CSV_SUCCESS 0 -#define CSV_EPARSE 1 /* Parse error in strict mode */ -#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ -#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ -#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ - - -/* parser options */ -#define CSV_STRICT 1 /* enable strict mode */ -#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ -#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last - field is quoted and doesn't contain ending - quote */ -#define CSV_APPEND_NULL 8 /* Ensure 
that all fields are null-terminated */ -#define CSV_EMPTY_IS_NULL 16 /* Pass null pointer to cb1 function when - empty, unquoted fields are encountered */ - - -/* Character values */ -#define CSV_TAB 0x09 -#define CSV_SPACE 0x20 -#define CSV_CR 0x0d -#define CSV_LF 0x0a -#define CSV_COMMA 0x2c -#define CSV_QUOTE 0x22 - -struct csv_parser { - int pstate; /* Parser state */ - int quoted; /* Is the current field a quoted field? */ - size_t spaces; /* Number of continuous spaces after quote or in a non-quoted field */ - unsigned char * entry_buf; /* Entry buffer */ - size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ - size_t entry_size; /* Size of entry buffer */ - int status; /* Operation status */ - unsigned char options; - unsigned char quote_char; - unsigned char delim_char; - int (*is_space)(unsigned char); - int (*is_term)(unsigned char); - size_t blk_size; - void *(*malloc_func)(size_t); /* not used */ - void *(*realloc_func)(void *, size_t); /* function used to allocate buffer memory */ - void (*free_func)(void *); /* function used to free buffer memory */ -}; - -/* Function Prototypes */ -int csv_init(struct csv_parser *p, unsigned char options); -int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -void csv_free(struct csv_parser *p); -int csv_error(const struct csv_parser *p); -const char * csv_strerror(int error); -size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size); -int csv_fwrite(FILE *fp, const void *src, size_t src_size); -size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote); -int csv_fwrite2(FILE *fp, const void *src, size_t src_size, unsigned char quote); -int csv_get_opts(const struct csv_parser *p); -int csv_set_opts(struct csv_parser *p, 
unsigned char options); -void csv_set_delim(struct csv_parser *p, unsigned char c); -void csv_set_quote(struct csv_parser *p, unsigned char c); -unsigned char csv_get_delim(const struct csv_parser *p); -unsigned char csv_get_quote(const struct csv_parser *p); -void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_realloc_func(struct csv_parser *p, void *(*)(void *, size_t)); -void csv_set_free_func(struct csv_parser *p, void (*)(void *)); -void csv_set_blk_size(struct csv_parser *p, size_t); -size_t csv_get_buffer_size(const struct csv_parser *p); - -#ifdef __cplusplus -} -#endif - -#endif From ba60c8560f860dc7983406dfa1b6e9e6e45a4e64 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Tue, 20 Jan 2026 23:44:19 +0500 Subject: [PATCH 11/11] Add RLS support and fix permission checks (#2309) - Previously, age only set ACL_SELECT and ACL_INSERT in RTEPermissionInfo, bypassing pg's privilege checking for DELETE and UPDATE operations. - Additionally, RLS policies were not enforced because AGE uses CMD_SELECT for all Cypher queries, causing the rewriter to skip RLS policy application. Permission fixes: - Add ACL_DELETE permission flag for DELETE clause operations - Add ACL_UPDATE permission flag for SET/REMOVE clause operations - Recursively search RTEs including subqueries for permission info RLS support: - Implemented at executor level because age transforms all cypher queries to CMD_SELECT, so pg's rewriter never adds RLS policies for INSERT/UPDATE/DELETE operations. There isn't an appropriate rewriter hook to modify this behavior, so we do it in the executor instead.
- Add setup_wcos() to apply WITH CHECK policies at execution time for CREATE, SET, and MERGE operations - Add setup_security_quals() and check_security_quals() to apply USING policies for UPDATE and DELETE operations - USING policies silently filter rows (matching pg behavior) - WITH CHECK policies raise errors on violation - DETACH DELETE raises error if edge RLS blocks deletion to prevent dangling edges - Add permission checks and rls in startnode/endnode functions - Add regression tests Assisted-by AI Resolved Conflicts: src/backend/executor/cypher_create.c src/backend/executor/cypher_delete.c src/backend/executor/cypher_merge.c src/backend/executor/cypher_set.c src/backend/executor/cypher_utils.c --- Makefile | 3 +- regress/expected/security.out | 1657 ++++++++++++++++++++++++++ regress/sql/security.sql | 1451 ++++++++++++++++++++++ src/backend/executor/cypher_create.c | 7 + src/backend/executor/cypher_delete.c | 97 ++ src/backend/executor/cypher_merge.c | 9 +- src/backend/executor/cypher_set.c | 88 +- src/backend/executor/cypher_utils.c | 779 ++++++++++++ src/backend/parser/cypher_clause.c | 103 ++ src/backend/utils/adt/agtype.c | 27 +- src/include/executor/cypher_utils.h | 22 + 11 files changed, 4238 insertions(+), 5 deletions(-) create mode 100644 regress/expected/security.out create mode 100644 regress/sql/security.sql diff --git a/Makefile b/Makefile index a8faa2bb8..2d2912571 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ REGRESS = scan \ jsonb_operators \ list_comprehension \ map_projection \ - direct_field_access + direct_field_access \ + security ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/security.out b/regress/expected/security.out new file mode 100644 index 000000000..59e58cb05 --- /dev/null +++ b/regress/expected/security.out @@ -0,0 +1,1657 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- Test Privileges +-- +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); +NOTICE: graph "security_test" has been created + create_graph +-------------- + +(1 row) + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- +-- Create test roles with different permission levels +-- +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN 
SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE 
ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ +SET ROLE security_test_noread; +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); +ERROR: permission denied for table Person +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); +ERROR: permission denied for table _ag_label_vertex +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); +ERROR: permission denied for table _ag_label_vertex +RESET ROLE; +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. 
+CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; +SET ROLE security_test_base_only; +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex + {"id": 844424930131970, "label": "Person", "properties": {"age": 25, "name": "Bob"}}::vertex + {"id": 1125899906842625, "label": "Document", "properties": {"title": "Secret", "content": "classified"}}::vertex +(3 rows) + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + e +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge + {"id": 1688849860263937, "label": "OWNS", "end_id": 1125899906842625, "start_id": 844424930131969, "properties": {}}::edge +(2 rows) + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- 
============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+---------- + "Alice" | "Secret" +(1 row) + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); +ERROR: permission denied for table Person +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); +ERROR: permission denied for schema security_test +LINE 1: SELECT * FROM cypher('security_test', $$ + ^ +RESET ROLE; +SET ROLE security_test_insert; +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS 
(a agtype); + a +--- +(0 rows) + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-----------+----- + "Charlie" | 35 +(1 row) + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +SET ROLE security_test_update; +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Alice" | 31 +(1 row) + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+----------- + "Bob" | "reading" +(1 row) + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ +RESET ROLE; +SET 
ROLE security_test_readonly; +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+------- + "Bob" | +(1 row) + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_delete; +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 7: DETACH DELETE 
Tests +-- ============================================================================ +RESET ROLE; +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_insert; +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-------+----- + "Eve" | 28 +(1 row) + +-- Test: 
MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + name +------- + "Eve" +(1 row) + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_full; +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Frank" | 51 +(1 row) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ +RESET ROLE; +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN 
SCHEMA security_test TO security_test_person_only; +SET ROLE security_test_person_only; +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + name +--------- + "Alice" +(1 row) + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); +ERROR: permission denied for table Document +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); +ERROR: permission denied for table Document +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ +RESET ROLE; +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; +SET ROLE security_test_noexec; +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); +ERROR: permission denied for function drop_graph +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); +ERROR: 
permission denied for function create_vlabel +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); +ERROR: permission denied for function create_elabel +RESET ROLE; +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; +SET ROLE security_test_noexec; +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); +ERROR: permission denied for schema security_test +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +RESET ROLE; +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" +SET ROLE security_test_edge_only; +-- Test: endNode fails without 
SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: permission denied for table Person +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: permission denied for table Person +RESET ROLE; +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; +SET ROLE security_test_edge_only; +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+---------- + "Alice" | "Bob" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- Cleanup +-- ============================================================================ +RESET ROLE; +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE 
security_test_edge_only; +-- Drop test graph +SELECT drop_graph('security_test', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table security_test._ag_label_vertex +drop cascades to table security_test._ag_label_edge +drop cascades to table security_test."Person" +drop cascades to table security_test."Document" +drop cascades to table security_test."KNOWS" +drop cascades to table security_test."OWNS" +NOTICE: graph "security_test" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Row-Level Security (RLS) Tests +-- +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); +NOTICE: graph "rls_graph" has been created + create_graph +-------------- + +(1 row) + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 
'rls_user2'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + 
"Alice" + "Charlie" +(2 rows) + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Bob" + "Diana" +(2 rows) + +RESET ROLE; +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +---------------- + "User1Created" +(1 row) + +-- 2.2: Default deny for INSERT - 
no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + name +------ +(0 rows) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true +(1 row) + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +------+--------- +(0 rows) + +RESET ROLE; +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated 
agtype); + name | updated +---------+--------- + "Alice" | true + "Bob" | +(2 rows) + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner +SET ROLE rls_user1; +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + name | verified +---------+---------- + "Alice" | true +(1 row) + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + owner +------------- + "rls_user1" +(1 row) + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+---------- + "Alice" | "active" +(1 row) + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +------+-------- +(0 rows) + +RESET 
ROLE; +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ +-- Keep existing update policy, test REMOVE operation +SET ROLE rls_user1; +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+-------- + "Alice" | +(1 row) + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + name | dept +------+------ +(0 rows) + +RESET ROLE; +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + dept +--------------- + "Engineering" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) 
AS (a agtype); + a +--- +(0 rows) + +DROP POLICY person_insert_all ON rls_graph."Person"; +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest2" + "DeleteTest3" +(2 rows) + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; +SET ROLE rls_user1; +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest3" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH 
CHECK (properties->>'"owner"' = current_user); +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +---------+------------- + "Alice" | "rls_user1" +(1 row) + +RESET ROLE; +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; +-- Policy: Only see edges from 2021 or later +CREATE POLICY 
knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); +SET ROLE rls_user1; +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2021 + 2022 +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "KNOWS" +RESET ROLE; +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + strength +---------- + "strong" +(1 row) + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ +-- Policy: Can only update edges with strength = 
'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2021 | "updated" +(1 row) + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+------- +(0 rows) + +RESET ROLE; +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2020 | + 2021 | "updated" +(2 rows) + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON 
rls_graph."KNOWS"; +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Cannot delete 
edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); +SET ROLE rls_user1; +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); +ERROR: cannot delete edge due to row-level security policy on "KNOWS" +HINT: DETACH DELETE requires permission to delete all connected edges. +RESET ROLE; +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DetachTest1" +(1 row) + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); +SET ROLE rls_user1; +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 
rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + title +-------------- + "Public Doc" +(1 row) + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+-------------- + "Alice" | "Public Doc" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); +SET ROLE rls_user1; +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" +(3 rows) + +RESET ROLE; +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level 
agtype); + name | level +-----------+------- + "Alice" | 1 + "Bob" | 2 + "Charlie" | 1 +(3 rows) + +RESET ROLE; +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +RESET ROLE; +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +-- 
============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ +DROP POLICY person_very_restrictive ON rls_graph."Person"; +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +----------------+------------- + "NewFromMatch" | "rls_user1" +(1 row) + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true +(1 row) + +RESET ROLE; +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true + "Bob" | +(2 rows) + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON 
rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: access to vertex 844424930131970 denied by row-level security policy on "Person" +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: access to vertex 844424930131969 denied by row-level security policy on "Person" +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 
'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+----------- + "Alice" | "Charlie" +(1 row) + +RESET ROLE; +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ +RESET ROLE; +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; +-- Drop test graph +SELECT drop_graph('rls_graph', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table rls_graph._ag_label_vertex +drop cascades to table rls_graph._ag_label_edge +drop cascades to table rls_graph."Person" +drop cascades to table rls_graph."Document" +drop cascades to table rls_graph."KNOWS" +drop cascades to table rls_graph."AUTHORED" +NOTICE: graph "rls_graph" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/security.sql b/regress/sql/security.sql new file mode 100644 index 000000000..344dd23d4 --- /dev/null +++ b/regress/sql/security.sql @@ -0,0 +1,1451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- Test Privileges +-- + +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + +-- +-- Create test roles with different permission levels +-- + +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; + +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA 
security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; + +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; + +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; + +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; + +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; + +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE 
security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables + +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ + +SET ROLE security_test_noread; + +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); + +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); + +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +RESET ROLE; + +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; + +SET ROLE security_test_base_only; + +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. 
Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ + +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); + +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + +-- Test: CREATE edge should 
succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); + +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM 
cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_delete; + +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ + +RESET ROLE; + +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH 
(a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_full; + +-- Full permission role should be able to do everything +SELECT * FROM 
cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ + +RESET ROLE; + +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; + +SET ROLE security_test_person_only; + +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); + +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); + +-- 
============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ + +RESET ROLE; + +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; + +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; + +SET ROLE security_test_noexec; + +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); + +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); + +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); + +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); + +RESET ROLE; + +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; + +SET ROLE security_test_noexec; + +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); + +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); + +RESET ROLE; + +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION 
ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; + +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ + +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" + +SET ROLE security_test_edge_only; + +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); + +RESET ROLE; + +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; + +SET ROLE security_test_edge_only; + +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +RESET ROLE; + +-- Drop all owned objects and privileges for each role, then drop the role +DROP 
OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; + +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; + +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; + +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; + +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; + +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; + +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; + +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; + +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; + +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; + +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; + +-- Drop test graph +SELECT drop_graph('security_test', true); + +-- +-- Row-Level Security (RLS) Tests +-- + +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS + +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + +-- Create a second vertex label for multi-label 
tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; + +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ + +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should only see Alice and 
Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; + +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ + +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own 
ON rls_graph."Person"; + +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +RESET ROLE; + +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +RESET ROLE; + +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; + +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = 
current_user); -- But new value must keep owner + +SET ROLE rls_user1; + +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); + +RESET ROLE; + +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; + +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ + +-- Keep existing update policy, test REMOVE operation + +SET ROLE rls_user1; + +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department 
RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + +RESET ROLE; + +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; + +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +DROP POLICY person_insert_all ON rls_graph."Person"; + +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name 
agtype); + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; + +SET ROLE rls_user1; + +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; + +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +RESET ROLE; + +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE 
p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ + +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; + +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); + +SET ROLE rls_user1; + +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ + +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), 
(b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ + +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +RESET ROLE; + +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR 
INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + +RESET ROLE; + +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ + +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); + +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 
'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); + +SET ROLE rls_user1; + +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ + +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; + +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); + +SET ROLE rls_user1; + +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM 
cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ + +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; + +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); + +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); + +SET ROLE rls_user1; + +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + +RESET ROLE; + +-- 13.3: Multiple restrictive policies (all must 
pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ + +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; + +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); + +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ + +DROP POLICY person_very_restrictive ON rls_graph."Person"; + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON 
rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; + +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + +RESET ROLE; + +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; + +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ + +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; + +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- Policy: users can only see their own Person records +CREATE POLICY 
person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; + +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; + +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); + +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; + +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ + +RESET ROLE; + +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- Drop roles +DROP OWNED BY 
rls_user1 CASCADE; +DROP ROLE rls_user1; + +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; + +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; + +-- Drop test graph +SELECT drop_graph('rls_graph', true); diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 2031fe8d8..495eb3a08 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -122,6 +123,12 @@ static void begin_cypher_create(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } } diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index f86c6126b..0b486ad5e 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -22,6 +22,9 @@ #include "executor/executor.h" #include "storage/bufmgr.h" #include "common/hashfn.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -371,6 +374,16 @@ static void process_delete_list(CustomScanState *node) ExprContext *econtext = css->css.ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; EState *estate = node->ss.ps.state; + HTAB *qual_cache = NULL; + HASHCTL hashctl; + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("delete_qual_cache", 8, &hashctl, + HASH_ELEM 
| HASH_BLOBS | HASH_CONTEXT); foreach(lc, css->delete_data->delete_items) { @@ -383,6 +396,7 @@ static void process_delete_list(CustomScanState *node) char *label_name; Integer *pos; int entity_position; + Oid relid; item = lfirst(lc); @@ -401,6 +415,7 @@ static void process_delete_list(CustomScanState *node) label_name = pnstrdup(label->val.string.val, label->val.string.len); resultRelInfo = create_entity_result_rel_info(estate, css->delete_data->graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); /* * Setup the scan key to require the id field on-disc to match the @@ -448,6 +463,36 @@ static void process_delete_list(CustomScanState *node) continue; } + /* Check RLS security quals (USING policy) before delete */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + bool found; + + /* Get cached security quals and slot for this label */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_DELETE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + entry->withCheckOptions = NIL; + entry->withCheckOptionExprs = NIL; + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + + /* Silently skip if USING policy filters out this row */ + if (!check_security_quals(entry->qualExprs, entry->slot, econtext)) + { + table_endscan(scan_desc); + destroy_entity_result_rel_info(resultRelInfo); + continue; + } + } + /* * For vertices, we insert the vertex ID in the hashtable * vertex_id_htab. 
This hashtable is used later to process @@ -467,6 +512,9 @@ static void process_delete_list(CustomScanState *node) table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } + + /* Clean up the cache */ + hash_destroy(qual_cache); } /* @@ -490,9 +538,14 @@ static void check_for_connected_edges(CustomScanState *node) TableScanDesc scan_desc; HeapTuple tuple; TupleTableSlot *slot; + Oid relid; + bool rls_enabled = false; + List *qualExprs = NIL; + ExprContext *econtext = NULL; resultRelInfo = create_entity_result_rel_info(estate, graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); estate->es_snapshot->curcid = GetCurrentCommandId(false); estate->es_output_cid = GetCurrentCommandId(false); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, @@ -501,6 +554,22 @@ static void check_for_connected_edges(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* + * For DETACH DELETE with RLS enabled, compile the security qual + * expressions once per label for efficient evaluation. + */ + if (css->delete_data->detach) + { + /* Setup RLS security quals for this label */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + rls_enabled = true; + econtext = css->css.ss.ps.ps_ExprContext; + qualExprs = setup_security_quals(resultRelInfo, estate, node, + CMD_DELETE); + } + } + /* for each row */ while (true) { @@ -538,6 +607,34 @@ static void check_for_connected_edges(CustomScanState *node) { if (css->delete_data->detach) { + AclResult aclresult; + + /* Check that the user has DELETE permission on the edge table */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_DELETE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, label_name); + } + + /* Check RLS security quals (USING policy) before delete */ + if (rls_enabled) + { + /* + * For DETACH DELETE, error out if edge RLS check fails. 
+ * Unlike normal DELETE which silently skips, we cannot + * silently skip edges here as it would leave dangling + * edges pointing to deleted vertices. + */ + if (!check_security_quals(qualExprs, slot, econtext)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot delete edge due to row-level security policy on \"%s\"", + label_name), + errhint("DETACH DELETE requires permission to delete all connected edges."))); + } + } + delete_entity(estate, resultRelInfo, tuple); } else diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 6cfa70d48..a1bb4686c 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -20,11 +20,12 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/datum.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" -#include "utils/datum.h" /* * The following structure is used to hold a single vertex or edge component @@ -182,6 +183,12 @@ static void begin_cypher_merge(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } /* diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index 9fd599eed..a1063af32 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -19,8 +19,10 @@ #include "postgres.h" +#include "common/hashfn.h" #include "executor/executor.h" #include "storage/bufmgr.h" +#include "utils/rls.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" @@ -136,6 +138,13 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, 
estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + result = table_tuple_update(resultRelInfo->ri_RelationDesc, &tuple->t_self, elemTupleSlot, cid, estate->es_snapshot, @@ -372,9 +381,20 @@ static void process_update_list(CustomScanState *node) EState *estate = css->css.ss.ps.state; int *luindex = NULL; int lidx = 0; + HTAB *qual_cache = NULL; + HASHCTL hashctl; /* allocate an array to hold the last update index of each 'entity' */ luindex = palloc0(sizeof(int) * scanTupleSlot->tts_nvalid); + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("update_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + /* * Iterate through the SET items list and store the loop index of each * 'entity' update. 
As there is only one entry for each entity, this will @@ -522,6 +542,38 @@ static void process_update_list(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* Setup RLS policies if RLS is enabled */ + if (check_enable_rls(resultRelInfo->ri_RelationDesc->rd_id, + InvalidOid, true) == RLS_ENABLED) + { + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + RLSCacheEntry *entry; + bool found; + + /* Get cached RLS state for this label, or set it up */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + /* Setup WITH CHECK policies */ + setup_wcos(resultRelInfo, estate, node, CMD_UPDATE); + entry->withCheckOptions = resultRelInfo->ri_WithCheckOptions; + entry->withCheckOptionExprs = resultRelInfo->ri_WithCheckOptionExprs; + + /* Setup security quals */ + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_UPDATE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + } + else + { + /* Use cached WCOs */ + resultRelInfo->ri_WithCheckOptions = entry->withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = entry->withCheckOptionExprs; + } + } + /* * Now that we have the updated properties, create a either a vertex or * edge Datum for the in-memory update, and setup the tupleTableSlot @@ -597,8 +649,36 @@ static void process_update_list(CustomScanState *node) */ if (HeapTupleIsValid(heap_tuple)) { - heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, - heap_tuple); + bool should_update = true; + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + + /* Check RLS security quals (USING policy) before update */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + + /* Entry was already created earlier when setting up WCOs */ + entry = hash_search(qual_cache, &relid, HASH_FIND, NULL); + if (!entry) + { + ereport(ERROR, + 
(errcode(ERRCODE_INTERNAL_ERROR), + errmsg("missing RLS cache entry for relation %u", + relid))); + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + should_update = check_security_quals(entry->qualExprs, + entry->slot, + econtext); + } + + /* Silently skip if USING policy filters out this row */ + if (should_update) + { + heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, + heap_tuple); + } } /* close the ScanDescription */ table_endscan(scan_desc); @@ -612,6 +692,10 @@ static void process_update_list(CustomScanState *node) /* increment loop index */ lidx++; } + + /* Clean up the cache */ + hash_destroy(qual_cache); + /* free our lookup array */ pfree_if_not_null(luindex); } diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index d7a55f709..eff829925 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -25,14 +25,35 @@ #include "postgres.h" #include "executor/executor.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" #include "executor/cypher_utils.h" #include "utils/ag_cache.h" +/* RLS helper function declarations */ +static void get_policies_for_relation(Relation relation, CmdType cmd, + Oid user_id, List **permissive_policies, + List **restrictive_policies); +static void add_with_check_options(Relation rel, int rt_index, WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, bool *hasSubLinks, + bool force_using); +static void add_security_quals(int rt_index, List *permissive_policies, + List *restrictive_policies, + List **securityQuals, bool *hasSubLinks); +static void sort_policies_by_name(List *policies); +static int row_security_policy_cmp(const ListCell *a, const ListCell *b); +static bool 
check_role_for_policy(ArrayType *policy_roles, Oid user_id); + /* * Given the graph name and the label name, create a ResultRelInfo for the table * those two variables represent. Open the Indices too. @@ -256,6 +277,13 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_INSERT_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + /* Insert the tuple normally */ table_tuple_insert(resultRelInfo->ri_RelationDesc, elemTupleSlot, cid, 0, NULL); @@ -269,3 +297,754 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, return tuple; } + +/* + * setup_wcos + * + * WithCheckOptions are added during the rewrite phase, but since AGE uses + * CMD_SELECT for all queries, WCOs don't get added for CREATE/SET/MERGE + * operations. This function compensates by adding WCOs at execution time. + * + * Based on PostgreSQL's row security implementation in rowsecurity.c + */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *withCheckOptions = NIL; + List *wcoExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + WCOKind wco_kind; + bool hasSubLinks = false; + + /* Determine the WCO kind based on command type */ + if (cmd == CMD_INSERT) + { + wco_kind = WCO_RLS_INSERT_CHECK; + } + else if (cmd == CMD_UPDATE) + { + wco_kind = WCO_RLS_UPDATE_CHECK; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_wcos"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. 
+ * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. + */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build WithCheckOptions from the policies */ + add_with_check_options(rel, rt_index, wco_kind, + permissive_policies, + restrictive_policies, + &withCheckOptions, + &hasSubLinks, + false); + + /* Compile the WCO expressions */ + foreach(lc, withCheckOptions) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, lc); + ExprState *wcoExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(wco->qual, List)) + { + wco->qual = (Node *) list_make1(wco->qual); + } + + wcoExpr = ExecInitQual((List *) wco->qual, (PlanState *) node); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + /* Set up the ResultRelInfo with WCOs */ + resultRelInfo->ri_WithCheckOptions = withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; +} + +/* + * get_policies_for_relation + * + * Returns lists of permissive and restrictive policies to be applied to the + * specified relation, based on the command type and role. + * + * This includes any policies added by extensions. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id, + List **permissive_policies, + List **restrictive_policies) +{ + ListCell *item; + + *permissive_policies = NIL; + *restrictive_policies = NIL; + + /* No policies if RLS descriptor is not present */ + if (relation->rd_rsdesc == NULL) + { + return; + } + + /* First find all internal policies for the relation. */ + foreach(item, relation->rd_rsdesc->policies) + { + bool cmd_matches = false; + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + /* Always add ALL policies, if they exist. 
*/ + if (policy->polcmd == '*') + { + cmd_matches = true; + } + else + { + /* Check whether the policy applies to the specified command type */ + switch (cmd) + { + case CMD_SELECT: + if (policy->polcmd == ACL_SELECT_CHR) + { + cmd_matches = true; + } + break; + case CMD_INSERT: + if (policy->polcmd == ACL_INSERT_CHR) + { + cmd_matches = true; + } + break; + case CMD_UPDATE: + if (policy->polcmd == ACL_UPDATE_CHR) + { + cmd_matches = true; + } + break; + case CMD_DELETE: + if (policy->polcmd == ACL_DELETE_CHR) + { + cmd_matches = true; + } + break; + case CMD_MERGE: + /* + * We do not support a separate policy for MERGE command. + * Instead it derives from the policies defined for other + * commands. + */ + break; + default: + elog(ERROR, "unrecognized policy command type %d", + (int) cmd); + break; + } + } + + /* + * Add this policy to the relevant list of policies if it applies to + * the specified role. + */ + if (cmd_matches && check_role_for_policy(policy->roles, user_id)) + { + if (policy->permissive) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + else + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + /* + * We sort restrictive policies by name so that any WCOs they generate are + * checked in a well-defined order. + */ + sort_policies_by_name(*restrictive_policies); + + /* + * Then add any permissive or restrictive policies defined by extensions. + * These are simply appended to the lists of internal policies, if they + * apply to the specified role. + */ + if (row_security_policy_hook_restrictive) + { + List *hook_policies = + (*row_security_policy_hook_restrictive) (cmd, relation); + + /* + * As with built-in restrictive policies, we sort any hook-provided + * restrictive policies by name also. Note that we also intentionally + * always check all built-in restrictive policies, in name order, + * before checking restrictive policies added by hooks, in name order. 
+ */ + sort_policies_by_name(hook_policies); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + if (row_security_policy_hook_permissive) + { + List *hook_policies = + (*row_security_policy_hook_permissive) (cmd, relation); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + } + } +} + +/* + * add_with_check_options + * + * Add WithCheckOptions of the specified kind to check that new records + * added by an INSERT or UPDATE are consistent with the specified RLS + * policies. Normally new data must satisfy the WITH CHECK clauses from the + * policies. If a policy has no explicit WITH CHECK clause, its USING clause + * is used instead. In the special case of an UPDATE arising from an + * INSERT ... ON CONFLICT DO UPDATE, existing records are first checked using + * a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING + * clauses from RLS policies. + * + * New WCOs are added to withCheckOptions, and hasSubLinks is set to true if + * any of the check clauses added contain sublink subqueries. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_with_check_options(Relation rel, + int rt_index, + WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, + bool *hasSubLinks, + bool force_using) +{ + ListCell *item; + List *permissive_quals = NIL; + +#define QUAL_FOR_WCO(policy) \ + ( !force_using && \ + (policy)->with_check_qual != NULL ? \ + (policy)->with_check_qual : (policy)->qual ) + + /* + * First collect up the permissive policy clauses, similar to + * add_security_quals. 
+ */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + permissive_quals = lappend(permissive_quals, copyObject(qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * There must be at least one permissive qual found or no rows are allowed + * to be added. This is the same as in add_security_quals. + * + * If there are no permissive_quals then we fall through and return a + * single 'false' WCO, preventing all new rows. + */ + if (permissive_quals != NIL) + { + /* + * Add a single WithCheckOption for all the permissive policy clauses, + * combining them together using OR. This check has no policy name, + * since if the check fails it means that no policy granted permission + * to perform the update, rather than any particular policy being + * violated. + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->cascaded = false; + + if (list_length(permissive_quals) == 1) + { + wco->qual = (Node *) linitial(permissive_quals); + } + else + { + wco->qual = (Node *) makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes(wco->qual, 1, rt_index, 0); + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + + /* + * Now add WithCheckOptions for each of the restrictive policy clauses + * (which will be combined together using AND). We use a separate + * WithCheckOption for each restrictive policy to allow the policy + * name to be included in error reports if the policy is violated. 
+ */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + qual = copyObject(qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = pstrdup(policy->policy_name); + wco->qual = (Node *) qual; + wco->cascaded = false; + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + *hasSubLinks |= policy->hassublinks; + } + } + } + else + { + /* + * If there were no policy clauses to check new data, add a single + * always-false WCO (a default-deny policy). + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->qual = (Node *) makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true); + wco->cascaded = false; + + *withCheckOptions = lappend(*withCheckOptions, wco); + } +} + +/* + * sort_policies_by_name + * + * This is only used for restrictive policies, ensuring that any + * WithCheckOptions they generate are applied in a well-defined order. + * This is not necessary for permissive policies, since they are all combined + * together using OR into a single WithCheckOption check. 
+ * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +sort_policies_by_name(List *policies) +{ + list_sort(policies, row_security_policy_cmp); +} + +/* + * list_sort comparator to sort RowSecurityPolicy entries by name + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static int +row_security_policy_cmp(const ListCell *a, const ListCell *b) +{ + const RowSecurityPolicy *pa = (const RowSecurityPolicy *) lfirst(a); + const RowSecurityPolicy *pb = (const RowSecurityPolicy *) lfirst(b); + + /* Guard against NULL policy names from extensions */ + if (pa->policy_name == NULL) + { + return pb->policy_name == NULL ? 0 : 1; + } + if (pb->policy_name == NULL) + { + return -1; + } + + return strcmp(pa->policy_name, pb->policy_name); +} + +/* + * check_role_for_policy - + * determines if the policy should be applied for the current role + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static bool +check_role_for_policy(ArrayType *policy_roles, Oid user_id) +{ + int i; + Oid *roles = (Oid *) ARR_DATA_PTR(policy_roles); + + /* Quick fall-thru for policies applied to all roles */ + if (roles[0] == ACL_ID_PUBLIC) + { + return true; + } + + for (i = 0; i < ARR_DIMS(policy_roles)[0]; i++) + { + if (has_privs_of_role(user_id, roles[i])) + { + return true; + } + } + + return false; +} + +/* + * add_security_quals + * + * Add security quals to enforce the specified RLS policies, restricting + * access to existing data in a table. If there are no policies controlling + * access to the table, then all access is prohibited --- i.e., an implicit + * default-deny policy is used. + * + * New security quals are added to securityQuals, and hasSubLinks is set to + * true if any of the quals added contain sublink subqueries. 
+ * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_security_quals(int rt_index, + List *permissive_policies, + List *restrictive_policies, + List **securityQuals, + bool *hasSubLinks) +{ + ListCell *item; + List *permissive_quals = NIL; + Expr *rowsec_expr; + + /* + * First collect up the permissive quals. If we do not find any + * permissive policies then no rows are visible (this is handled below). + */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (policy->qual != NULL) + { + permissive_quals = lappend(permissive_quals, + copyObject(policy->qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * We must have permissive quals, always, or no rows are visible. + * + * If we do not, then we simply return a single 'false' qual which results + * in no rows being visible. + */ + if (permissive_quals != NIL) + { + /* + * We now know that permissive policies exist, so we can now add + * security quals based on the USING clauses from the restrictive + * policies. Since these need to be combined together using AND, we + * can just add them one at a time. + */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual; + + if (policy->qual != NULL) + { + qual = copyObject(policy->qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + *securityQuals = list_append_unique(*securityQuals, qual); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * Then add a single security qual combining together the USING + * clauses from all the permissive policies using OR. 
+ */ + if (list_length(permissive_quals) == 1) + { + rowsec_expr = (Expr *) linitial(permissive_quals); + } + else + { + rowsec_expr = makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes((Node *) rowsec_expr, 1, rt_index, 0); + *securityQuals = list_append_unique(*securityQuals, rowsec_expr); + } + else + { + /* + * A permissive policy must exist for rows to be visible at all. + * Therefore, if there were no permissive policies found, return a + * single always-false clause. + */ + *securityQuals = lappend(*securityQuals, + makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true)); + } +} + +/* + * setup_security_quals + * + * Security quals (USING policies) are added during the rewrite phase, but + * since AGE uses CMD_SELECT for all queries, they don't get added for + * UPDATE/DELETE operations. This function sets up security quals at + * execution time to be evaluated against each tuple before modification. + * + * Returns a list of compiled ExprState for the security quals. + */ +List * +setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + List *qualExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + bool hasSubLinks = false; + + /* Only UPDATE and DELETE have security quals */ + if (cmd != CMD_UPDATE && cmd != CMD_DELETE) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_security_quals"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* If no RLS policies exist, return empty list */ + if (rel->rd_rsdesc == NULL) + { + return NIL; + } + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. + * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. 
+ */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies */ + add_security_quals(rt_index, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* Compile the security qual expressions */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(qual, List)) + { + qual = (Expr *) list_make1(qual); + } + + qualExpr = ExecInitQual((List *) qual, (PlanState *) node); + qualExprs = lappend(qualExprs, qualExpr); + } + + return qualExprs; +} + +/* + * check_security_quals + * + * Evaluate security quals against a tuple. Returns true if all quals pass + * (row can be modified), false if any qual fails (row should be silently + * skipped). + * + * This matches PostgreSQL's behavior where USING expressions for UPDATE/DELETE + * silently filter rows rather than raising errors. + */ +bool +check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext) +{ + ListCell *lc; + TupleTableSlot *saved_scantuple; + bool result = true; + + if (qualExprs == NIL) + { + return true; + } + + /* Save and set up the scan tuple for expression evaluation */ + saved_scantuple = econtext->ecxt_scantuple; + econtext->ecxt_scantuple = slot; + + foreach(lc, qualExprs) + { + ExprState *qualExpr = (ExprState *) lfirst(lc); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + econtext->ecxt_scantuple = saved_scantuple; + return result; +} + +/* + * check_rls_for_tuple + * + * Check RLS policies for a tuple without needing full executor context. + * Used by standalone functions like startNode()/endNode() that access + * tables directly. 
+ * + * Returns true if the tuple passes RLS checks (or if RLS is not enabled), + * false if the tuple should be filtered out. + */ +bool +check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + ListCell *lc; + Oid user_id; + bool hasSubLinks = false; + bool result = true; + EState *estate; + ExprContext *econtext; + TupleTableSlot *slot; + + /* If RLS is not enabled, tuple passes */ + if (check_enable_rls(RelationGetRelid(rel), InvalidOid, true) != RLS_ENABLED) + { + return true; + } + + /* If no RLS policies exist on the relation, tuple passes */ + if (rel->rd_rsdesc == NULL) + { + return true; + } + + /* Get the policies for the specified command type */ + user_id = GetUserId(); + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies (use rt_index=1) */ + add_security_quals(1, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* If no quals, tuple passes */ + if (securityQuals == NIL) + { + return true; + } + + /* Create minimal execution environment */ + estate = CreateExecutorState(); + econtext = CreateExprContext(estate); + + /* Create tuple slot and store the tuple */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(rel), &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); + econtext->ecxt_scantuple = slot; + + /* Compile and evaluate each qual */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + List *qualList; + + /* ExecPrepareQual expects a List */ + if (!IsA(qual, List)) + { + qualList = list_make1(qual); + } + else + { + qualList = (List *) qual; + } + + /* Use ExecPrepareQual for standalone expression evaluation */ + qualExpr = ExecPrepareQual(qualList, estate); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + /* Clean up */ + 
ExecDropSingleTupleTableSlot(slot); + FreeExprContext(econtext, true); + FreeExecutorState(estate); + + return result; +} diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 9960acd7b..991e3f785 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -346,6 +346,100 @@ static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); static bool has_list_comp_or_subquery(Node *expr, void *context); +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Recursively searches through RTEs including subqueries. + */ +static bool +add_rte_permissions_recurse(List *rtable, List *rteperminfos, + Oid relid, AclMode permissions) +{ + ListCell *lc; + + /* First check the perminfos at this level */ + foreach(lc, rteperminfos) + { + RTEPermissionInfo *perminfo = lfirst(lc); + + if (perminfo->relid == relid) + { + perminfo->requiredPerms |= permissions; + return true; + } + } + + /* Then recurse into subqueries */ + foreach(lc, rtable) + { + RangeTblEntry *rte = lfirst(lc); + + if (rte->rtekind == RTE_SUBQUERY && rte->subquery != NULL) + { + if (add_rte_permissions_recurse(rte->subquery->rtable, + rte->subquery->rteperminfos, + relid, permissions)) + { + return true; + } + } + } + + return false; +} + +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Searches through p_rteperminfos and subqueries for a matching relOid + * and adds the specified permissions to requiredPerms. + */ +static void +add_rte_permissions(ParseState *pstate, Oid relid, AclMode permissions) +{ + add_rte_permissions_recurse(pstate->p_rtable, pstate->p_rteperminfos, + relid, permissions); +} + +/* + * Add required permissions to the label table for a given entity variable. + * Looks up the entity by variable name, extracts its label, and adds + * the specified permissions to the corresponding RTEPermissionInfo. 
+ */ +static void +add_entity_permissions(cypher_parsestate *cpstate, char *var_name, + AclMode permissions) +{ + ParseState *pstate = (ParseState *)cpstate; + transform_entity *entity; + char *label = NULL; + Oid relid; + + entity = find_variable(cpstate, var_name); + if (entity == NULL) + { + return; + } + + if (entity->type == ENT_VERTEX) + { + label = entity->entity.node->label; + } + else if (entity->type == ENT_EDGE) + { + label = entity->entity.rel->label; + } + + if (label == NULL) + { + return; + } + + relid = get_label_relation(label, cpstate->graph_oid); + if (OidIsValid(relid)) + { + add_rte_permissions(pstate, relid, permissions); + } +} + /* * transform a cypher_clause */ @@ -1561,6 +1655,9 @@ static List *transform_cypher_delete_item_list(cypher_parsestate *cpstate, parser_errposition(pstate, col->location))); } + /* Add ACL_DELETE permission to the entity's label table */ + add_entity_permissions(cpstate, val->sval, ACL_DELETE); + add_volatile_wrapper_to_target_entry(query->targetList, resno); pos = makeInteger(resno); @@ -1726,6 +1823,9 @@ cypher_update_information *transform_cypher_remove_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); @@ -1903,6 +2003,9 @@ cypher_update_information *transform_cypher_set_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index f2458a30b..c552727d8 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -44,7 +44,10 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include 
"parser/parse_coerce.h" +#include "nodes/nodes.h" +#include "utils/acl.h" #include "utils/builtins.h" +#include "executor/cypher_utils.h" #include "utils/float.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" @@ -5625,15 +5628,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, HeapTuple tuple; TupleDesc tupdesc; Datum id, properties, result; + AclResult aclresult; /* get the specific graph namespace (schema) */ Oid graph_namespace_oid = get_namespace_oid(graph, false); /* get the specific vertex label table (schema.vertex_label) */ Oid vertex_label_table_oid = get_relname_relid(vertex_label, - graph_namespace_oid); + graph_namespace_oid); /* get the active snapshot */ Snapshot snapshot = GetActiveSnapshot(); + /* check for SELECT permission on the table */ + aclresult = pg_class_aclcheck(vertex_label_table_oid, GetUserId(), + ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, vertex_label); + } + /* initialize the scan key */ ScanKeyInit(&scan_keys[0], 1, BTEqualStrategyNumber, F_OIDEQ, Int64GetDatum(graphid)); @@ -5646,11 +5658,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, /* bail if the tuple isn't valid */ if (!HeapTupleIsValid(tuple)) { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("graphid %lu does not exist", graphid))); } + /* Check RLS policies - error if filtered out */ + if (!check_rls_for_tuple(graph_vertex_label, tuple, CMD_SELECT)) + { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("access to vertex %lu denied by row-level security policy on \"%s\"", + graphid, vertex_label))); + } + /* get the tupdesc - we don't need to release this one */ tupdesc = RelationGetDescr(graph_vertex_label); /* bail if the number of columns differs */ diff --git 
a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index 0798f153c..fc4067455 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -21,6 +21,7 @@ #define AG_CYPHER_UTILS_H #include "access/heapam.h" +#include "nodes/execnodes.h" #include "nodes/cypher_nodes.h" #include "utils/agtype.h" @@ -127,4 +128,25 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, TupleTableSlot *elemTupleSlot, EState *estate, CommandId cid); +/* RLS support */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +List *setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +bool check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext); +bool check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd); + +/* Hash table entry for caching RLS state per label */ +typedef struct RLSCacheEntry +{ + Oid relid; /* hash key */ + /* Security quals (USING policies) for UPDATE/DELETE */ + List *qualExprs; + TupleTableSlot *slot; /* slot for old tuple (RLS check) */ + /* WCOs - used only in SET */ + List *withCheckOptions; + List *withCheckOptionExprs; +} RLSCacheEntry; + #endif