diff --git a/Makefile b/Makefile index 3e73f3e68..2d2912571 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,6 @@ OBJS = src/backend/age.o \ src/backend/utils/load/ag_load_labels.o \ src/backend/utils/load/ag_load_edges.o \ src/backend/utils/load/age_load.o \ - src/backend/utils/load/libcsv.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o @@ -112,7 +111,9 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ - map_projection + map_projection \ + direct_field_access \ + security ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) @@ -138,6 +139,10 @@ PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4) +# When SIZEOF_DATUM=4, PASSEDBYVALUE is stripped from graphid type for pass-by-reference. +# If not specified, normal 64-bit behavior is used (PASSEDBYVALUE preserved). + src/backend/parser/cypher_keywords.o: src/include/parser/cypher_kwlist_d.h src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_KEYWORDLIST_DEPS) @@ -147,11 +152,19 @@ src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c +src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h -$(age_sql): +# Strip PASSEDBYVALUE on 32-bit (SIZEOF_DATUM=4) for graphid pass-by-reference +$(age_sql): $(SQLS) @cat $(SQLS) > $@ 
+ifeq ($(SIZEOF_DATUM),4) + @echo "32-bit build: removing PASSEDBYVALUE from graphid type" + @sed 's/^ PASSEDBYVALUE,$$/ -- PASSEDBYVALUE removed for 32-bit (see Makefile)/' $@ > $@.tmp && mv $@.tmp $@ + @grep -q 'PASSEDBYVALUE removed for 32-bit' $@ || { echo "Error: PASSEDBYVALUE replacement failed in $@"; exit 1; } +endif src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes diff --git a/drivers/nodejs/package.json b/drivers/nodejs/package.json index 9f88bc2ba..6be11c780 100644 --- a/drivers/nodejs/package.json +++ b/drivers/nodejs/package.json @@ -33,7 +33,7 @@ "pg": ">=6.0.0" }, "devDependencies": { - "@types/jest": "^26.0.20", + "@types/jest": "^29.5.14", "@types/pg": "^7.14.10", "@typescript-eslint/eslint-plugin": "^4.22.1", "@typescript-eslint/parser": "^4.22.1", @@ -44,8 +44,8 @@ "eslint-plugin-jest": "^24.3.6", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.3.1", - "jest": "^26.6.3", - "ts-jest": "^26.5.1", - "typescript": "^4.1.5" + "jest": "^29.7.0", + "ts-jest": "^29.4.6", + "typescript": "^4.9.5" } } diff --git a/drivers/python/README.md b/drivers/python/README.md index 749b44bfb..e64f9de67 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -28,11 +28,11 @@ AGType parser and driver support for [Apache AGE](https://age.apache.org/), grap ### Features * Unmarshal AGE result data(AGType) to Vertex, Edge, Path -* Cypher query support for Psycopg2 PostgreSQL driver (enables to use cypher queries directly) +* Cypher query support for Psycopg3 PostgreSQL driver (enables to use cypher queries directly) ### Prerequisites * over Python 3.9 -* This module runs on [psycopg2](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) +* This module runs on [psycopg3](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) ``` sudo apt-get update sudo apt-get install python3-dev libpq-dev @@ -80,7 +80,7 @@ SET search_path = ag_catalog, "$user", public; 
``` ### Usage -* If you are not familiar with Psycopg2 driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) +* If you are not familiar with Psycopg driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) * Simpler way to access Apache AGE [AGE Sample](samples/apache-age-note.ipynb) in Samples. * Agtype converting samples: [Agtype Sample](samples/apache-age-agtypes.ipynb) in Samples. @@ -119,7 +119,7 @@ Here the following value required Insert From networkx directed graph into an AGE database. #### Parameters -- `connection` (psycopg2.connect): Connection object to the AGE database. +- `connection` (psycopg.connect): Connection object to the AGE database. - `G` (networkx.DiGraph): Networkx directed graph to be converted and inserted. @@ -152,7 +152,7 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. #### Parameters -- `connection` (psycopg2.connect): Connection object to the PostgreSQL database. +- `connection` (psycopg.connect): Connection object to the PostgreSQL database. - `graphName` (str): Name of the graph. - `G` (None | nx.DiGraph): Optional Networkx directed graph. If provided, the data will be added to this graph. - `query` (str | None): Optional Cypher query to retrieve data from the database. @@ -167,3 +167,4 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. 
# Call the function to convert data into a Networkx graph graph = age_to_networkx(connection, graphName="MyGraph" ) ``` + diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 55d1ff1d6..1f76c31ce 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -454,6 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Test security and permissions +-- +SELECT create_graph('agload_security'); +NOTICE: graph "agload_security" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_security', 'SecEdge'); +NOTICE: ELabel "SecEdge" has been created + create_elabel +--------------- + +(1 row) + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. 
+RESET ROLE; +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied for table Person1 +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied for table SecEdge +RESET ROLE; +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +RESET ROLE; +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 3: Row-Level Security (RLS) +-- +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; +-- Switch to load_test_user +SET ROLE 
load_test_user; +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +RESET ROLE; +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 4: Constraint checking (CHECK constraint) +-- +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: new row for relation "Person1" violates check constraint "check_bool_true" +DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...). 
+-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true" +DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}). +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_security._ag_label_vertex +drop cascades to table agload_security._ag_label_edge +drop cascades to table agload_security."Person1" +drop cascades to table agload_security."Person2" +drop cascades to table agload_security."SecEdge" +NOTICE: graph "agload_security" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/expected/cypher_set.out b/regress/expected/cypher_set.out index 1d24a7f9b..239234ed6 100644 --- a/regress/expected/cypher_set.out +++ b/regress/expected/cypher_set.out @@ -988,6 +988,245 @@ SELECT * 
FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); --- (0 rows) +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. "Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); +NOTICE: graph "issue_1884" has been created + create_graph +-------------- + +(1 row) + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1", "self": {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1"}}::vertex}}::vertex +(1 row) + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + a | e | b 
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "A"}}::vertex | {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge | {"id": 1125899906842626, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "B"}}::vertex +(1 row) + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + e +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1970324836974593, "label": "REL", "end_id": 1688849860263938, "start_id": 1688849860263937, "properties": {"dst": {"id": 1688849860263938, "label": "TestA3", "properties": {"name": "Y"}}::vertex, "src": {"id": 1688849860263937, "label": "TestA3", "properties": {"name": "X"}}::vertex}}::edge +(1 row) + +-- 
============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1, "copy": {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex}}::vertex +(1 row) + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + copy +------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex +(1 row) + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + a +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2533274790395905, "label": "TestB2", "properties": {"name": "start", "myEdge": {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge}}::vertex +(1 row) + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + edge 
+-------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge +(1 row) + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex +(1 row) + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {"myPath": [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path}}::vertex +(1 row) + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + path 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path +(1 row) + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"arr": [{"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex], "tag": "arrtest"}}::vertex +(1 row) + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + elem +--------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex +(1 row) + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + result 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"obj": {"node": {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex}, "tag": "maptest"}}::vertex +(1 row) + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + node +--------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4222124650659841, "label": "TestD1", "properties": {"ref": {"id": 4222124650659841, "label": "TestD1", "properties": {"name": "merged"}}::vertex, "name": "merged"}}::vertex +(1 row) + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4503599627370497, "label": "TestD2", "properties": {"ref": {"id": 4503599627370497, "label": "TestD2", "properties": {"name": 
"created"}}::vertex, "name": "created"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4785074604081153, "label": "TestE1", "properties": {"myId": 4785074604081153, "name": "functest", "myLabel": "TestE1"}}::vertex +(1 row) + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 5066549580791809, "label": "TestE2", "properties": {}}::vertex +(1 row) + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 5066549580791809, "label": "TestE2", "properties": {"pathRels": [{"id": 5348024557502465, "label": "E2REL", "end_id": 5066549580791810, "start_id": 5066549580791809, "properties": {}}::edge], "pathNodes": [{"id": 5066549580791809, 
"label": "TestE2", "properties": {}}::vertex, {"id": 5066549580791810, "label": "TestE2", "properties": {}}::vertex]}}::vertex +(1 row) + -- -- Clean up -- @@ -1038,6 +1277,33 @@ NOTICE: graph "issue_1634" has been dropped (1 row) +SELECT drop_graph('issue_1884', true); +NOTICE: drop cascades to 19 other objects +DETAIL: drop cascades to table issue_1884._ag_label_vertex +drop cascades to table issue_1884._ag_label_edge +drop cascades to table issue_1884."TestA1" +drop cascades to table issue_1884."TestA2" +drop cascades to table issue_1884."LINK" +drop cascades to table issue_1884."TestA3" +drop cascades to table issue_1884."REL" +drop cascades to table issue_1884."TestB1" +drop cascades to table issue_1884."TestB2" +drop cascades to table issue_1884."B2REL" +drop cascades to table issue_1884."TestB3" +drop cascades to table issue_1884."B3REL" +drop cascades to table issue_1884."TestC1" +drop cascades to table issue_1884."TestC2" +drop cascades to table issue_1884."TestD1" +drop cascades to table issue_1884."TestD2" +drop cascades to table issue_1884."TestE1" +drop cascades to table issue_1884."TestE2" +drop cascades to table issue_1884."E2REL" +NOTICE: graph "issue_1884" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/expected/direct_field_access.out b/regress/expected/direct_field_access.out new file mode 100644 index 000000000..0a059cdd9 --- /dev/null +++ b/regress/expected/direct_field_access.out @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('direct_access'); +NOTICE: graph "direct_access" has been created + create_graph +-------------- + +(1 row) + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. 
+-- +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +-------+------+------+------- + false | true | true | false +(1 row) + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + lt | gt +------+------ + true | true +(1 row) + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + eq | ne +----+---- + | +(1 row) + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 
1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. +-- +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + n +--- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + n +--- + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1 +(9 rows) + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + s +---------- + "apple" + "banana" + "cherry" + "date" +(4 rows) + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + f +------ + 1.41 + 1.73 + 2.71 + 3.14 +(4 rows) + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + b +------- + false + false + true + true +(4 rows) + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. 
+-- +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + max_vertex +---------------------------------------------------------------------------------------------- + {"id": 844424930131973, "label": "Person", "properties": {"age": 32, "name": "Eve"}}::vertex +(1 row) + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + min_vertex +------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex +(1 row) + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + max_edge +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131973, "properties": {"since": 2022}}::edge +(1 row) + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + min_edge 
+----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge +(1 row) + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" + "Eve" +(5 rows) + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + name +----------- + "Eve" + "Diana" + "Charlie" + "Bob" + "Alice" +(5 rows) + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + since +------- + 2020 + 2019 + 2021 + 2018 + 2022 +(5 rows) + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + a_name | b_name +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Alice" | "Diana" + "Alice" | "Eve" + "Bob" | "Charlie" + "Bob" | "Diana" + "Bob" | "Eve" + "Charlie" | "Diana" + "Charlie" | "Eve" + "Diana" | "Eve" +(10 rows) + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. 
+-- +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + eq | ne | lt | gt +------+------+------+------ + true | true | true | true +(1 row) + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + add | sub | mul | div +-----+-----+-----+----- + 15 | 5 | 50 | 2 +(1 row) + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + upper | lower | sz +---------+---------+---- + "HELLO" | "world" | 4 +(1 row) + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + int_val | float_val | str_val +---------+-----------+--------- + 42 | 3.14 | "42" +(1 row) + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. 
+-- +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + vertex_id +----------------- + 844424930131969 +(1 row) + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + edge_id +------------------ + 1125899906842625 +(1 row) + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + start_id | alice_id +-----------------+----------------- + 844424930131969 | 844424930131969 +(1 row) + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + end_id | bob_id +-----------------+----------------- + 844424930131970 | 844424930131970 +(1 row) + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + vertex_label +-------------- + "Person" +(1 row) + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + edge_label +------------ + "KNOWS" +(1 row) + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + vertex_props 
+------------------------------ + {"age": 30, "name": "Alice"} +(1 row) + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + edge_props +----------------- + {"since": 2020} +(1 row) + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + a_id | a_label | a_name | r_id | r_start | r_end | r_label | r_since | b_id | b_label | b_name +-----------------+----------+---------+------------------+-----------------+-----------------+---------+---------+-----------------+----------+-------- + 844424930131969 | "Person" | "Alice" | 1125899906842625 | 844424930131969 | 844424930131970 | "KNOWS" | 2020 | 844424930131970 | "Person" | "Bob" +(1 row) + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + eq | lt +------+------ + true | true +(1 row) + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + eq +------ + true +(1 row) + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + big_gt | neg_lt +--------+-------- + true | 
true +(1 row) + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + lt | eq +------+------ + true | true +(1 row) + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + zero_eq +--------- + true +(1 row) + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table direct_access._ag_label_vertex +drop cascades to table direct_access._ag_label_edge +drop cascades to table direct_access."Person" +drop cascades to table direct_access."KNOWS" +NOTICE: graph "direct_access" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 926a958d6..6d9341451 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -319,6 +319,50 @@ $$RETURN 1 IN [[null]]$$) AS r(c boolean); f (1 row) +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -9155,9 +9199,37 @@ ERROR: could not find rte for x LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... 
^ HINT: variable x does not exist within scope of usage +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +NOTICE: graph "issue_2289" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + v +------- + false +(1 row) + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2289._ag_label_vertex +drop cascades to table issue_2289._ag_label_edge +NOTICE: graph "issue_2289" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_2263', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table issue_2263._ag_label_vertex diff --git a/regress/expected/index.out b/regress/expected/index.out index 3ed7b1c33..ec62bf57d 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; SET enable_mergejoin = ON; @@ -265,19 +264,19 @@ $$) as (n agtype); (0 rows) -- Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; indexname | indexdef -----------------------------+------------------------------------------------------------------------------------------------ + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) - _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) - idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) - Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) - has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) - City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city 
USING btree (start_id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) (10 rows) SET enable_mergejoin = ON; @@ -385,6 +384,19 @@ CREATE INDEX load_city_gin_idx ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on "City" c + Recheck Cond: (properties @> '{"city_id": 1}'::agtype) + -> Bitmap Index Scan on load_city_gin_idx + Index Cond: (properties @> '{"city_id": 1}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) RETURN c @@ -418,6 +430,19 @@ $$) as (n agtype); {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex (4 rows) +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------------------- + Bitmap Heap Scan on "Country" c + Recheck Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) + -> Bitmap Index Scan on load_country_gin_idx + Index Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -441,26 +466,293 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, 
'"country_code"'::agtype)); +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Scan using city_country_code_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(2 rows) + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" + "Los Angeles" + "Seattle" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + name +------ +(0 rows) + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); - count -------- - 0 +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Index Scan using city_id_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"city_id"'::agtype]) = '1'::agtype) +(2 rows) + +-- Test WHERE with indexed 
integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" (1 row) -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + name +------------- + "Vancouver" +(1 row) + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" +(2 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Monterrey" + "Tijuana" +(2 rows) + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); -ERROR: graph "agload_test_graph" does not exist -LINE 1: SELECT COUNT(*) FROM cypher('agload_test_graph', $$ - ^ +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) +(2 rows) + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name 
+ ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" + "Vancouver" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +--------------- + "New York" + "Toronto" + "Montreal" + "Mexico City" + "Monterrey" + "Tijuana" +(6 rows) + +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(3 rows) + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" +(3 rows) + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "New York" + "Vancouver" +(2 rows) + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" +(1 row) + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); +-- 
Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Index Scan using country_life_exp_idx on "Country" c + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"life_expectancy"'::agtype]) > '80.0'::agtype) +(2 rows) + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Canada" +(1 row) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Mexico" +(1 row) + +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Index Scan using city_west_coast_idx on "City" city + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + -> Bitmap Heap Scan on has_city _age_default_alias_0 + Recheck Cond: (start_id = city.id) + -> Bitmap Index Scan on has_city_start_id_idx + Index Cond: (start_id = city.id) + -> Index Scan using "Country_pkey" on "Country" country + Index Cond: (id = _age_default_alias_0.end_id) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"CA"'::agtype) +(11 rows) + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + 
MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- @@ -478,5 +770,3 @@ NOTICE: graph "cypher_index" has been dropped (1 row) -SELECT drop_graph('agload_test_graph', true); -ERROR: graph "agload_test_graph" does not exist diff --git a/regress/expected/security.out b/regress/expected/security.out new file mode 100644 index 000000000..59e58cb05 --- /dev/null +++ b/regress/expected/security.out @@ -0,0 +1,1657 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- Test Privileges +-- +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); +NOTICE: graph "security_test" has been created + create_graph +-------------- + +(1 row) + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- +-- Create test roles with different permission levels +-- +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; +-- Role with SELECT and UPDATE +CREATE ROLE 
security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- 
============================================================================ +SET ROLE security_test_noread; +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); +ERROR: permission denied for table Person +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); +ERROR: permission denied for table _ag_label_vertex +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); +ERROR: permission denied for table _ag_label_vertex +RESET ROLE; +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; +SET ROLE security_test_base_only; +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. 
+SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex + {"id": 844424930131970, "label": "Person", "properties": {"age": 25, "name": "Bob"}}::vertex + {"id": 1125899906842625, "label": "Document", "properties": {"title": "Secret", "content": "classified"}}::vertex +(3 rows) + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + e +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge + {"id": 1688849860263937, "label": "OWNS", "end_id": 1125899906842625, "start_id": 844424930131969, "properties": {}}::edge +(2 rows) + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title 
+$$) AS (person agtype, doc agtype); + person | doc +---------+---------- + "Alice" | "Secret" +(1 row) + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); +ERROR: permission denied for table Person +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); +ERROR: permission denied for schema security_test +LINE 1: SELECT * FROM cypher('security_test', $$ + ^ +RESET ROLE; +SET ROLE security_test_insert; +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-----------+----- + "Charlie" | 35 +(1 row) + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person 
+-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +SET ROLE security_test_update; +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Alice" | 31 +(1 row) + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+----------- + "Bob" | "reading" +(1 row) + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+------- + "Bob" | +(1 row) + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- 
============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_delete; +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ +RESET ROLE; +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a 
agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_insert; +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-------+----- + "Eve" | 28 +(1 row) + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + name +------- + "Eve" +(1 row) + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_full; +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', 
$$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Frank" | 51 +(1 row) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ +RESET ROLE; +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; +SET ROLE security_test_person_only; +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + name +--------- + "Alice" +(1 row) + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); +ERROR: permission denied for table Document +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); +ERROR: 
permission denied for table Document +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ +RESET ROLE; +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; +SET ROLE security_test_noexec; +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); +ERROR: permission denied for function drop_graph +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); +ERROR: permission denied for function create_vlabel +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); +ERROR: permission denied for function create_elabel +RESET ROLE; +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; +SET ROLE security_test_noexec; +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); +ERROR: permission denied for schema security_test +-- Test: create_graph should still fail with execute permission denied +SELECT 
create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +RESET ROLE; +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" +SET ROLE security_test_edge_only; +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: permission denied for table Person +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: permission denied for table Person +RESET ROLE; +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; +SET ROLE security_test_edge_only; +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + 
RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+---------- + "Alice" | "Bob" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- Cleanup +-- ============================================================================ +RESET ROLE; +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; +-- Drop test graph +SELECT drop_graph('security_test', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table security_test._ag_label_vertex +drop cascades to table security_test._ag_label_edge +drop cascades to table security_test."Person" +drop cascades to table security_test."Document" +drop cascades to table security_test."KNOWS" +drop cascades to table security_test."OWNS" +NOTICE: graph "security_test" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Row-Level Security (RLS) Tests +-- +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); +NOTICE: graph 
"rls_graph" has been created + create_graph +-------------- + +(1 row) + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * 
FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 rows) + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Bob" + "Diana" +(2 rows) + +RESET ROLE; +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- 
============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +---------------- + "User1Created" +(1 row) + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + name +------ +(0 rows) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM 
cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true +(1 row) + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +------+--------- +(0 rows) + +RESET ROLE; +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true + "Bob" | +(2 rows) + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner +SET ROLE rls_user1; +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + name 
| verified +---------+---------- + "Alice" | true +(1 row) + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + owner +------------- + "rls_user1" +(1 row) + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+---------- + "Alice" | "active" +(1 row) + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +------+-------- +(0 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ +-- Keep existing update policy, test REMOVE operation +SET ROLE rls_user1; +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+-------- + "Alice" | +(1 row) + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person 
{name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + name | dept +------+------ +(0 rows) + +RESET ROLE; +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + dept +--------------- + "Engineering" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY person_insert_all ON rls_graph."Person"; +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET 
ROLE; +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest2" + "DeleteTest3" +(2 rows) + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; +SET ROLE rls_user1; +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest3" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); +ERROR: new row 
violates row-level security policy for table "Person" +RESET ROLE; +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +---------+------------- + "Alice" | "rls_user1" +(1 row) + +RESET ROLE; +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); +SET ROLE rls_user1; +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2021 + 2022 +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; +CREATE POLICY 
knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "KNOWS" +RESET ROLE; +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + strength +---------- + "strong" +(1 row) + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2021 | "updated" +(1 row) + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' 
RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+------- +(0 rows) + +RESET ROLE; +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2020 | + 2021 | "updated" +(2 rows) + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * 
FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); +SET ROLE rls_user1; +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); +ERROR: cannot delete edge due to row-level security policy on "KNOWS" +HINT: DETACH DELETE requires permission to delete all connected edges. 
+RESET ROLE; +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DetachTest1" +(1 row) + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); +SET ROLE rls_user1; +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + title +-------------- + "Public Doc" +(1 row) + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+-------------- + "Alice" | "Public Doc" +(1 row) + +RESET ROLE; +-- 
============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); +SET ROLE rls_user1; +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" +(3 rows) + +RESET ROLE; +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + name | level +-----------+------- + "Alice" | 1 + "Bob" | 2 + "Charlie" | 1 +(3 rows) + +RESET ROLE; +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT 
* FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +RESET ROLE; +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ +DROP POLICY person_very_restrictive ON rls_graph."Person"; +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +CREATE POLICY person_update_own ON 
rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +----------------+------------- + "NewFromMatch" | "rls_user1" +(1 row) + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true +(1 row) + +RESET ROLE; +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true + "Bob" | +(2 rows) + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW 
LEVEL SECURITY; +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: access to vertex 844424930131970 denied by row-level security policy on "Person" +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: access to vertex 844424930131969 denied by row-level security policy on "Person" +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+----------- + "Alice" | "Charlie" +(1 row) + +RESET ROLE; +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- ============================================================================ +-- RLS CLEANUP +-- 
============================================================================ +RESET ROLE; +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; +-- Drop test graph +SELECT drop_graph('rls_graph', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table rls_graph._ag_label_vertex +drop cascades to table rls_graph._ag_label_edge +drop cascades to table rls_graph."Person" +drop cascades to table rls_graph."Document" +drop cascades to table rls_graph."KNOWS" +drop cascades to table rls_graph."AUTHORED" +NOTICE: graph "rls_graph" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index cefcfb4ca..976f050af 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -194,6 +194,131 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t -- SELECT drop_graph('agload_conversion', true); +-- +-- Test security and permissions +-- + +SELECT create_graph('agload_security'); +SELECT create_vlabel('agload_security', 'Person1'); +SELECT create_vlabel('agload_security', 'Person2'); +SELECT create_elabel('agload_security', 'SecEdge'); + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; + +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 
'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; + +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; + +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 3: Row-Level Security (RLS) +-- + +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; + +-- Switch to load_test_user +SET ROLE load_test_user; + +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 
'age_load/conversion_edges.csv'); + +RESET ROLE; + +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; + +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 4: Constraint checking (CHECK constraint) +-- + +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + +-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; + +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA 
agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); + -- -- End -- diff --git a/regress/sql/cypher_set.sql b/regress/sql/cypher_set.sql index a2667153d..e745d5d6e 100644 --- a/regress/sql/cypher_set.sql +++ b/regress/sql/cypher_set.sql @@ -379,6 +379,169 @@ SELECT * FROM cypher('issue_1634', $$ MERGE (v:PERSION {id: '1'}) SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. "Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ + +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + 
RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ + +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ + +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + +-- Verify array with 
nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ + +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ + +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + -- -- Clean up -- @@ -387,6 +550,7 @@ DROP FUNCTION set_test; SELECT drop_graph('cypher_set', true); SELECT drop_graph('cypher_set_1', 
true); SELECT drop_graph('issue_1634', true); +SELECT drop_graph('issue_1884', true); -- -- End diff --git a/regress/sql/direct_field_access.sql b/regress/sql/direct_field_access.sql new file mode 100644 index 000000000..c8060be4a --- /dev/null +++ b/regress/sql/direct_field_access.sql @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('direct_access'); + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. 
+-- + +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. 
+-- + +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. +-- + +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH 
()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. +-- + +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. 
+-- + +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), 
properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- + +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 7bf1f26b2..445e2d237 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -157,6 +157,20 @@ SELECT * FROM cypher('expr', $$RETURN 1 in [[1]]$$) AS r(c boolean); SELECT * FROM cypher('expr', $$RETURN 1 IN [[null]]$$) AS r(c boolean); +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); +SELECT * FROM 
cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -3690,9 +3704,18 @@ SELECT * FROM cypher('issue_2263', $$ CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) $$) AS (out agtype); +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); diff --git a/regress/sql/index.sql b/regress/sql/index.sql index d9a4331a4..d4a4b24a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -17,8 +17,6 @@ * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load - LOAD 'age'; SET search_path TO ag_catalog; @@ -167,7 +165,7 @@ SELECT * FROM cypher('cypher_index', $$ $$) as (n agtype); -- Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; @@ -219,6 +217,11 @@ ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) @@ -235,6 +238,12 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c $$) as (n agtype); +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -250,23 +259,180 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); + +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as 
(name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); + +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); +$$) as (plan agtype); -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); + +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); +$$) as (plan 
agtype); + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); + +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) 
MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- SELECT drop_graph('cypher_index', true); -SELECT drop_graph('agload_test_graph', true); diff --git a/regress/sql/security.sql b/regress/sql/security.sql new file mode 100644 index 000000000..344dd23d4 --- /dev/null +++ b/regress/sql/security.sql @@ -0,0 +1,1451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- Test Privileges +-- + +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + +-- +-- Create test roles with different permission levels +-- + +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; + +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; + +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO 
security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; + +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; + +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; + +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; + +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables + +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ + +SET ROLE security_test_noread; + +-- Test: MATCH on vertices should fail 
without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); + +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); + +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +RESET ROLE; + +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; + +SET ROLE security_test_base_only; + +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. 
+SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ + +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); + +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 
'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); + +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); + 
+RESET ROLE; +SET ROLE security_test_update; + +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_delete; + +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ + +RESET ROLE; + +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + 
+SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_full; + +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + +SELECT * FROM 
cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ + +RESET ROLE; + +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; + +SET ROLE security_test_person_only; + +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); + +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); + +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- 
============================================================================ + +RESET ROLE; + +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; + +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; + +SET ROLE security_test_noexec; + +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); + +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); + +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); + +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); + +RESET ROLE; + +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; + +SET ROLE security_test_noexec; + +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); + +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); + +RESET ROLE; + +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; + 
+-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ + +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" + +SET ROLE security_test_edge_only; + +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); + +RESET ROLE; + +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; + +SET ROLE security_test_edge_only; + +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +RESET ROLE; + +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; + +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE 
security_test_base_only; + +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; + +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; + +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; + +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; + +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; + +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; + +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; + +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; + +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; + +-- Drop test graph +SELECT drop_graph('security_test', true); + +-- +-- Row-Level Security (RLS) Tests +-- + +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS + +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) 
+$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; + +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ + +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY 
p.name +$$) AS (name agtype); + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; + +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ + +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; + +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 
'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +RESET ROLE; + +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +RESET ROLE; + +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; + +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner + +SET ROLE rls_user1; + +-- Should succeed - modifying property but keeping owner +SELECT * 
FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); + +RESET ROLE; + +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; + +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ + +-- Keep existing update policy, test REMOVE operation + +SET ROLE rls_user1; + +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + +RESET ROLE; + +-- Verify Bob still has department +SELECT * FROM 
cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; + +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +DROP POLICY person_insert_all ON rls_graph."Person"; + +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; + +SET 
ROLE rls_user1; + +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; + +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +RESET ROLE; + +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON 
rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ + +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; + +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); + +SET ROLE rls_user1; + +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ + +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); + +RESET ROLE; + +-- 
Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ + +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +RESET ROLE; + +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) 
+ CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + +RESET ROLE; + +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ + +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); + +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), 
(b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); + +SET ROLE rls_user1; + +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ + +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; + +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); + +SET ROLE rls_user1; + +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +SELECT * FROM cypher('rls_graph', $$ 
+ MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ + +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; + +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); + +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); + +SET ROLE rls_user1; + +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) fails the restrictive policy (she also matches no permissive policy) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + +RESET ROLE; + +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 
'Sales'); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ + +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; + +-- Permissive policy whose predicate matches no existing Person, so it blocks all access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); + +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ + +DROP POLICY person_very_restrictive ON rls_graph."Person"; + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +CREATE POLICY person_update_own ON rls_graph."Person" + FOR 
UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; + +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + +RESET ROLE; + +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; + +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ + +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; + +-- Enable RLS on Person with an ownership-based policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Enable edge access for testing +ALTER TABLE 
rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- 16.1: endNode blocked by RLS - should error +SET ROLE rls_user1; + +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- 16.2: startNode blocked by RLS - should error +-- Switch to rls_user2 so the edge's end vertex (Bob) is visible but its start vertex is not +SET ROLE rls_user2; + +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); + +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; + +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ + +RESET ROLE; + +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; + +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; + +DROP OWNED BY rls_admin CASCADE; +DROP ROLE 
rls_admin; + +-- Drop test graph +SELECT drop_graph('rls_graph', true); diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 2031fe8d8..495eb3a08 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -122,6 +123,12 @@ static void begin_cypher_create(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } } diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index f86c6126b..0b486ad5e 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -22,6 +22,9 @@ #include "executor/executor.h" #include "storage/bufmgr.h" #include "common/hashfn.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -371,6 +374,16 @@ static void process_delete_list(CustomScanState *node) ExprContext *econtext = css->css.ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; EState *estate = node->ss.ps.state; + HTAB *qual_cache = NULL; + HASHCTL hashctl; + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("delete_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); foreach(lc, css->delete_data->delete_items) { @@ -383,6 +396,7 @@ static void process_delete_list(CustomScanState 
*node) char *label_name; Integer *pos; int entity_position; + Oid relid; item = lfirst(lc); @@ -401,6 +415,7 @@ static void process_delete_list(CustomScanState *node) label_name = pnstrdup(label->val.string.val, label->val.string.len); resultRelInfo = create_entity_result_rel_info(estate, css->delete_data->graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); /* * Setup the scan key to require the id field on-disc to match the @@ -448,6 +463,36 @@ static void process_delete_list(CustomScanState *node) continue; } + /* Check RLS security quals (USING policy) before delete */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + bool found; + + /* Get cached security quals and slot for this label */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_DELETE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + entry->withCheckOptions = NIL; + entry->withCheckOptionExprs = NIL; + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + + /* Silently skip if USING policy filters out this row */ + if (!check_security_quals(entry->qualExprs, entry->slot, econtext)) + { + table_endscan(scan_desc); + destroy_entity_result_rel_info(resultRelInfo); + continue; + } + } + /* * For vertices, we insert the vertex ID in the hashtable * vertex_id_htab. 
This hashtable is used later to process @@ -467,6 +512,9 @@ static void process_delete_list(CustomScanState *node) table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } + + /* Clean up the cache */ + hash_destroy(qual_cache); } /* @@ -490,9 +538,14 @@ static void check_for_connected_edges(CustomScanState *node) TableScanDesc scan_desc; HeapTuple tuple; TupleTableSlot *slot; + Oid relid; + bool rls_enabled = false; + List *qualExprs = NIL; + ExprContext *econtext = NULL; resultRelInfo = create_entity_result_rel_info(estate, graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); estate->es_snapshot->curcid = GetCurrentCommandId(false); estate->es_output_cid = GetCurrentCommandId(false); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, @@ -501,6 +554,22 @@ static void check_for_connected_edges(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* + * For DETACH DELETE with RLS enabled, compile the security qual + * expressions once per label for efficient evaluation. + */ + if (css->delete_data->detach) + { + /* Setup RLS security quals for this label */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + rls_enabled = true; + econtext = css->css.ss.ps.ps_ExprContext; + qualExprs = setup_security_quals(resultRelInfo, estate, node, + CMD_DELETE); + } + } + /* for each row */ while (true) { @@ -538,6 +607,34 @@ static void check_for_connected_edges(CustomScanState *node) { if (css->delete_data->detach) { + AclResult aclresult; + + /* Check that the user has DELETE permission on the edge table */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_DELETE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, label_name); + } + + /* Check RLS security quals (USING policy) before delete */ + if (rls_enabled) + { + /* + * For DETACH DELETE, error out if edge RLS check fails. 
+ * Unlike normal DELETE which silently skips, we cannot + * silently skip edges here as it would leave dangling + * edges pointing to deleted vertices. + */ + if (!check_security_quals(qualExprs, slot, econtext)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot delete edge due to row-level security policy on \"%s\"", + label_name), + errhint("DETACH DELETE requires permission to delete all connected edges."))); + } + } + delete_entity(estate, resultRelInfo, tuple); } else diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 6cfa70d48..a1bb4686c 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -20,11 +20,12 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/datum.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" -#include "utils/datum.h" /* * The following structure is used to hold a single vertex or edge component @@ -182,6 +183,12 @@ static void begin_cypher_merge(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } /* diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index 9fd599eed..a1063af32 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -19,8 +19,10 @@ #include "postgres.h" +#include "common/hashfn.h" #include "executor/executor.h" #include "storage/bufmgr.h" +#include "utils/rls.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" @@ -136,6 +138,13 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, 
estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + result = table_tuple_update(resultRelInfo->ri_RelationDesc, &tuple->t_self, elemTupleSlot, cid, estate->es_snapshot, @@ -372,9 +381,20 @@ static void process_update_list(CustomScanState *node) EState *estate = css->css.ss.ps.state; int *luindex = NULL; int lidx = 0; + HTAB *qual_cache = NULL; + HASHCTL hashctl; /* allocate an array to hold the last update index of each 'entity' */ luindex = palloc0(sizeof(int) * scanTupleSlot->tts_nvalid); + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("update_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + /* * Iterate through the SET items list and store the loop index of each * 'entity' update. 
As there is only one entry for each entity, this will @@ -522,6 +542,38 @@ static void process_update_list(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* Setup RLS policies if RLS is enabled */ + if (check_enable_rls(resultRelInfo->ri_RelationDesc->rd_id, + InvalidOid, true) == RLS_ENABLED) + { + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + RLSCacheEntry *entry; + bool found; + + /* Get cached RLS state for this label, or set it up */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + /* Setup WITH CHECK policies */ + setup_wcos(resultRelInfo, estate, node, CMD_UPDATE); + entry->withCheckOptions = resultRelInfo->ri_WithCheckOptions; + entry->withCheckOptionExprs = resultRelInfo->ri_WithCheckOptionExprs; + + /* Setup security quals */ + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_UPDATE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + } + else + { + /* Use cached WCOs */ + resultRelInfo->ri_WithCheckOptions = entry->withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = entry->withCheckOptionExprs; + } + } + /* * Now that we have the updated properties, create a either a vertex or * edge Datum for the in-memory update, and setup the tupleTableSlot @@ -597,8 +649,36 @@ static void process_update_list(CustomScanState *node) */ if (HeapTupleIsValid(heap_tuple)) { - heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, - heap_tuple); + bool should_update = true; + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + + /* Check RLS security quals (USING policy) before update */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + + /* Entry was already created earlier when setting up WCOs */ + entry = hash_search(qual_cache, &relid, HASH_FIND, NULL); + if (!entry) + { + ereport(ERROR, + 
(errcode(ERRCODE_INTERNAL_ERROR), + errmsg("missing RLS cache entry for relation %u", + relid))); + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + should_update = check_security_quals(entry->qualExprs, + entry->slot, + econtext); + } + + /* Silently skip if USING policy filters out this row */ + if (should_update) + { + heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, + heap_tuple); + } } /* close the ScanDescription */ table_endscan(scan_desc); @@ -612,6 +692,10 @@ static void process_update_list(CustomScanState *node) /* increment loop index */ lidx++; } + + /* Clean up the cache */ + hash_destroy(qual_cache); + /* free our lookup array */ pfree_if_not_null(luindex); } diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index d7a55f709..eff829925 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -25,14 +25,35 @@ #include "postgres.h" #include "executor/executor.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" #include "executor/cypher_utils.h" #include "utils/ag_cache.h" +/* RLS helper function declarations */ +static void get_policies_for_relation(Relation relation, CmdType cmd, + Oid user_id, List **permissive_policies, + List **restrictive_policies); +static void add_with_check_options(Relation rel, int rt_index, WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, bool *hasSubLinks, + bool force_using); +static void add_security_quals(int rt_index, List *permissive_policies, + List *restrictive_policies, + List **securityQuals, bool *hasSubLinks); +static void sort_policies_by_name(List *policies); +static int row_security_policy_cmp(const ListCell *a, const ListCell *b); +static bool 
check_role_for_policy(ArrayType *policy_roles, Oid user_id); + /* * Given the graph name and the label name, create a ResultRelInfo for the table * those two variables represent. Open the Indices too. @@ -256,6 +277,13 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_INSERT_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + /* Insert the tuple normally */ table_tuple_insert(resultRelInfo->ri_RelationDesc, elemTupleSlot, cid, 0, NULL); @@ -269,3 +297,754 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, return tuple; } + +/* + * setup_wcos + * + * WithCheckOptions are added during the rewrite phase, but since AGE uses + * CMD_SELECT for all queries, WCOs don't get added for CREATE/SET/MERGE + * operations. This function compensates by adding WCOs at execution time. + * + * Based on PostgreSQL's row security implementation in rowsecurity.c + */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *withCheckOptions = NIL; + List *wcoExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + WCOKind wco_kind; + bool hasSubLinks = false; + + /* Determine the WCO kind based on command type */ + if (cmd == CMD_INSERT) + { + wco_kind = WCO_RLS_INSERT_CHECK; + } + else if (cmd == CMD_UPDATE) + { + wco_kind = WCO_RLS_UPDATE_CHECK; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_wcos"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. 
+ * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. + */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build WithCheckOptions from the policies */ + add_with_check_options(rel, rt_index, wco_kind, + permissive_policies, + restrictive_policies, + &withCheckOptions, + &hasSubLinks, + false); + + /* Compile the WCO expressions */ + foreach(lc, withCheckOptions) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, lc); + ExprState *wcoExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(wco->qual, List)) + { + wco->qual = (Node *) list_make1(wco->qual); + } + + wcoExpr = ExecInitQual((List *) wco->qual, (PlanState *) node); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + /* Set up the ResultRelInfo with WCOs */ + resultRelInfo->ri_WithCheckOptions = withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; +} + +/* + * get_policies_for_relation + * + * Returns lists of permissive and restrictive policies to be applied to the + * specified relation, based on the command type and role. + * + * This includes any policies added by extensions. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id, + List **permissive_policies, + List **restrictive_policies) +{ + ListCell *item; + + *permissive_policies = NIL; + *restrictive_policies = NIL; + + /* No policies if RLS descriptor is not present */ + if (relation->rd_rsdesc == NULL) + { + return; + } + + /* First find all internal policies for the relation. */ + foreach(item, relation->rd_rsdesc->policies) + { + bool cmd_matches = false; + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + /* Always add ALL policies, if they exist. 
*/ + if (policy->polcmd == '*') + { + cmd_matches = true; + } + else + { + /* Check whether the policy applies to the specified command type */ + switch (cmd) + { + case CMD_SELECT: + if (policy->polcmd == ACL_SELECT_CHR) + { + cmd_matches = true; + } + break; + case CMD_INSERT: + if (policy->polcmd == ACL_INSERT_CHR) + { + cmd_matches = true; + } + break; + case CMD_UPDATE: + if (policy->polcmd == ACL_UPDATE_CHR) + { + cmd_matches = true; + } + break; + case CMD_DELETE: + if (policy->polcmd == ACL_DELETE_CHR) + { + cmd_matches = true; + } + break; + case CMD_MERGE: + /* + * We do not support a separate policy for MERGE command. + * Instead it derives from the policies defined for other + * commands. + */ + break; + default: + elog(ERROR, "unrecognized policy command type %d", + (int) cmd); + break; + } + } + + /* + * Add this policy to the relevant list of policies if it applies to + * the specified role. + */ + if (cmd_matches && check_role_for_policy(policy->roles, user_id)) + { + if (policy->permissive) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + else + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + /* + * We sort restrictive policies by name so that any WCOs they generate are + * checked in a well-defined order. + */ + sort_policies_by_name(*restrictive_policies); + + /* + * Then add any permissive or restrictive policies defined by extensions. + * These are simply appended to the lists of internal policies, if they + * apply to the specified role. + */ + if (row_security_policy_hook_restrictive) + { + List *hook_policies = + (*row_security_policy_hook_restrictive) (cmd, relation); + + /* + * As with built-in restrictive policies, we sort any hook-provided + * restrictive policies by name also. Note that we also intentionally + * always check all built-in restrictive policies, in name order, + * before checking restrictive policies added by hooks, in name order. 
+ */ + sort_policies_by_name(hook_policies); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + if (row_security_policy_hook_permissive) + { + List *hook_policies = + (*row_security_policy_hook_permissive) (cmd, relation); + + foreach(item, hook_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (check_role_for_policy(policy->roles, user_id)) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + } + } +} + +/* + * add_with_check_options + * + * Add WithCheckOptions of the specified kind to check that new records + * added by an INSERT or UPDATE are consistent with the specified RLS + * policies. Normally new data must satisfy the WITH CHECK clauses from the + * policies. If a policy has no explicit WITH CHECK clause, its USING clause + * is used instead. In the special case of an UPDATE arising from an + * INSERT ... ON CONFLICT DO UPDATE, existing records are first checked using + * a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING + * clauses from RLS policies. + * + * New WCOs are added to withCheckOptions, and hasSubLinks is set to true if + * any of the check clauses added contain sublink subqueries. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_with_check_options(Relation rel, + int rt_index, + WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, + bool *hasSubLinks, + bool force_using) +{ + ListCell *item; + List *permissive_quals = NIL; + +#define QUAL_FOR_WCO(policy) \ + ( !force_using && \ + (policy)->with_check_qual != NULL ? \ + (policy)->with_check_qual : (policy)->qual ) + + /* + * First collect up the permissive policy clauses, similar to + * add_security_quals. 
+ */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + permissive_quals = lappend(permissive_quals, copyObject(qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * There must be at least one permissive qual found or no rows are allowed + * to be added. This is the same as in add_security_quals. + * + * If there are no permissive_quals then we fall through and return a + * single 'false' WCO, preventing all new rows. + */ + if (permissive_quals != NIL) + { + /* + * Add a single WithCheckOption for all the permissive policy clauses, + * combining them together using OR. This check has no policy name, + * since if the check fails it means that no policy granted permission + * to perform the update, rather than any particular policy being + * violated. + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->cascaded = false; + + if (list_length(permissive_quals) == 1) + { + wco->qual = (Node *) linitial(permissive_quals); + } + else + { + wco->qual = (Node *) makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes(wco->qual, 1, rt_index, 0); + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + + /* + * Now add WithCheckOptions for each of the restrictive policy clauses + * (which will be combined together using AND). We use a separate + * WithCheckOption for each restrictive policy to allow the policy + * name to be included in error reports if the policy is violated. 
+ */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual = QUAL_FOR_WCO(policy); + + if (qual != NULL) + { + qual = copyObject(qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = pstrdup(policy->policy_name); + wco->qual = (Node *) qual; + wco->cascaded = false; + + *withCheckOptions = list_append_unique(*withCheckOptions, wco); + *hasSubLinks |= policy->hassublinks; + } + } + } + else + { + /* + * If there were no policy clauses to check new data, add a single + * always-false WCO (a default-deny policy). + */ + WithCheckOption *wco; + + wco = makeNode(WithCheckOption); + wco->kind = kind; + wco->relname = pstrdup(RelationGetRelationName(rel)); + wco->polname = NULL; + wco->qual = (Node *) makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true); + wco->cascaded = false; + + *withCheckOptions = lappend(*withCheckOptions, wco); + } +} + +/* + * sort_policies_by_name + * + * This is only used for restrictive policies, ensuring that any + * WithCheckOptions they generate are applied in a well-defined order. + * This is not necessary for permissive policies, since they are all combined + * together using OR into a single WithCheckOption check. 
+ * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +sort_policies_by_name(List *policies) +{ + list_sort(policies, row_security_policy_cmp); +} + +/* + * list_sort comparator to sort RowSecurityPolicy entries by name + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static int +row_security_policy_cmp(const ListCell *a, const ListCell *b) +{ + const RowSecurityPolicy *pa = (const RowSecurityPolicy *) lfirst(a); + const RowSecurityPolicy *pb = (const RowSecurityPolicy *) lfirst(b); + + /* Guard against NULL policy names from extensions */ + if (pa->policy_name == NULL) + { + return pb->policy_name == NULL ? 0 : 1; + } + if (pb->policy_name == NULL) + { + return -1; + } + + return strcmp(pa->policy_name, pb->policy_name); +} + +/* + * check_role_for_policy - + * determines if the policy should be applied for the current role + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static bool +check_role_for_policy(ArrayType *policy_roles, Oid user_id) +{ + int i; + Oid *roles = (Oid *) ARR_DATA_PTR(policy_roles); + + /* Quick fall-thru for policies applied to all roles */ + if (roles[0] == ACL_ID_PUBLIC) + { + return true; + } + + for (i = 0; i < ARR_DIMS(policy_roles)[0]; i++) + { + if (has_privs_of_role(user_id, roles[i])) + { + return true; + } + } + + return false; +} + +/* + * add_security_quals + * + * Add security quals to enforce the specified RLS policies, restricting + * access to existing data in a table. If there are no policies controlling + * access to the table, then all access is prohibited --- i.e., an implicit + * default-deny policy is used. + * + * New security quals are added to securityQuals, and hasSubLinks is set to + * true if any of the quals added contain sublink subqueries. 
+ * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +add_security_quals(int rt_index, + List *permissive_policies, + List *restrictive_policies, + List **securityQuals, + bool *hasSubLinks) +{ + ListCell *item; + List *permissive_quals = NIL; + Expr *rowsec_expr; + + /* + * First collect up the permissive quals. If we do not find any + * permissive policies then no rows are visible (this is handled below). + */ + foreach(item, permissive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + if (policy->qual != NULL) + { + permissive_quals = lappend(permissive_quals, + copyObject(policy->qual)); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * We must have permissive quals, always, or no rows are visible. + * + * If we do not, then we simply return a single 'false' qual which results + * in no rows being visible. + */ + if (permissive_quals != NIL) + { + /* + * We now know that permissive policies exist, so we can now add + * security quals based on the USING clauses from the restrictive + * policies. Since these need to be combined together using AND, we + * can just add them one at a time. + */ + foreach(item, restrictive_policies) + { + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + Expr *qual; + + if (policy->qual != NULL) + { + qual = copyObject(policy->qual); + ChangeVarNodes((Node *) qual, 1, rt_index, 0); + + *securityQuals = list_append_unique(*securityQuals, qual); + *hasSubLinks |= policy->hassublinks; + } + } + + /* + * Then add a single security qual combining together the USING + * clauses from all the permissive policies using OR. 
+ */ + if (list_length(permissive_quals) == 1) + { + rowsec_expr = (Expr *) linitial(permissive_quals); + } + else + { + rowsec_expr = makeBoolExpr(OR_EXPR, permissive_quals, -1); + } + + ChangeVarNodes((Node *) rowsec_expr, 1, rt_index, 0); + *securityQuals = list_append_unique(*securityQuals, rowsec_expr); + } + else + { + /* + * A permissive policy must exist for rows to be visible at all. + * Therefore, if there were no permissive policies found, return a + * single always-false clause. + */ + *securityQuals = lappend(*securityQuals, + makeConst(BOOLOID, -1, InvalidOid, + sizeof(bool), BoolGetDatum(false), + false, true)); + } +} + +/* + * setup_security_quals + * + * Security quals (USING policies) are added during the rewrite phase, but + * since AGE uses CMD_SELECT for all queries, they don't get added for + * UPDATE/DELETE operations. This function sets up security quals at + * execution time to be evaluated against each tuple before modification. + * + * Returns a list of compiled ExprState for the security quals. + */ +List * +setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + List *qualExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + bool hasSubLinks = false; + + /* Only UPDATE and DELETE have security quals */ + if (cmd != CMD_UPDATE && cmd != CMD_DELETE) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_security_quals"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* If no RLS policies exist, return empty list */ + if (rel->rd_rsdesc == NULL) + { + return NIL; + } + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. + * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. 
+ */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies */ + add_security_quals(rt_index, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* Compile the security qual expressions */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(qual, List)) + { + qual = (Expr *) list_make1(qual); + } + + qualExpr = ExecInitQual((List *) qual, (PlanState *) node); + qualExprs = lappend(qualExprs, qualExpr); + } + + return qualExprs; +} + +/* + * check_security_quals + * + * Evaluate security quals against a tuple. Returns true if all quals pass + * (row can be modified), false if any qual fails (row should be silently + * skipped). + * + * This matches PostgreSQL's behavior where USING expressions for UPDATE/DELETE + * silently filter rows rather than raising errors. + */ +bool +check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext) +{ + ListCell *lc; + TupleTableSlot *saved_scantuple; + bool result = true; + + if (qualExprs == NIL) + { + return true; + } + + /* Save and set up the scan tuple for expression evaluation */ + saved_scantuple = econtext->ecxt_scantuple; + econtext->ecxt_scantuple = slot; + + foreach(lc, qualExprs) + { + ExprState *qualExpr = (ExprState *) lfirst(lc); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + econtext->ecxt_scantuple = saved_scantuple; + return result; +} + +/* + * check_rls_for_tuple + * + * Check RLS policies for a tuple without needing full executor context. + * Used by standalone functions like startNode()/endNode() that access + * tables directly. 
+ * + * Returns true if the tuple passes RLS checks (or if RLS is not enabled), + * false if the tuple should be filtered out. + */ +bool +check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *securityQuals = NIL; + ListCell *lc; + Oid user_id; + bool hasSubLinks = false; + bool result = true; + EState *estate; + ExprContext *econtext; + TupleTableSlot *slot; + + /* If RLS is not enabled, tuple passes */ + if (check_enable_rls(RelationGetRelid(rel), InvalidOid, true) != RLS_ENABLED) + { + return true; + } + + /* If no RLS policies exist on the relation, tuple passes */ + if (rel->rd_rsdesc == NULL) + { + return true; + } + + /* Get the policies for the specified command type */ + user_id = GetUserId(); + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build security quals from the policies (use rt_index=1) */ + add_security_quals(1, permissive_policies, restrictive_policies, + &securityQuals, &hasSubLinks); + + /* If no quals, tuple passes */ + if (securityQuals == NIL) + { + return true; + } + + /* Create minimal execution environment */ + estate = CreateExecutorState(); + econtext = CreateExprContext(estate); + + /* Create tuple slot and store the tuple */ + slot = MakeSingleTupleTableSlot(RelationGetDescr(rel), &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); + econtext->ecxt_scantuple = slot; + + /* Compile and evaluate each qual */ + foreach(lc, securityQuals) + { + Expr *qual = (Expr *) lfirst(lc); + ExprState *qualExpr; + List *qualList; + + /* ExecPrepareQual expects a List */ + if (!IsA(qual, List)) + { + qualList = list_make1(qual); + } + else + { + qualList = (List *) qual; + } + + /* Use ExecPrepareQual for standalone expression evaluation */ + qualExpr = ExecPrepareQual(qualList, estate); + + if (!ExecQual(qualExpr, econtext)) + { + result = false; + break; + } + } + + /* Clean up */ + 
ExecDropSingleTupleTableSlot(slot); + FreeExprContext(econtext, true); + FreeExecutorState(estate); + + return result; +} diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index acc52349d..991e3f785 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -71,6 +71,7 @@ #define AGE_VARNAME_MERGE_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"merge_clause" #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id" #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause" +#define AGE_VARNAME_SET_VALUE AGE_DEFAULT_VARNAME_PREFIX"set_value" /* * In the transformation stage, we need to track @@ -345,6 +346,100 @@ static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); static bool has_list_comp_or_subquery(Node *expr, void *context); +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Recursively searches through RTEs including subqueries. + */ +static bool +add_rte_permissions_recurse(List *rtable, List *rteperminfos, + Oid relid, AclMode permissions) +{ + ListCell *lc; + + /* First check the perminfos at this level */ + foreach(lc, rteperminfos) + { + RTEPermissionInfo *perminfo = lfirst(lc); + + if (perminfo->relid == relid) + { + perminfo->requiredPerms |= permissions; + return true; + } + } + + /* Then recurse into subqueries */ + foreach(lc, rtable) + { + RangeTblEntry *rte = lfirst(lc); + + if (rte->rtekind == RTE_SUBQUERY && rte->subquery != NULL) + { + if (add_rte_permissions_recurse(rte->subquery->rtable, + rte->subquery->rteperminfos, + relid, permissions)) + { + return true; + } + } + } + + return false; +} + +/* + * Add required permissions to the RTEPermissionInfo for a relation. + * Searches through p_rteperminfos and subqueries for a matching relOid + * and adds the specified permissions to requiredPerms. 
+ */ +static void +add_rte_permissions(ParseState *pstate, Oid relid, AclMode permissions) +{ + add_rte_permissions_recurse(pstate->p_rtable, pstate->p_rteperminfos, + relid, permissions); +} + +/* + * Add required permissions to the label table for a given entity variable. + * Looks up the entity by variable name, extracts its label, and adds + * the specified permissions to the corresponding RTEPermissionInfo. + */ +static void +add_entity_permissions(cypher_parsestate *cpstate, char *var_name, + AclMode permissions) +{ + ParseState *pstate = (ParseState *)cpstate; + transform_entity *entity; + char *label = NULL; + Oid relid; + + entity = find_variable(cpstate, var_name); + if (entity == NULL) + { + return; + } + + if (entity->type == ENT_VERTEX) + { + label = entity->entity.node->label; + } + else if (entity->type == ENT_EDGE) + { + label = entity->entity.rel->label; + } + + if (label == NULL) + { + return; + } + + relid = get_label_relation(label, cpstate->graph_oid); + if (OidIsValid(relid)) + { + add_rte_permissions(pstate, relid, permissions); + } +} + /* * transform a cypher_clause */ @@ -1560,6 +1655,9 @@ static List *transform_cypher_delete_item_list(cypher_parsestate *cpstate, parser_errposition(pstate, col->location))); } + /* Add ACL_DELETE permission to the entity's label table */ + add_entity_permissions(cpstate, val->sval, ACL_DELETE); + add_volatile_wrapper_to_target_entry(query->targetList, resno); pos = makeInteger(resno); @@ -1725,6 +1823,9 @@ cypher_update_information *transform_cypher_remove_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); @@ -1902,6 +2003,9 @@ cypher_update_information *transform_cypher_set_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label 
table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); @@ -1911,10 +2015,24 @@ cypher_update_information *transform_cypher_set_item_list( ((cypher_map*)set_item->expr)->keep_null = set_item->is_add; } - /* create target entry for the new property value */ + /* + * Create target entry for the new property value. + * + * We use a hidden variable name (AGE_VARNAME_SET_VALUE) for the + * SET expression value to prevent column name conflicts. This is + * necessary when the same variable is used on both the LHS and RHS + * of a SET clause (e.g., SET n.prop = n). Without this, the column + * name derived from the expression (e.g., "n") would duplicate the + * existing column name from the MATCH clause, causing a "column + * reference is ambiguous" error in subsequent clauses like RETURN. + * + * The hidden variable name will be filtered out by expand_pnsi_attrs + * when the targetlist is expanded for subsequent clauses. + */ item->prop_position = (AttrNumber)pstate->p_next_resno; target_item = transform_cypher_item(cpstate, set_item->expr, NULL, - EXPR_KIND_SELECT_TARGET, NULL, + EXPR_KIND_SELECT_TARGET, + AGE_VARNAME_SET_VALUE, false); if (nodeTag(target_item->expr) == T_Aggref) diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 5f4de86b9..fc0335def 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -600,6 +600,34 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) Assert(is_ag_node(a->rexpr, cypher_list)); + rexpr = (cypher_list *)a->rexpr; + + /* + * Handle empty list case: x IN [] is always false, x NOT IN [] is always true. + * We need to check this before processing to avoid returning NULL result + * which causes "cache lookup failed for type 0" error. 
+ */ + if (rexpr->elems == NIL || list_length((List *)rexpr->elems) == 0) + { + Datum bool_value; + Const *const_result; + + /* If operator is <> (NOT IN), result is true; otherwise (IN) result is false */ + if (strcmp(strVal(linitial(a->name)), "<>") == 0) + { + bool_value = BoolGetDatum(true); + } + else + { + bool_value = BoolGetDatum(false); + } + + const_result = makeConst(BOOLOID, -1, InvalidOid, sizeof(bool), + bool_value, false, true); + + return (Node *)const_result; + } + /* If the operator is <>, combine with AND not OR. */ if (strcmp(strVal(linitial(a->name)), "<>") == 0) { @@ -614,8 +642,6 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) rexprs = rvars = rnonvars = NIL; - rexpr = (cypher_list *)a->rexpr; - foreach(l, (List *) rexpr->elems) { Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l)); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 02fc3221c..c552727d8 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -44,7 +44,10 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include "parser/parse_coerce.h" +#include "nodes/nodes.h" +#include "utils/acl.h" #include "utils/builtins.h" +#include "executor/cypher_utils.h" #include "utils/float.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" @@ -5409,10 +5412,24 @@ Datum age_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("id() argument must be a vertex, an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: id is at a fixed index for both + * vertex and edge objects due to key length sorting. 
+ */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_ID(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_ID(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("id() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5447,10 +5464,11 @@ Datum age_start_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("start_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "start_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: start_id is at index 3 for edge + * objects due to key length sorting (id=0, label=1, end_id=2, start_id=3). + */ + agtv_result = AGTYPE_EDGE_GET_START_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5485,10 +5503,11 @@ Datum age_end_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("end_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "end_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: end_id is at index 2 for edge + * objects due to key length sorting (id=0, label=1, end_id=2). 
+ */ + agtv_result = AGTYPE_EDGE_GET_END_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5609,15 +5628,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, HeapTuple tuple; TupleDesc tupdesc; Datum id, properties, result; + AclResult aclresult; /* get the specific graph namespace (schema) */ Oid graph_namespace_oid = get_namespace_oid(graph, false); /* get the specific vertex label table (schema.vertex_label) */ Oid vertex_label_table_oid = get_relname_relid(vertex_label, - graph_namespace_oid); + graph_namespace_oid); /* get the active snapshot */ Snapshot snapshot = GetActiveSnapshot(); + /* check for SELECT permission on the table */ + aclresult = pg_class_aclcheck(vertex_label_table_oid, GetUserId(), + ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, vertex_label); + } + /* initialize the scan key */ ScanKeyInit(&scan_keys[0], 1, BTEqualStrategyNumber, F_OIDEQ, Int64GetDatum(graphid)); @@ -5630,11 +5658,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, /* bail if the tuple isn't valid */ if (!HeapTupleIsValid(tuple)) { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("graphid %lu does not exist", graphid))); } + /* Check RLS policies - error if filtered out */ + if (!check_rls_for_tuple(graph_vertex_label, tuple, CMD_SELECT)) + { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("access to vertex %lu denied by row-level security policy on \"%s\"", + graphid, vertex_label))); + } + /* get the tupdesc - we don't need to release this one */ tupdesc = RelationGetDescr(graph_vertex_label); /* bail if the number of columns differs */ @@ -6038,10 +6079,25 @@ Datum age_properties(PG_FUNCTION_ARGS) errmsg("properties() argument must be a vertex, an edge or null"))); } - 
agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "properties"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_OBJECT); + /* + * Direct field access optimization: properties is at index 2 for vertex + * (id=0, label=1, properties=2) and index 4 for edge (id=0, label=1, + * end_id=2, start_id=3, properties=4) due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_PROPERTIES(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_PROPERTIES(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("properties() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -7170,8 +7226,24 @@ Datum age_label(PG_FUNCTION_ARGS) } - /* extract the label agtype value from the vertex or edge */ - label = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_value, "label"); + /* + * Direct field access optimization: label is at a fixed index for both + * vertex and edge objects due to key length sorting. + */ + if (agtv_value->type == AGTV_VERTEX) + { + label = AGTYPE_VERTEX_GET_LABEL(agtv_value); + } + else if (agtv_value->type == AGTV_EDGE) + { + label = AGTYPE_EDGE_GET_LABEL(agtv_value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(label)); } @@ -10507,6 +10579,59 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, Oid *types = NULL; agtype *agtype_result = NULL; + /* + * Fast path optimization: For non-variadic calls where the argument + * is already an agtype, we can avoid the overhead of extract_variadic_args + * which allocates three arrays. This is the common case for most agtype + * comparison and arithmetic operators. 
+ */ + if (!get_fn_expr_variadic(fcinfo->flinfo)) + { + int total_args = PG_NARGS(); + int actual_nargs = total_args - variadic_offset; + + /* Verify expected number of arguments */ + if (actual_nargs != expected_nargs) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of args %d does not match expected %d", + actual_nargs, expected_nargs))); + } + + /* Check for SQL NULL */ + if (PG_ARGISNULL(variadic_offset)) + { + return NULL; + } + + /* Check if the argument is already an agtype */ + if (get_fn_expr_argtype(fcinfo->flinfo, variadic_offset) == AGTYPEOID) + { + agtype_container *agtc; + + agtype_result = DATUM_GET_AGTYPE_P(PG_GETARG_DATUM(variadic_offset)); + agtc = &agtype_result->root; + + /* + * Is this a scalar (scalars are stored as one element arrays)? + * If so, test for agtype NULL. + */ + if (AGTYPE_CONTAINER_IS_SCALAR(agtc) && + AGTE_IS_NULL(agtc->children[0])) + { + return NULL; + } + + return agtype_result; + } + + /* + * Not an agtype, need to convert. Fall through to use + * extract_variadic_args for type conversion handling. 
+ */ + } + + /* Standard path using extract_variadic_args */ nargs = extract_variadic_args(fcinfo, variadic_offset, false, &args, &types, &nulls); /* throw an error if the number of args is not the expected number */ diff --git a/src/backend/utils/adt/agtype_ext.c b/src/backend/utils/adt/agtype_ext.c index 8fc6600d1..7a0ea991d 100644 --- a/src/backend/utils/adt/agtype_ext.c +++ b/src/backend/utils/adt/agtype_ext.c @@ -89,7 +89,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -109,7 +109,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -129,7 +129,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -175,7 +175,7 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset, break; default: - elog(ERROR, "Invalid AGT header value."); + ereport(ERROR, (errmsg("Invalid AGT header value: 0x%08x", agt_header))); } } diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c index 01a965cdd..b39723413 100644 --- a/src/backend/utils/adt/agtype_util.c +++ b/src/backend/utils/adt/agtype_util.c @@ -41,6 +41,14 @@ #include "utils/agtype_ext.h" +/* + * Extended type header macros - must match definitions in agtype_ext.c. 
+ * These are used for deserializing extended agtype values (INTEGER, FLOAT, + * VERTEX, EDGE, PATH) from their binary representation. + */ +#define AGT_HEADER_TYPE uint32 +#define AGT_HEADER_SIZE sizeof(AGT_HEADER_TYPE) + /* * Maximum number of elements in an array (or key/value pairs in an object). * This is limited by two things: the size of the agtentry array must fit @@ -56,6 +64,11 @@ static void fill_agtype_value(agtype_container *container, int index, char *base_addr, uint32 offset, agtype_value *result); +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result); +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b); static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b); static agtype *convert_to_agtype(agtype_value *val); static void convert_agtype_value(StringInfo buffer, agtentry *header, @@ -264,6 +277,24 @@ int compare_agtype_containers_orderability(agtype_container *a, agtype_iterator *itb; int res = 0; + /* + * Fast path optimization for scalar values. + * + * The most common case in ORDER BY and comparison operations is comparing + * scalar values (integers, strings, floats, etc.). For these cases, we can + * avoid the overhead of the full iterator machinery by directly extracting + * and comparing the scalar values. + * + * This provides significant performance improvement because: + * 1. We avoid allocating two agtype_iterator structures + * 2. We avoid the iterator state machine overhead + * 3. We use no-copy extraction where possible + */ + if (AGTYPE_CONTAINER_IS_SCALAR(a) && AGTYPE_CONTAINER_IS_SCALAR(b)) + { + return compare_agtype_scalar_containers(a, b); + } + ita = agtype_iterator_init(a); itb = agtype_iterator_init(b); @@ -751,6 +782,173 @@ static void fill_agtype_value(agtype_container *container, int index, } } +/* + * A helper function to fill in an agtype_value WITHOUT making deep copies. 
+ * This is used for read-only comparison operations where the agtype_value + * will not outlive the container data. The caller MUST NOT free the + * agtype_value content or use it after the container is freed. + * + * This function provides significant performance improvements for comparison + * operations by avoiding palloc/memcpy for strings and numerics. + * + * Note: For AGTV_STRING, val.string.val points directly into container data. + * Note: For AGTV_NUMERIC, val.numeric points directly into container data. + * Note: Extended types (VERTEX, EDGE, PATH) still require deserialization, + * so they use the standard fill_agtype_value path. + */ +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result) +{ + agtentry entry = container->children[index]; + + if (AGTE_IS_NULL(entry)) + { + result->type = AGTV_NULL; + } + else if (AGTE_IS_STRING(entry)) + { + result->type = AGTV_STRING; + /* Point directly into the container data - no copy */ + result->val.string.val = base_addr + offset; + result->val.string.len = get_agtype_length(container, index); + } + else if (AGTE_IS_NUMERIC(entry)) + { + result->type = AGTV_NUMERIC; + /* Point directly into the container data - no copy */ + result->val.numeric = (Numeric)(base_addr + INTALIGN(offset)); + } + else if (AGTE_IS_AGTYPE(entry)) + { + /* + * For extended types (INTEGER, FLOAT, VERTEX, EDGE, PATH), we need + * to deserialize. INTEGER and FLOAT don't allocate, but composite + * types (VERTEX, EDGE, PATH) do. For simple scalar comparisons, + * we handle INTEGER and FLOAT directly here. 
+ */ + char *base = base_addr + INTALIGN(offset); + AGT_HEADER_TYPE agt_header = *((AGT_HEADER_TYPE *)base); + + switch (agt_header) + { + case AGT_HEADER_INTEGER: + result->type = AGTV_INTEGER; + result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE)); + break; + + case AGT_HEADER_FLOAT: + result->type = AGTV_FLOAT; + result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE)); + break; + + default: + /* + * For VERTEX, EDGE, PATH - use standard deserialization. + * These are composite types that require full parsing. + */ + ag_deserialize_extended_type(base_addr, offset, result); + break; + } + } + else if (AGTE_IS_BOOL_TRUE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = true; + } + else if (AGTE_IS_BOOL_FALSE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = false; + } + else + { + Assert(AGTE_IS_CONTAINER(entry)); + result->type = AGTV_BINARY; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = + (agtype_container *)(base_addr + INTALIGN(offset)); + result->val.binary.len = get_agtype_length(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Fast path comparison for scalar agtype containers. + * + * This function compares two scalar containers directly without the overhead + * of the full iterator machinery. It extracts the scalar values using no-copy + * fill and compares them directly. 
+ * + * Returns: negative if a < b, 0 if a == b, positive if a > b + */ +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b) +{ + agtype_value va; + agtype_value vb; + char *base_addr_a; + char *base_addr_b; + int result; + bool need_free_a = false; + bool need_free_b = false; + + Assert(AGTYPE_CONTAINER_IS_SCALAR(a)); + Assert(AGTYPE_CONTAINER_IS_SCALAR(b)); + + /* Scalars are stored as single-element arrays */ + base_addr_a = (char *)&a->children[1]; + base_addr_b = (char *)&b->children[1]; + + /* Use no-copy fill to avoid allocations for simple types */ + fill_agtype_value_no_copy(a, 0, base_addr_a, 0, &va); + fill_agtype_value_no_copy(b, 0, base_addr_b, 0, &vb); + + /* + * Check if we need to free the values after comparison. + * Only VERTEX, EDGE, and PATH types allocate memory in no-copy mode. + */ + if (va.type == AGTV_VERTEX || va.type == AGTV_EDGE || va.type == AGTV_PATH) + { + need_free_a = true; + } + if (vb.type == AGTV_VERTEX || vb.type == AGTV_EDGE || vb.type == AGTV_PATH) + { + need_free_b = true; + } + + /* + * Compare the scalar values. If types match or are numeric compatible, + * use scalar comparison. Otherwise, use type-based ordering. + */ + if ((va.type == vb.type) || + ((va.type == AGTV_INTEGER || va.type == AGTV_FLOAT || + va.type == AGTV_NUMERIC) && + (vb.type == AGTV_INTEGER || vb.type == AGTV_FLOAT || + vb.type == AGTV_NUMERIC))) + { + result = compare_agtype_scalar_values(&va, &vb); + } + else + { + /* Type-defined order */ + result = (get_type_sort_priority(va.type) < + get_type_sort_priority(vb.type)) ? -1 : 1; + } + + /* Free any allocated memory from composite types */ + if (need_free_a) + { + pfree_agtype_value_content(&va); + } + if (need_free_b) + { + pfree_agtype_value_content(&vb); + } + + return result; +} + /* * Push agtype_value into agtype_parse_state. 
* @@ -1597,7 +1795,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_VERTEX: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_VERTEX_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1606,7 +1805,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_EDGE: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_EDGE_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1704,8 +1904,8 @@ static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b) case AGTV_VERTEX: { graphid a_graphid, b_graphid; - a_graphid = a->val.object.pairs[0].value.val.int_value; - b_graphid = b->val.object.pairs[0].value.val.int_value; + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; return a_graphid == b_graphid; } @@ -1790,16 +1990,33 @@ int compare_agtype_scalar_values(agtype_value *a, agtype_value *b) return compare_two_floats_orderability(a->val.float_value, b->val.float_value); case AGTV_VERTEX: - case AGTV_EDGE: { - agtype_value *a_id, *b_id; graphid a_graphid, b_graphid; - a_id = GET_AGTYPE_VALUE_OBJECT_VALUE(a, "id"); - b_id = GET_AGTYPE_VALUE_OBJECT_VALUE(b, "id"); + /* Direct field access optimization using macros defined in agtype.h. 
*/ + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; + + if (a_graphid == b_graphid) + { + return 0; + } + else if (a_graphid > b_graphid) + { + return 1; + } + else + { + return -1; + } + } + case AGTV_EDGE: + { + graphid a_graphid, b_graphid; - a_graphid = a_id->val.int_value; - b_graphid = b_id->val.int_value; + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_EDGE_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_EDGE_GET_ID(b)->val.int_value; if (a_graphid == b_graphid) { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 931c6e0dc..c05bf3352 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -16,50 +16,30 @@ * specific language governing permissions and limitations * under the License. */ - #include "postgres.h" -#include "utils/load/ag_load_edges.h" -#include "utils/load/csv.h" +#include "access/heapam.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" -void edge_field_cb(void *field, size_t field_len, void *data) -{ - - csv_edge_reader *cr = (csv_edge_reader*)data; - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = 
pnstrdup((char*)field, field_len); - cr->cur_field += 1; -} +#include "utils/load/ag_load_edges.h" -/* Parser calls this function when it detects end of a row */ -void edge_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single edge row from COPY's raw fields. + * Edge CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + */ +static void process_edge_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + Oid graph_oid, bool load_as_agtype, + batch_insert_state *batch_state) { - - csv_edge_reader *cr = (csv_edge_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - - size_t i, n_fields; int64 start_id_int; graphid start_vertex_graph_id; int start_vertex_type_id; @@ -72,104 +52,92 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) int64 entry_id; TupleTableSlot *slot; - n_fields = cr->cur_field; + char *start_vertex_type; + char *end_vertex_type; + agtype *edge_properties; - if (cr->row == 0) - { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); + /* Generate edge ID */ + entry_id = nextval_internal(label_seq_relid, true); + edge_id = make_graphid(label_id, entry_id); - for (i = 0; icur_field; i++) - { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - edge_id = make_graphid(cr->label_id, entry_id); - - start_id_int = strtol(cr->fields[0], NULL, 10); - start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); - end_id_int = strtol(cr->fields[2], NULL, 10); - end_vertex_type_id = get_label_id(cr->fields[3], cr->graph_oid); - - start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); - end_vertex_graph_id = 
make_graphid(end_vertex_type_id, end_id_int); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - - /* Clear the slots contents */ - ExecClearTuple(slot); - - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); - slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); - slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); - slot->tts_values[3] = AGTYPE_P_GET_DATUM( - create_agtype_from_list_i( - cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; - slot->tts_isnull[2] = false; - slot->tts_isnull[3] = false; - - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - batch_state->num_tuples++; - - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + /* Trim whitespace from vertex type names */ + start_vertex_type = trim_whitespace(fields[1]); + end_vertex_type = trim_whitespace(fields[3]); - for (i = 0; i < n_fields; ++i) - { - pfree_if_not_null(cr->fields[i]); - } + /* Parse start vertex info */ + start_id_int = strtol(fields[0], NULL, 10); + start_vertex_type_id = get_label_id(start_vertex_type, graph_oid); - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } + /* Parse end vertex info */ + end_id_int = strtol(fields[2], NULL, 10); + end_vertex_type_id = get_label_id(end_vertex_type, graph_oid); - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; -} + /* Create graphids for start and end vertices */ + start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); + end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); -static int is_space(unsigned char c) -{ - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else + /* Get the appropriate slot from the batch 
state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Build the agtype properties */ + edge_properties = create_agtype_from_list_i(header, fields, + nfields, 4, load_as_agtype); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM(edge_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + + batch_state->buffered_bytes += VARSIZE(edge_properties); + batch_state->num_tuples++; + + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - return 0; + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } } -static int is_term(unsigned char c) +/* + * Create COPY options for CSV parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; + + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load edges from CSV file using pg's COPY infrastructure. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -177,79 +145,133 @@ int create_edges_from_csv_file(char *file_path, int label_id, bool load_as_agtype) { + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Edge Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); + + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); + + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_edge_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + PG_TRY(); { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. 
+ */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) + { + if (is_first_row) + { + int i; - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } - PG_TRY(); - { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_edge_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + graph_oid, 
load_as_agtype, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 1e86bbda4..5b11f68b8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,155 +17,114 @@ * under the License. 
*/ #include "postgres.h" -#include "executor/spi.h" + +#include "access/heapam.h" +#include "access/table.h" #include "catalog/namespace.h" +#include "commands/copy.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/csv.h" - -void vertex_field_cb(void *field, size_t field_len, void *data) -{ - - csv_vertex_reader *cr = (csv_vertex_reader *) data; - - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); - cr->cur_field += 1; -} -void vertex_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single vertex row from COPY's raw fields. + * Vertex CSV format: [id,] [properties...] 
+ */ +static void process_vertex_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + bool id_field_exists, bool load_as_agtype, + int64 *curr_seq_num, + batch_insert_state *batch_state) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - size_t i, n_fields; graphid vertex_id; int64 entry_id; TupleTableSlot *slot; + agtype *vertex_properties; - n_fields = cr->cur_field; - - if (cr->row == 0) + /* Generate or use provided entry_id */ + if (id_field_exists) { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); - - for (i = 0; icur_field; i++) + entry_id = strtol(fields[0], NULL, 10); + if (entry_id > *curr_seq_num) { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); + /* This is needed to ensure the sequence is up-to-date */ + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(label_seq_relid), + Int64GetDatum(entry_id)); + *curr_seq_num = entry_id; } } else { - if (cr->id_field_exists) - { - entry_id = strtol(cr->fields[0], NULL, 10); - if (entry_id > cr->curr_seq_num) - { - DirectFunctionCall2(setval_oid, - ObjectIdGetDatum(cr->label_seq_relid), - Int64GetDatum(entry_id)); - cr->curr_seq_num = entry_id; - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - } + entry_id = nextval_internal(label_seq_relid, true); + } - vertex_id = make_graphid(cr->label_id, entry_id); + vertex_id = make_graphid(label_id, entry_id); - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; - /* Clear the slots contents */ - ExecClearTuple(slot); + /* Clear the slots contents */ + 
ExecClearTuple(slot); - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - slot->tts_values[1] = AGTYPE_P_GET_DATUM( - create_agtype_from_list(cr->header, cr->fields, - n_fields, entry_id, - cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; + /* Build the agtype properties */ + vertex_properties = create_agtype_from_list(header, fields, + nfields, entry_id, + load_as_agtype); - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM(vertex_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; - batch_state->num_tuples++; + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + batch_state->buffered_bytes += VARSIZE(vertex_properties); + batch_state->num_tuples++; - for (i = 0; i < n_fields; ++i) + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - pfree_if_not_null(cr->fields[i]); + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } - - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } - - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; } -static int is_space(unsigned char c) +/* + * Create COPY options for csv parsing. + * Returns a List of DefElem nodes. 
+ */ +static List *create_copy_options(void) { - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; -} -static int is_term(unsigned char c) -{ - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load vertex labels from csv file using pg's COPY infrastructure. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -174,96 +133,146 @@ int create_labels_from_csv_file(char *file_path, bool id_field_exists, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_vertex_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + int64 curr_seq_num = 0; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + if (id_field_exists) { - ereport(ERROR, - (errmsg("Failed to initialize 
csv parser\n"))); + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + */ + curr_seq_num = nextval_internal(label_seq_relid, true); } - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); PG_TRY(); { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. + */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist - NULL means all columns */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. 
- * - * We cant use currval because it will error out if nextval was - * not called before in the session. - */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + if (is_first_row) + { + int i; - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } + + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_vertex_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + id_field_exists, load_as_agtype, + &curr_seq_num, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + 
table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index c7cf0677f..e4f10d7e4 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,24 +18,81 @@ */ #include "postgres.h" + +#include "access/heapam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" #include "catalog/indexing.h" +#include "catalog/pg_authid.h" #include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "parser/parse_relation.h" +#include "utils/acl.h" #include "utils/json.h" +#include "utils/rel.h" +#include "utils/rls.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" -#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); static char *build_safe_filename(char *name); +static void check_file_read_permission(void); +static void check_table_permissions(Oid relid); +static void check_rls_for_load(Oid relid); #define AGE_BASE_CSV_DIRECTORY "/tmp/age/" #define AGE_CSV_FILE_EXTENSION ".csv" +/* + * Trim leading and trailing whitespace from a string. + * Returns a newly allocated string with whitespace removed. + * Returns empty string for NULL input. 
+ */ +char *trim_whitespace(const char *str) +{ + const char *start; + const char *end; + size_t len; + + if (str == NULL) + { + return pstrdup(""); + } + + /* Find first non-whitespace character */ + start = str; + while (*start && (*start == ' ' || *start == '\t' || + *start == '\n' || *start == '\r')) + { + start++; + } + + /* If string is all whitespace, return empty string */ + if (*start == '\0') + { + return pstrdup(""); + } + + /* Find last non-whitespace character */ + end = str + strlen(str) - 1; + while (end > start && (*end == ' ' || *end == '\t' || + *end == '\n' || *end == '\r')) + { + end--; + } + + /* Copy the trimmed string */ + len = end - start + 1; + return pnstrdup(start, len); +} + static char *build_safe_filename(char *name) { int length; @@ -88,6 +145,51 @@ static char *build_safe_filename(char *name) return resolved; } +/* + * Check if the current user has permission to read server files. + * Only users with the pg_read_server_files role can load from files. + */ +static void check_file_read_permission(void) +{ + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to LOAD from a file"), + errdetail("Only roles with privileges of the \"%s\" role may LOAD from a file.", + "pg_read_server_files"))); + } +} + +/* + * Check if the current user has INSERT permission on the target table. + */ +static void check_table_permissions(Oid relid) +{ + AclResult aclresult; + + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_INSERT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, get_rel_name(relid)); + } +} + +/* + * Check if RLS is enabled on the target table. + * CSV loading is not supported with row-level security. 
+ */ +static void check_rls_for_load(Oid relid) +{ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOAD from file is not supported with row-level security"), + errhint("Use Cypher CREATE clause instead."))); + } +} + agtype *create_empty_agtype(void) { agtype* out; @@ -118,6 +220,14 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) char *new_csv_val; agtype_value *res; + /* Handle NULL or empty input - return null agtype value */ + if (csv_val == NULL || csv_val[0] == '\0') + { + res = palloc(sizeof(agtype_value)); + res->type = AGTV_NULL; + return res; + } + if (!json_validate(cstring_to_text(csv_val), false, false)) { /* wrap the string with double-quote */ @@ -175,18 +285,40 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, for (i = 0; itype = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -228,18 +360,40 @@ agtype* create_agtype_from_list_i(char **header, char **fields, for (i = start_index; i < fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + 
value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -362,11 +516,24 @@ void insert_batch(batch_insert_state *batch_state) List *result; int i; + /* Check constraints for each tuple before inserting */ + if (batch_state->resultRelInfo->ri_RelationDesc->rd_att->constr) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + ExecConstraints(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate); + } + } + /* Insert the tuples */ heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, batch_state->slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - + GetCurrentCommandId(true), + TABLE_INSERT_SKIP_FSM, /* Skip free space map for bulk */ + batch_state->bistate); /* Use bulk insert state */ + /* Insert index entries for the tuples */ if (batch_state->resultRelInfo->ri_NumIndices > 0) { @@ -405,6 +572,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool id_field_exists; bool load_as_agtype; @@ -427,6 +595,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -447,6 +618,11 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_VERTEX); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); @@ -459,7 +635,6 @@ Datum 
load_labels_from_file(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(load_edges_from_file); Datum load_edges_from_file(PG_FUNCTION_ARGS) { - Name graph_name; Name label_name; text* file_name; @@ -467,6 +642,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool load_as_agtype; @@ -488,6 +664,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -507,6 +686,11 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_EDGE); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); @@ -597,19 +781,42 @@ void init_batch_insert(batch_insert_state **batch_state, Oid relid; EState *estate; ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + RTEPermissionInfo *perminfo; + List *range_table = NIL; + List *perminfos = NIL; int i; - /* Open the relation */ + /* Get the relation OID */ relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, RowExclusiveLock); /* Initialize executor state */ estate = CreateExecutorState(); - /* Initialize resultRelInfo */ + /* Create range table entry for ExecConstraints */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; + rte->rellockmode = RowExclusiveLock; + rte->perminfoindex = 1; + range_table = list_make1(rte); + + /* Create permission info */ + perminfo = makeNode(RTEPermissionInfo); + perminfo->relid = relid; + 
perminfo->requiredPerms = ACL_INSERT; + perminfos = list_make1(perminfo); + + /* Initialize range table in executor state */ + ExecInitRangeTable(estate, range_table, perminfos, NULL); + + /* Initialize resultRelInfo - this opens the relation */ resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; + ExecInitResultRelation(estate, resultRelInfo, 1); + + /* Get relation from resultRelInfo (opened by ExecInitResultRelation) */ + relation = resultRelInfo->ri_RelationDesc; /* Open the indices */ ExecOpenIndices(resultRelInfo, false); @@ -619,8 +826,9 @@ void init_batch_insert(batch_insert_state **batch_state, (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); (*batch_state)->estate = estate; (*batch_state)->resultRelInfo = resultRelInfo; - (*batch_state)->max_tuples = BATCH_SIZE; (*batch_state)->num_tuples = 0; + (*batch_state)->buffered_bytes = 0; + (*batch_state)->bistate = GetBulkInsertState(); /* Create slots */ for (i = 0; i < BATCH_SIZE; i++) @@ -651,12 +859,14 @@ void finish_batch_insert(batch_insert_state **batch_state) ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); } - /* Clean up, close the indices and relation */ - ExecCloseIndices((*batch_state)->resultRelInfo); - table_close((*batch_state)->resultRelInfo->ri_RelationDesc, - RowExclusiveLock); + /* Free BulkInsertState */ + FreeBulkInsertState((*batch_state)->bistate); + + /* Close result relations and range table relations */ + ExecCloseResultRelations((*batch_state)->estate); + ExecCloseRangeTableRelations((*batch_state)->estate); - /* Clean up batch state */ + /* Clean up executor state */ FreeExecutorState((*batch_state)->estate); pfree((*batch_state)->slots); pfree(*batch_state); diff --git a/src/backend/utils/load/libcsv.c b/src/backend/utils/load/libcsv.c deleted file mode 100644 index f0e8b46be..000000000 --- a/src/backend/utils/load/libcsv.c +++ /dev/null @@ 
-1,549 +0,0 @@ -/* -libcsv - parse and write csv data -Copyright (C) 2008 Robert Gamble - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include - -#if __STDC_VERSION__ >= 199901L -# include -#else - /* C89 doesn't have stdint.h or SIZE_MAX */ -# define SIZE_MAX ((size_t)-1) -#endif - -#include "utils/load/csv.h" - -#define VERSION "3.0.3" - -#define ROW_NOT_BEGUN 0 -#define FIELD_NOT_BEGUN 1 -#define FIELD_BEGUN 2 -#define FIELD_MIGHT_HAVE_ENDED 3 - -/* - Explanation of states - ROW_NOT_BEGUN There have not been any fields encountered for this row - FIELD_NOT_BEGUN There have been fields but we are currently not in one - FIELD_BEGUN We are in a field - FIELD_MIGHT_HAVE_ENDED - We encountered a double quote inside a quoted field, the - field is either ended or the quote is literal -*/ - -#define MEM_BLK_SIZE 128 - -#define SUBMIT_FIELD(p) \ - do { \ - if (!quoted) \ - entry_pos -= spaces; \ - if (p->options & CSV_APPEND_NULL) \ - ((p)->entry_buf[entry_pos]) = '\0'; \ - if (cb1 && (p->options & CSV_EMPTY_IS_NULL) && !quoted && entry_pos == 0) \ - cb1(NULL, entry_pos, data); \ - else if (cb1) \ - cb1(p->entry_buf, entry_pos, data); \ - pstate = FIELD_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_ROW(p, c) \ - do { \ - if (cb2) \ - cb2(c, data); \ - pstate = 
ROW_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) - -static const char *csv_errors[] = {"success", - "error parsing data while strict checking enabled", - "memory exhausted while increasing buffer size", - "data size too large", - "invalid status code"}; - -int -csv_error(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Return the current status of the parser */ - return p->status; -} - -const char * -csv_strerror(int status) -{ - /* Return a textual description of status */ - if (status >= CSV_EINVALID || status < 0) - return csv_errors[CSV_EINVALID]; - else - return csv_errors[status]; -} - -int -csv_get_opts(const struct csv_parser *p) -{ - /* Return the currently set options of parser */ - if (p == NULL) - return -1; - - return p->options; -} - -int -csv_set_opts(struct csv_parser *p, unsigned char options) -{ - /* Set the options */ - if (p == NULL) - return -1; - - p->options = options; - return 0; -} - -int -csv_init(struct csv_parser *p, unsigned char options) -{ - /* Initialize a csv_parser object returns 0 on success, -1 on error */ - if (p == NULL) - return -1; - - p->entry_buf = NULL; - p->pstate = ROW_NOT_BEGUN; - p->quoted = 0; - p->spaces = 0; - p->entry_pos = 0; - p->entry_size = 0; - p->status = 0; - p->options = options; - p->quote_char = CSV_QUOTE; - p->delim_char = CSV_COMMA; - p->is_space = NULL; - p->is_term = NULL; - p->blk_size = MEM_BLK_SIZE; - p->malloc_func = NULL; - p->realloc_func = realloc; - p->free_func = free; - - return 0; -} - -void -csv_free(struct csv_parser *p) -{ - /* Free the entry_buffer of csv_parser object */ - if (p == NULL) - return; - - if (p->entry_buf && p->free_func) - p->free_func(p->entry_buf); - - p->entry_buf = NULL; - p->entry_size = 0; - - return; -} - -int -csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - int quoted; - int pstate; - 
size_t spaces; - size_t entry_pos; - - if (p == NULL) - return -1; - - /* Finalize parsing. Needed, for example, when file does not end in a newline */ - quoted = p->quoted; - pstate = p->pstate; - spaces = p->spaces; - entry_pos = p->entry_pos; - - if ((pstate == FIELD_BEGUN) && p->quoted && (p->options & CSV_STRICT) && (p->options & CSV_STRICT_FINI)) { - /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ - p->status = CSV_EPARSE; - return -1; - } - - switch (pstate) { - case FIELD_MIGHT_HAVE_ENDED: - p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ - entry_pos = p->entry_pos; - /*lint -fallthrough */ - case FIELD_NOT_BEGUN: - case FIELD_BEGUN: - /* Unnecessary: - quoted = p->quoted, pstate = p->pstate; - spaces = p->spaces, entry_pos = p->entry_pos; - */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, -1); - break; - case ROW_NOT_BEGUN: /* Already ended properly */ - ; - } - - /* Reset parser */ - p->spaces = p->quoted = p->entry_pos = p->status = 0; - p->pstate = ROW_NOT_BEGUN; - - return 0; -} - -void -csv_set_delim(struct csv_parser *p, unsigned char c) -{ - /* Set the delimiter */ - if (p) p->delim_char = c; -} - -void -csv_set_quote(struct csv_parser *p, unsigned char c) -{ - /* Set the quote character */ - if (p) p->quote_char = c; -} - -unsigned char -csv_get_delim(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the delimiter */ - return p->delim_char; -} - -unsigned char -csv_get_quote(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the quote character */ - return p->quote_char; -} - -void -csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the space function */ - if (p) p->is_space = f; -} - -void -csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the term function */ - if (p) p->is_term = f; -} - -void -csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) -{ - /* Set 
the realloc function used to increase buffer size */ - if (p && f) p->realloc_func = f; -} - -void -csv_set_free_func(struct csv_parser *p, void (*f)(void *)) -{ - /* Set the free function used to free the buffer */ - if (p && f) p->free_func = f; -} - -void -csv_set_blk_size(struct csv_parser *p, size_t size) -{ - /* Set the block size used to increment buffer size */ - if (p) p->blk_size = size; -} - -size_t -csv_get_buffer_size(const struct csv_parser *p) -{ - /* Get the size of the entry buffer */ - if (p) - return p->entry_size; - return 0; -} - -static int -csv_increase_buffer(struct csv_parser *p) -{ - size_t to_add; - void *vp; - - if (p == NULL) return 0; - if (p->realloc_func == NULL) return 0; - - /* Increase the size of the entry buffer. Attempt to increase size by - * p->blk_size, if this is larger than SIZE_MAX try to increase current - * buffer size to SIZE_MAX. If allocation fails, try to allocate halve - * the size and try again until successful or increment size is zero. 
- */ - - to_add = p->blk_size; - - if ( p->entry_size >= SIZE_MAX - to_add ) - to_add = SIZE_MAX - p->entry_size; - - if (!to_add) { - p->status = CSV_ETOOBIG; - return -1; - } - - while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) { - to_add /= 2; - if (!to_add) { - p->status = CSV_ENOMEM; - return -1; - } - } - - /* Update entry buffer pointer and entry_size if successful */ - p->entry_buf = vp; - p->entry_size += to_add; - return 0; -} - -size_t -csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - unsigned const char *us = s; /* Access input data as array of unsigned char */ - unsigned char c; /* The character we are currently processing */ - size_t pos = 0; /* The number of characters we have processed in this call */ - - /* Store key fields into local variables for performance */ - unsigned char delim = p->delim_char; - unsigned char quote = p->quote_char; - int (*is_space)(unsigned char) = p->is_space; - int (*is_term)(unsigned char) = p->is_term; - int quoted = p->quoted; - int pstate = p->pstate; - size_t spaces = p->spaces; - size_t entry_pos = p->entry_pos; - - - if (!p->entry_buf && pos < len) { - /* Buffer hasn't been allocated yet and len > 0 */ - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - while (pos < len) { - /* Check memory usage, increase buffer if necessary */ - if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - c = us[pos++]; - - switch (pstate) { - case ROW_NOT_BEGUN: - case FIELD_NOT_BEGUN: - if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ - continue; - } else if (is_term ? 
is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (pstate == FIELD_NOT_BEGUN) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { /* ROW_NOT_BEGUN */ - /* Don't submit empty rows by default */ - if (p->options & CSV_REPALL_NL) { - SUBMIT_ROW(p, c); - } - } - continue; - } else if (c == delim) { /* Comma */ - SUBMIT_FIELD(p); - break; - } else if (c == quote) { /* Quote */ - pstate = FIELD_BEGUN; - quoted = 1; - } else { /* Anything else */ - pstate = FIELD_BEGUN; - quoted = 0; - SUBMIT_CHAR(p, c); - } - break; - case FIELD_BEGUN: - if (c == quote) { /* Quote */ - if (quoted) { - SUBMIT_CHAR(p, c); - pstate = FIELD_MIGHT_HAVE_ENDED; - } else { - /* STRICT ERROR - double quote inside non-quoted field */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - SUBMIT_CHAR(p, c); - spaces = 0; - } - } else if (c == delim) { /* Comma */ - if (quoted) { - SUBMIT_CHAR(p, c); - } else { - SUBMIT_FIELD(p); - } - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (!quoted) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { - SUBMIT_CHAR(p, c); - } - } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ - SUBMIT_CHAR(p, c); - spaces++; - } else { /* Anything else */ - SUBMIT_CHAR(p, c); - spaces = 0; - } - break; - case FIELD_MIGHT_HAVE_ENDED: - /* This only happens when a quote character is encountered in a quoted field */ - if (c == delim) { /* Comma */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else if (is_space ? 
is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ - SUBMIT_CHAR(p, c); - spaces++; - } else if (c == quote) { /* Quote */ - if (spaces) { - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - spaces = 0; - SUBMIT_CHAR(p, c); - } else { - /* Two quotes in a row */ - pstate = FIELD_BEGUN; - } - } else { /* Anything else */ - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - pstate = FIELD_BEGUN; - spaces = 0; - SUBMIT_CHAR(p, c); - } - break; - default: - break; - } - } - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; -} - -size_t -csv_write (void *dest, size_t dest_size, const void *src, size_t src_size) -{ - return csv_write2(dest, dest_size, src, src_size, CSV_QUOTE); -} - -int -csv_fwrite (FILE *fp, const void *src, size_t src_size) -{ - return csv_fwrite2(fp, src, src_size, CSV_QUOTE); -} - -size_t -csv_write2 (void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) -{ - unsigned char *cdest = dest; - const unsigned char *csrc = src; - size_t chars = 0; - - if (src == NULL) - return 0; - - if (dest == NULL) - dest_size = 0; - - if (dest_size > 0) - *cdest++ = quote; - chars++; - - while (src_size) { - if (*csrc == quote) { - if (dest_size > chars) - *cdest++ = quote; - if (chars < SIZE_MAX) chars++; - } - if (dest_size > chars) - *cdest++ = *csrc; - if (chars < SIZE_MAX) chars++; - src_size--; - csrc++; - } - - if (dest_size > chars) - *cdest = quote; - if (chars < SIZE_MAX) chars++; - - return chars; -} - -int -csv_fwrite2 (FILE *fp, const void *src, size_t src_size, unsigned char quote) -{ - const unsigned char *csrc = src; - - if (fp == NULL || 
src == NULL) - return 0; - - if (fputc(quote, fp) == EOF) - return EOF; - - while (src_size) { - if (*csrc == quote) { - if (fputc(quote, fp) == EOF) - return EOF; - } - if (fputc(*csrc, fp) == EOF) - return EOF; - src_size--; - csrc++; - } - - if (fputc(quote, fp) == EOF) { - return EOF; - } - - return 0; -} diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index 0798f153c..fc4067455 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -21,6 +21,7 @@ #define AG_CYPHER_UTILS_H #include "access/heapam.h" +#include "nodes/execnodes.h" #include "nodes/cypher_nodes.h" #include "utils/agtype.h" @@ -127,4 +128,25 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, TupleTableSlot *elemTupleSlot, EState *estate, CommandId cid); +/* RLS support */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +List *setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd); +bool check_security_quals(List *qualExprs, TupleTableSlot *slot, + ExprContext *econtext); +bool check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd); + +/* Hash table entry for caching RLS state per label */ +typedef struct RLSCacheEntry +{ + Oid relid; /* hash key */ + /* Security quals (USING policies) for UPDATE/DELETE */ + List *qualExprs; + TupleTableSlot *slot; /* slot for old tuple (RLS check) */ + /* WCOs - used only in SET */ + List *withCheckOptions; + List *withCheckOptionExprs; +} RLSCacheEntry; + #endif diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index ab2ba08cc..ec9125073 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -322,6 +322,109 @@ enum agtype_value_type AGTV_BINARY }; +/* + * Direct field access indices for vertex and edge objects. 
+ * + * Vertex and edge objects are serialized with keys sorted by length first, + * then lexicographically (via uniqueify_agtype_object). This means field + * positions are deterministic and can be accessed directly without binary + * search, providing O(1) access instead of O(log n). + * + * Vertex keys by length: "id"(2), "label"(5), "properties"(10) + * Edge keys by length: "id"(2), "label"(5), "end_id"(6), "start_id"(8), "properties"(10) + */ +#define VERTEX_FIELD_ID 0 +#define VERTEX_FIELD_LABEL 1 +#define VERTEX_FIELD_PROPERTIES 2 +#define VERTEX_NUM_FIELDS 3 + +#define EDGE_FIELD_ID 0 +#define EDGE_FIELD_LABEL 1 +#define EDGE_FIELD_END_ID 2 +#define EDGE_FIELD_START_ID 3 +#define EDGE_FIELD_PROPERTIES 4 +#define EDGE_NUM_FIELDS 5 + +/* + * Macros for direct field access from vertex/edge agtype_value objects. + * These avoid the binary search overhead of GET_AGTYPE_VALUE_OBJECT_VALUE. + * Validation is integrated - macros will error if field count is incorrect. + * Uses GCC statement expressions to allow validation within expressions. 
+ */ +#define AGTYPE_VERTEX_GET_ID(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_ID].value; \ + }) +#define AGTYPE_VERTEX_GET_LABEL(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_LABEL].value; \ + }) +#define AGTYPE_VERTEX_GET_PROPERTIES(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_PROPERTIES].value; \ + }) + +#define AGTYPE_EDGE_GET_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_ID].value; \ + }) +#define AGTYPE_EDGE_GET_LABEL(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_LABEL].value; \ + }) +#define AGTYPE_EDGE_GET_END_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + 
&(e)->val.object.pairs[EDGE_FIELD_END_ID].value; \ + }) +#define AGTYPE_EDGE_GET_START_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_START_ID].value; \ + }) +#define AGTYPE_EDGE_GET_PROPERTIES(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_PROPERTIES].value; \ + }) + /* * agtype_value: In-memory representation of agtype. This is a convenient * deserialized representation, that can easily support using the "val" diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index df663b1dd..4db00d93a 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,42 +17,28 @@ * under the License. */ -#include "access/heapam.h" -#include "utils/load/age_load.h" - #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - char *start_vertex; - char *end_vertex; - bool load_as_agtype; - batch_insert_state *batch_state; -} csv_edge_reader; - - -void edge_field_cb(void *field, size_t field_len, void *data); -void edge_row_cb(int delim __attribute__((unused)), void *data); +#include "utils/load/age_load.h" +/* + * Load edges from a CSV file using pg's COPY infrastructure. 
+ * + * CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the edge label + * label_id - ID of the label + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *label_name, int label_id, - bool load_as_agtype); - -#endif /*AG_LOAD_EDGES_H */ + char *label_name, int label_id, + bool load_as_agtype); +#endif /* AG_LOAD_EDGES_H */ diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index b8ed1572e..c3d517f30 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -17,46 +17,26 @@ * under the License. */ - #ifndef AG_LOAD_LABELS_H #define AG_LOAD_LABELS_H -#include "access/heapam.h" #include "utils/load/age_load.h" -struct counts { - long unsigned fields; - long unsigned allvalues; - long unsigned rows; -}; - -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - bool id_field_exists; - bool load_as_agtype; - int curr_seq_num; - batch_insert_state *batch_state; -} csv_vertex_reader; - - -void vertex_field_cb(void *field, size_t field_len, void *data); -void vertex_row_cb(int delim __attribute__((unused)), void *data); - +/* + * Load vertex labels from a CSV file using pg's COPY infrastructure. + * CSV format: [id,] [properties...] 
+ * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the vertex label + * label_id - ID of the label + * id_field_exists - If true, first CSV column contains the vertex ID + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 72f11493d..6573c79f3 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -17,6 +17,10 @@ * under the License. */ +#ifndef AG_LOAD_H +#define AG_LOAD_H + +#include "access/heapam.h" #include "commands/sequence.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -27,10 +31,8 @@ #include "commands/graph_commands.h" #include "utils/ag_cache.h" -#ifndef AGE_ENTITY_CREATOR_H -#define AGE_ENTITY_CREATOR_H - #define BATCH_SIZE 1000 +#define MAX_BUFFERED_BYTES 65535 /* 64KB, same as pg COPY */ typedef struct batch_insert_state { @@ -38,26 +40,29 @@ typedef struct batch_insert_state ResultRelInfo *resultRelInfo; TupleTableSlot **slots; int num_tuples; - int max_tuples; + size_t buffered_bytes; + BulkInsertState bistate; } batch_insert_state; -agtype* create_empty_agtype(void); - -agtype* create_agtype_from_list(char **header, char **fields, +agtype *create_empty_agtype(void); +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id, bool load_as_agtype); -agtype* create_agtype_from_list_i(char **header, char **fields, +agtype *create_agtype_from_list_i(char **header, char **fields, size_t fields_len, size_t start_index, bool load_as_agtype); + void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype 
*vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, - agtype* end_properties); -void insert_batch(batch_insert_state *batch_state); + agtype *edge_properties); void init_batch_insert(batch_insert_state **batch_state, char *label_name, Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); void finish_batch_insert(batch_insert_state **batch_state); -#endif /* AGE_ENTITY_CREATOR_H */ +char *trim_whitespace(const char *str); + +#endif /* AG_LOAD_H */ diff --git a/src/include/utils/load/csv.h b/src/include/utils/load/csv.h deleted file mode 100644 index 062536977..000000000 --- a/src/include/utils/load/csv.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Created by Shoaib on 12/5/2021. -*/ - -/* -libcsv - parse and write csv data -Copyright (C) 2008-2021 Robert Gamble -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. 
-You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifndef LIBCSV_H__ -#define LIBCSV_H__ -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define CSV_MAJOR 3 -#define CSV_MINOR 0 -#define CSV_RELEASE 3 - -/* Error Codes */ -#define CSV_SUCCESS 0 -#define CSV_EPARSE 1 /* Parse error in strict mode */ -#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ -#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ -#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ - - -/* parser options */ -#define CSV_STRICT 1 /* enable strict mode */ -#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ -#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last - field is quoted and doesn't contain ending - quote */ -#define CSV_APPEND_NULL 8 /* Ensure that all fields are null-terminated */ -#define CSV_EMPTY_IS_NULL 16 /* Pass null pointer to cb1 function when - empty, unquoted fields are encountered */ - - -/* Character values */ -#define CSV_TAB 0x09 -#define CSV_SPACE 0x20 -#define CSV_CR 0x0d -#define CSV_LF 0x0a -#define CSV_COMMA 0x2c -#define CSV_QUOTE 0x22 - -struct csv_parser { - int pstate; /* Parser state */ - int quoted; /* Is the current field a quoted field? 
*/ - size_t spaces; /* Number of continuous spaces after quote or in a non-quoted field */ - unsigned char * entry_buf; /* Entry buffer */ - size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ - size_t entry_size; /* Size of entry buffer */ - int status; /* Operation status */ - unsigned char options; - unsigned char quote_char; - unsigned char delim_char; - int (*is_space)(unsigned char); - int (*is_term)(unsigned char); - size_t blk_size; - void *(*malloc_func)(size_t); /* not used */ - void *(*realloc_func)(void *, size_t); /* function used to allocate buffer memory */ - void (*free_func)(void *); /* function used to free buffer memory */ -}; - -/* Function Prototypes */ -int csv_init(struct csv_parser *p, unsigned char options); -int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -void csv_free(struct csv_parser *p); -int csv_error(const struct csv_parser *p); -const char * csv_strerror(int error); -size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size); -int csv_fwrite(FILE *fp, const void *src, size_t src_size); -size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote); -int csv_fwrite2(FILE *fp, const void *src, size_t src_size, unsigned char quote); -int csv_get_opts(const struct csv_parser *p); -int csv_set_opts(struct csv_parser *p, unsigned char options); -void csv_set_delim(struct csv_parser *p, unsigned char c); -void csv_set_quote(struct csv_parser *p, unsigned char c); -unsigned char csv_get_delim(const struct csv_parser *p); -unsigned char csv_get_quote(const struct csv_parser *p); -void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)); -void 
csv_set_realloc_func(struct csv_parser *p, void *(*)(void *, size_t)); -void csv_set_free_func(struct csv_parser *p, void (*)(void *)); -void csv_set_blk_size(struct csv_parser *p, size_t); -size_t csv_get_buffer_size(const struct csv_parser *p); - -#ifdef __cplusplus -} -#endif - -#endif