From 399a289a0f7503dcd50e786ca650b0bf8ba4d533 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 19 Nov 2025 08:38:54 -0800 Subject: [PATCH 01/25] Updated CI, Labeler, Docker, and branch security files for PG18 (#2246) Updated the CI and Docker files for the PG18 branch to point to PG18 and PostgreSQL version 18. Updated the labeler and branch security files for PG18. Some of these only apply to the master branch but are updated for consistency. modified: .asf.yaml modified: .github/labeler.yml modified: .github/workflows/go-driver.yml modified: .github/workflows/installcheck.yaml modified: .github/workflows/jdbc-driver.yaml modified: .github/workflows/nodejs-driver.yaml modified: .github/workflows/python-driver.yaml modified: docker/Dockerfile modified: docker/Dockerfile.dev modified: drivers/docker-compose.yml --- .asf.yaml | 4 +++ .github/labeler.yml | 3 +++ .github/workflows/go-driver.yml | 4 +-- .github/workflows/installcheck.yaml | 37 ++++++++++++++-------------- .github/workflows/jdbc-driver.yaml | 4 +-- .github/workflows/nodejs-driver.yaml | 4 +-- .github/workflows/python-driver.yaml | 4 +-- docker/Dockerfile | 12 ++++----- docker/Dockerfile.dev | 4 +-- drivers/docker-compose.yml | 2 +- 10 files changed, 43 insertions(+), 35 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index 8a0149a04..be18a25c1 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -50,6 +50,10 @@ github: required_pull_request_reviews: required_approving_review_count: 1 + PG18: + required_pull_request_reviews: + required_approving_review_count: 1 + PG17: required_pull_request_reviews: required_approving_review_count: 1 diff --git a/.github/labeler.yml b/.github/labeler.yml index 6baa297c5..f860c2b19 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -19,5 +19,8 @@ PG16: PG17: - base-branch: 'PG17' +PG18: +- base-branch: 'PG18' + master: - base-branch: 'master' diff --git a/.github/workflows/go-driver.yml b/.github/workflows/go-driver.yml index 5b6d15030..1b0379c34 100644 --- 
a/.github/workflows/go-driver.yml +++ b/.github/workflows/go-driver.yml @@ -2,10 +2,10 @@ name: Go Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG18" ] pull_request: - branches: [ "master" ] + branches: [ "PG18" ] jobs: build: diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml index da266f0da..3ee97296c 100644 --- a/.github/workflows/installcheck.yaml +++ b/.github/workflows/installcheck.yaml @@ -2,62 +2,63 @@ name: Build / Regression on: push: - branches: [ "master" ] + branches: [ "PG18" ] + pull_request: - branches: [ "master" ] + branches: [ "PG18" ] jobs: build: runs-on: ubuntu-latest steps: - - name: Get latest commit id of PostgreSQL 17 + - name: Get latest commit id of PostgreSQL 18 run: | - echo "PG_COMMIT_HASH=$(git ls-remote https://git.postgresql.org/git/postgresql.git refs/heads/REL_17_STABLE | awk '{print $1}')" >> $GITHUB_ENV + echo "PG_COMMIT_HASH=$(git ls-remote https://git.postgresql.org/git/postgresql.git refs/heads/REL_18_STABLE | awk '{print $1}')" >> $GITHUB_ENV - - name: Cache PostgreSQL 17 + - name: Cache PostgreSQL 18 uses: actions/cache@v3 - id: pg17cache + id: pg18cache with: - path: ~/pg17 - key: ${{ runner.os }}-v1-pg17-${{ env.PG_COMMIT_HASH }} + path: ~/pg18 + key: ${{ runner.os }}-v1-pg18-${{ env.PG_COMMIT_HASH }} - name: Install necessary dependencies run: | sudo apt-get update sudo apt-get install -y build-essential libreadline-dev zlib1g-dev flex bison - - name: Install PostgreSQL 17 and some extensions - if: steps.pg17cache.outputs.cache-hit != 'true' + - name: Install PostgreSQL 18 and some extensions + if: steps.pg18cache.outputs.cache-hit != 'true' run: | - git clone --depth 1 --branch REL_17_STABLE https://git.postgresql.org/git/postgresql.git ~/pg17source - cd ~/pg17source - ./configure --prefix=$HOME/pg17 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert + git clone --depth 1 --branch REL_18_STABLE https://git.postgresql.org/git/postgresql.git ~/pg18source + cd ~/pg18source 
+ ./configure --prefix=$HOME/pg18 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert make install -j$(nproc) > /dev/null cd contrib cd fuzzystrmatch - make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg18/bin/pg_config install -j$(nproc) > /dev/null cd ../pg_trgm - make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg18/bin/pg_config install -j$(nproc) > /dev/null - uses: actions/checkout@v3 - name: Build AGE id: build run: | - make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) + make PG_CONFIG=$HOME/pg18/bin/pg_config install -j$(nproc) - name: Pull and build pgvector id: pgvector run: | git clone https://github.com/pgvector/pgvector.git cd pgvector - make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg18/bin/pg_config install -j$(nproc) > /dev/null - name: Regression tests id: regression_tests run: | - make PG_CONFIG=$HOME/pg17/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm" + make PG_CONFIG=$HOME/pg18/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm" continue-on-error: true - name: Dump regression test errors diff --git a/.github/workflows/jdbc-driver.yaml b/.github/workflows/jdbc-driver.yaml index 29b368438..293db7b88 100644 --- a/.github/workflows/jdbc-driver.yaml +++ b/.github/workflows/jdbc-driver.yaml @@ -2,10 +2,10 @@ name: JDBC Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG18" ] pull_request: - branches: [ "master" ] + branches: [ "PG18" ] jobs: build: diff --git a/.github/workflows/nodejs-driver.yaml b/.github/workflows/nodejs-driver.yaml index 3d9e07023..c2da7fe60 100644 --- a/.github/workflows/nodejs-driver.yaml +++ b/.github/workflows/nodejs-driver.yaml @@ -2,10 +2,10 @@ name: Nodejs Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG18" ] pull_request: - branches: [ "master" ] + branches: [ "PG18" ] jobs: build: diff --git 
a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index 099b5c871..ef6cf96a1 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -2,10 +2,10 @@ name: Python Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG18" ] pull_request: - branches: [ "master" ] + branches: [ "PG18" ] jobs: build: diff --git a/docker/Dockerfile b/docker/Dockerfile index 0436dc8f9..3eb17c798 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,14 +17,14 @@ # # Build stage: Install necessary development tools for compilation and installation -FROM postgres:17 AS build +FROM postgres:18 AS build RUN apt-get update \ && apt-get install -y --no-install-recommends --no-install-suggests \ bison \ build-essential \ flex \ - postgresql-server-dev-17 + postgresql-server-dev-18 COPY . /age @@ -34,7 +34,7 @@ RUN make && make install # Final stage: Create a final image by copying the files created in the build stage -FROM postgres:17 +FROM postgres:18 RUN apt-get update \ && apt-get install -y --no-install-recommends --no-install-suggests \ @@ -48,9 +48,9 @@ ENV LANG=en_US.UTF-8 ENV LC_COLLATE=en_US.UTF-8 ENV LC_CTYPE=en_US.UTF-8 -COPY --from=build /usr/lib/postgresql/17/lib/age.so /usr/lib/postgresql/17/lib/ -COPY --from=build /usr/share/postgresql/17/extension/age--1.6.0.sql /usr/share/postgresql/17/extension/ -COPY --from=build /usr/share/postgresql/17/extension/age.control /usr/share/postgresql/17/extension/ +COPY --from=build /usr/lib/postgresql/18/lib/age.so /usr/lib/postgresql/18/lib/ +COPY --from=build /usr/share/postgresql/18/extension/age--1.6.0.sql /usr/share/postgresql/18/extension/ +COPY --from=build /usr/share/postgresql/18/extension/age.control /usr/share/postgresql/18/extension/ COPY docker/docker-entrypoint-initdb.d/00-create-extension-age.sql /docker-entrypoint-initdb.d/00-create-extension-age.sql CMD ["postgres", "-c", "shared_preload_libraries=age"] diff --git a/docker/Dockerfile.dev 
b/docker/Dockerfile.dev index 48b2db3ed..e02c21fc4 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -17,14 +17,14 @@ # -FROM postgres:17 +FROM postgres:18 RUN apt-get update RUN apt-get install --assume-yes --no-install-recommends --no-install-suggests \ bison \ build-essential \ flex \ - postgresql-server-dev-17 \ + postgresql-server-dev-18 \ locales ENV LANG=en_US.UTF-8 diff --git a/drivers/docker-compose.yml b/drivers/docker-compose.yml index 9ec072db5..5ae7b79a8 100644 --- a/drivers/docker-compose.yml +++ b/drivers/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.3" services: db: - image: apache/age:dev_snapshot_master + image: apache/age:dev_snapshot_PG18 environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=agens From 146bf38035725004ced7f52ba4c484592e25f53a Mon Sep 17 00:00:00 2001 From: "Krishnakumar R (KK)" <65895020+kk-src@users.noreply.github.com> Date: Tue, 25 Nov 2025 07:25:12 -0800 Subject: [PATCH 02/25] PG18 port for AGE (#2251) * [PG18 port][Set1] Fix header dependencies and use TupleDescAttr macro - Include executor/executor.h for PG18 header reorganization and use TupleDescAttr() accessor macro instead of direct attrs[] access. * [PG18 port][Set2] Adapt to expandRTE signature change and pg_noreturn - Add VarReturningType parameter to expandRTE() calls using VAR_RETURNING_DEFAULT. - Replace pg_attribute_noreturn() with pg_noreturn prefix specifier. * [PG18 port][Set3] Fix double ExecOpenIndices call for PG18 compatibility - PG18 enforces stricter assertions in ExecOpenIndices, requiring ri_IndexRelationDescs to be NULL when called. - In update_entity_tuple(), indices may already be opened by the caller (create_entity_result_rel_info), causing assertion failures. - Add a check to only open indices if not already open, and track ownership with a boolean flag to ensure we only close what we opened. - Found when regression tests failed with assertions, which this change resolves. 
* [PG18 port][Set4] Update regression test expected output for ordering PG18's implementation changes result in different row ordering for queries without explicit ORDER BY clauses. Update expected output files to reflect the new ordering while maintaining identical result content. * [PG18 port][Set5] Address review comments - coding standard fix Note: Assisted by GitHub Copilot Agent mode. --- regress/expected/cypher_match.out | 48 +++++++++++++-------------- regress/expected/expr.out | 24 +++++++------- src/backend/catalog/ag_label.c | 1 + src/backend/executor/cypher_create.c | 2 ++ src/backend/executor/cypher_delete.c | 3 +- src/backend/executor/cypher_merge.c | 2 ++ src/backend/executor/cypher_set.c | 29 ++++++++++++---- src/backend/executor/cypher_utils.c | 1 + src/backend/parser/cypher_clause.c | 4 +-- src/backend/utils/adt/agtype_parser.c | 8 ++--- 10 files changed, 72 insertions(+), 50 deletions(-) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e83ba3b93..ad5a07d05 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -88,8 +88,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -97,8 +97,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM 
cypher('cypher_match', $$ @@ -132,10 +132,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -143,10 +143,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (4 rows) SELECT * FROM 
cypher('cypher_match', $$ @@ -165,8 +165,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -174,8 +174,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (2 rows) -- Right Path Test @@ -348,10 +348,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (i agtype); i ---------------------------------------------------------------------------------- - {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex + {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex + {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -537,18 +537,18 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (i agtype, b agtype, c agtype); i | b | c ---+-----------+----------- - | "end" | "middle" - 0 | "end" | "middle" - 1 | "end" | "middle" | "middle" | 
"end" 0 | "middle" | "end" 1 | "middle" | "end" - | "middle" | "initial" - 0 | "middle" | "initial" - 1 | "middle" | "initial" + | "end" | "middle" + 0 | "end" | "middle" + 1 | "end" | "middle" | "initial" | "middle" 0 | "initial" | "middle" 1 | "initial" | "middle" + | "middle" | "initial" + 0 | "middle" | "initial" + 1 | "middle" | "initial" (12 rows) SELECT * FROM cypher('cypher_match', $$ @@ -558,18 +558,18 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (i agtype, c agtype); i | c ---+----------- - | "middle" - 0 | "middle" - 1 | "middle" | "end" 0 | "end" 1 | "end" - | "initial" - 0 | "initial" - 1 | "initial" | "middle" 0 | "middle" 1 | "middle" + | "middle" + 0 | "middle" + 1 | "middle" + | "initial" + 0 | "initial" + 1 | "initial" (12 rows) -- @@ -2407,15 +2407,15 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 
281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); diff --git a/regress/expected/expr.out 
b/regress/expected/expr.out index 513ea142f..20c9e95f3 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -2644,10 +2644,10 @@ SELECT * FROM cypher('expr', $$ MATCH (v) RETURN v $$) AS (expression agtype); SELECT * FROM cypher('expr', $$ MATCH ()-[e]-() RETURN e $$) AS (expression agtype); expression --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) -- id() @@ -2656,10 +2656,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype); id ------------------ - 1407374883553281 - 1407374883553281 1407374883553282 + 1407374883553281 1407374883553282 + 1407374883553281 (4 rows) SELECT * FROM cypher('expr', $$ @@ -2698,10 +2698,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (start_id agtype); start_id ------------------ - 1125899906842626 - 1125899906842626 1125899906842625 + 1125899906842626 1125899906842625 + 1125899906842626 (4 rows) -- should return null @@ -2731,10 +2731,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (end_id agtype); end_id ------------------ - 1125899906842627 - 1125899906842627 1125899906842626 + 1125899906842627 1125899906842626 + 1125899906842627 (4 rows) -- should return null @@ -2764,10 +2764,10 @@ SELECT * FROM 
cypher('expr', $$ $$) AS (id agtype, start_id agtype, startNode agtype); id | start_id | startnode ------------------+------------------+---------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex (4 rows) -- should return null @@ -2797,10 +2797,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype, end_id agtype, endNode agtype); id | end_id | endnode ------------------+------------------+--------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (4 rows) -- should return null diff --git a/src/backend/catalog/ag_label.c 
b/src/backend/catalog/ag_label.c index b6dcf77a3..54c31ef36 100644 --- a/src/backend/catalog/ag_label.c +++ b/src/backend/catalog/ag_label.c @@ -21,6 +21,7 @@ #include "access/genam.h" #include "catalog/indexing.h" +#include "executor/executor.h" #include "nodes/makefuncs.h" #include "utils/builtins.h" #include "utils/lsyscache.h" diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 2091ea29c..2031fe8d8 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -19,6 +19,8 @@ #include "postgres.h" +#include "executor/executor.h" + #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index 6bb869833..7e10d9afc 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include "executor/executor.h" #include "storage/bufmgr.h" #include "common/hashfn.h" @@ -257,7 +258,7 @@ static agtype_value *extract_entity(CustomScanState *node, tupleDescriptor = scanTupleSlot->tts_tupleDescriptor; /* type checking, make sure the entity is an agtype vertex or edge */ - if (tupleDescriptor->attrs[entity_position -1].atttypid != AGTYPEOID) + if (TupleDescAttr(tupleDescriptor, entity_position -1)->atttypid != AGTYPEOID) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("DELETE clause can only delete agtype"))); diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 9136825ab..6cfa70d48 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -19,6 +19,8 @@ #include "postgres.h" +#include "executor/executor.h" + #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index 
d1837fb16..9fd599eed 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include "executor/executor.h" #include "storage/bufmgr.h" #include "executor/cypher_executor.h" @@ -102,6 +103,7 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, TM_Result result; CommandId cid = GetCurrentCommandId(true); ResultRelInfo **saved_resultRels = estate->es_result_relations; + bool close_indices = false; estate->es_result_relations = &resultRelInfo; @@ -113,7 +115,16 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, if (lock_result == TM_Ok) { - ExecOpenIndices(resultRelInfo, false); + /* + * Open indices if not already open. The resultRelInfo may already + * have indices opened by the caller (e.g., create_entity_result_rel_info), + * so only open if needed and track that we did so for cleanup. + */ + if (resultRelInfo->ri_IndexRelationDescs == NULL) + { + ExecOpenIndices(resultRelInfo, false); + close_indices = true; + } ExecStoreVirtualTuple(elemTupleSlot); tuple = ExecFetchSlotHeapTuple(elemTupleSlot, true, NULL); tuple->t_self = old_tuple->t_self; @@ -141,7 +152,10 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, errmsg("tuple to be updated was already modified"))); } - ExecCloseIndices(resultRelInfo); + if (close_indices) + { + ExecCloseIndices(resultRelInfo); + } estate->es_result_relations = saved_resultRels; return tuple; @@ -160,7 +174,10 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, (update_indexes == TU_Summarizing)); } - ExecCloseIndices(resultRelInfo); + if (close_indices) + { + ExecCloseIndices(resultRelInfo); + } } else if (lock_result == TM_SelfModified) { @@ -310,7 +327,7 @@ static void update_all_paths(CustomScanState *node, graphid id, agtype_value *original_entity_value; /* skip nulls */ - if (scanTupleSlot->tts_tupleDescriptor->attrs[i].atttypid != AGTYPEOID) + if 
(TupleDescAttr(scanTupleSlot->tts_tupleDescriptor, i)->atttypid != AGTYPEOID) { continue; } @@ -414,7 +431,7 @@ static void process_update_list(CustomScanState *node) continue; } - if (scanTupleSlot->tts_tupleDescriptor->attrs[update_item->entity_position -1].atttypid != AGTYPEOID) + if (TupleDescAttr(scanTupleSlot->tts_tupleDescriptor, update_item->entity_position -1)->atttypid != AGTYPEOID) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -588,7 +605,7 @@ static void process_update_list(CustomScanState *node) } estate->es_snapshot->curcid = cid; - /* close relation */ + /* close relation */ ExecCloseIndices(resultRelInfo); table_close(resultRelInfo->ri_RelationDesc, RowExclusiveLock); diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index c8d568831..d7a55f709 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -24,6 +24,7 @@ #include "postgres.h" +#include "executor/executor.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 468706a87..971f8c3d2 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2585,9 +2585,9 @@ static void get_res_cols(ParseState *pstate, ParseNamespaceItem *l_pnsi, List *colnames = NIL; List *colvars = NIL; - expandRTE(l_pnsi->p_rte, l_pnsi->p_rtindex, 0, -1, false, + expandRTE(l_pnsi->p_rte, l_pnsi->p_rtindex, 0, VAR_RETURNING_DEFAULT, -1, false, &l_colnames, &l_colvars); - expandRTE(r_pnsi->p_rte, r_pnsi->p_rtindex, 0, -1, false, + expandRTE(r_pnsi->p_rte, r_pnsi->p_rtindex, 0, VAR_RETURNING_DEFAULT, -1, false, &r_colnames, &r_colvars); /* add in all colnames and colvars from the l_rte. 
*/ diff --git a/src/backend/utils/adt/agtype_parser.c b/src/backend/utils/adt/agtype_parser.c index c485cb925..40fc8d8c5 100644 --- a/src/backend/utils/adt/agtype_parser.c +++ b/src/backend/utils/adt/agtype_parser.c @@ -74,11 +74,9 @@ static void parse_object(agtype_lex_context *lex, agtype_sem_action *sem); static void parse_array_element(agtype_lex_context *lex, agtype_sem_action *sem); static void parse_array(agtype_lex_context *lex, agtype_sem_action *sem); -static void report_parse_error(agtype_parse_context ctx, - agtype_lex_context *lex) - pg_attribute_noreturn(); -static void report_invalid_token(agtype_lex_context *lex) - pg_attribute_noreturn(); +static pg_noreturn void report_parse_error(agtype_parse_context ctx, + agtype_lex_context *lex); +static pg_noreturn void report_invalid_token(agtype_lex_context *lex); static int report_agtype_context(agtype_lex_context *lex); static char *extract_mb_char(char *s); From 201399f4da153aad766e60cf97088d7b5ab55435 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 2 Dec 2025 10:50:16 -0800 Subject: [PATCH 03/25] Update CI for BaseDockerizedTest (#2254) Updated the CI BaseDockerizedTest to point to dev_snapshot_PG18 --- .../src/test/java/org/apache/age/jdbc/BaseDockerizedTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/jdbc/lib/src/test/java/org/apache/age/jdbc/BaseDockerizedTest.java b/drivers/jdbc/lib/src/test/java/org/apache/age/jdbc/BaseDockerizedTest.java index 393175c3d..d9caa4bb0 100644 --- a/drivers/jdbc/lib/src/test/java/org/apache/age/jdbc/BaseDockerizedTest.java +++ b/drivers/jdbc/lib/src/test/java/org/apache/age/jdbc/BaseDockerizedTest.java @@ -52,7 +52,7 @@ public void beforeAll() throws Exception { String CORRECT_DB_PASSWORDS = "postgres"; agensGraphContainer = new GenericContainer<>(DockerImageName - .parse("apache/age:dev_snapshot_master")) + .parse("apache/age:dev_snapshot_PG18")) .withEnv("POSTGRES_PASSWORD", CORRECT_DB_PASSWORDS) .withExposedPorts(5432); 
agensGraphContainer.start(); From f364664e0d2835429d480c90683d193599e249cb Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 2 Dec 2025 10:51:54 -0800 Subject: [PATCH 04/25] Fix DockerHub build warning messages (#2252) PR fixes build warning messages on DockerHub and on my local build. No regression tests needed. modified: src/include/nodes/ag_nodes.h modified: src/include/optimizer/cypher_createplan.h modified: src/include/optimizer/cypher_pathnode.h modified: tools/gen_keywordlist.pl --- src/include/nodes/ag_nodes.h | 3 +++ src/include/optimizer/cypher_createplan.h | 7 +++++++ src/include/optimizer/cypher_pathnode.h | 7 +++++++ tools/gen_keywordlist.pl | 1 + 4 files changed, 18 insertions(+) diff --git a/src/include/nodes/ag_nodes.h b/src/include/nodes/ag_nodes.h index f0cc22043..121832c01 100644 --- a/src/include/nodes/ag_nodes.h +++ b/src/include/nodes/ag_nodes.h @@ -78,6 +78,9 @@ typedef enum ag_node_tag cypher_merge_information_t } ag_node_tag; +extern const char *node_names[]; +extern const ExtensibleNodeMethods node_methods[]; + void register_ag_nodes(void); ExtensibleNode *_new_ag_node(Size size, ag_node_tag tag); diff --git a/src/include/optimizer/cypher_createplan.h b/src/include/optimizer/cypher_createplan.h index 2d5d2e698..ab01f2b58 100644 --- a/src/include/optimizer/cypher_createplan.h +++ b/src/include/optimizer/cypher_createplan.h @@ -20,6 +20,8 @@ #ifndef AG_CYPHER_CREATEPLAN_H #define AG_CYPHER_CREATEPLAN_H +#include "nodes/extensible.h" + Plan *plan_cypher_create_path(PlannerInfo *root, RelOptInfo *rel, CustomPath *best_path, List *tlist, List *clauses, List *custom_plans); @@ -36,4 +38,9 @@ Plan *plan_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, CustomPath *best_path, List *tlist, List *clauses, List *custom_plans); +extern const CustomScanMethods cypher_create_plan_methods; +extern const CustomScanMethods cypher_set_plan_methods; +extern const CustomScanMethods cypher_delete_plan_methods; +extern const CustomScanMethods 
cypher_merge_plan_methods; + #endif diff --git a/src/include/optimizer/cypher_pathnode.h b/src/include/optimizer/cypher_pathnode.h index 75c2b07de..676832029 100644 --- a/src/include/optimizer/cypher_pathnode.h +++ b/src/include/optimizer/cypher_pathnode.h @@ -20,6 +20,8 @@ #ifndef AG_CYPHER_PATHNODE_H #define AG_CYPHER_PATHNODE_H +#include "nodes/extensible.h" + #define CREATE_PATH_NAME "Cypher Create" #define SET_PATH_NAME "Cypher Set" #define DELETE_PATH_NAME "Cypher Delete" @@ -34,4 +36,9 @@ CustomPath *create_cypher_delete_path(PlannerInfo *root, RelOptInfo *rel, CustomPath *create_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, List *custom_private); +extern const CustomPathMethods cypher_create_path_methods; +extern const CustomPathMethods cypher_set_path_methods; +extern const CustomPathMethods cypher_delete_path_methods; +extern const CustomPathMethods cypher_merge_path_methods; + #endif diff --git a/tools/gen_keywordlist.pl b/tools/gen_keywordlist.pl index 499300433..58e66db8e 100755 --- a/tools/gen_keywordlist.pl +++ b/tools/gen_keywordlist.pl @@ -112,6 +112,7 @@ #define %s_H #include "common/kwlookup.h" +#include "parser/cypher_keywords.h" EOM From c176b57ce3e1437c23dd486a8bc44fcd3802e989 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 19 Nov 2025 13:45:44 -0800 Subject: [PATCH 05/25] Fix issue 2245 - Creating more than 41 vlabels causes crash in drop_graph (#2248) Fixed issue 2245 - Creating more than 41 vlabels causes drop_grapth to fail with "label (relation) cache corrupted" and crashing out on the following command. This was due to corruption of the label_relation_cache during the HASH_DELETE process. As the issue was with a cache flush routine, it was necessary to fix them all. 
Here is the list of the flush functions that were fixed - static void flush_graph_name_cache(void) static void flush_graph_namespace_cache(void) static void flush_label_name_graph_cache(void) static void flush_label_graph_oid_cache(void) static void flush_label_relation_cache(void) static void flush_label_seq_name_graph_cache(void) Added regression tests. modified: regress/expected/catalog.out modified: regress/sql/catalog.sql modified: src/backend/utils/cache/ag_cache.c --- regress/expected/catalog.out | 141 ++++++++++++++++++++++- regress/sql/catalog.sql | 42 ++++++- src/backend/utils/cache/ag_cache.c | 174 ++++++++++------------------- 3 files changed, 241 insertions(+), 116 deletions(-) diff --git a/regress/expected/catalog.out b/regress/expected/catalog.out index d06a0ce67..a15fa4698 100644 --- a/regress/expected/catalog.out +++ b/regress/expected/catalog.out @@ -457,7 +457,146 @@ NOTICE: graph does not exist (1 row) DROP FUNCTION raise_notice(TEXT); --- dropping the graph +-- +-- Fix issue 2245 - Creating more than 41 vlabels causes drop_graph to fail with +-- label (relation) cache corrupted +-- +-- this result will change if another graph was created prior to this point. 
+SELECT count(*) FROM ag_label; + count +------- + 2 +(1 row) + +SELECT * FROM create_graph('issue_2245'); +NOTICE: graph "issue_2245" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2245', $$ + CREATE (a1:Part1 {part_num: '123'}), (a2:Part2 {part_num: '345'}), (a3:Part3 {part_num: '456'}), + (a4:Part4 {part_num: '789'}), (a5:Part5 {part_num: '123'}), (a6:Part6 {part_num: '345'}), + (a7:Part7 {part_num: '456'}), (a8:Part8 {part_num: '789'}), (a9:Part9 {part_num: '123'}), + (a10:Part10 {part_num: '345'}), (a11:Part11 {part_num: '456'}), (a12:Part12 {part_num: '789'}), + (a13:Part13 {part_num: '123'}), (a14:Part14 {part_num: '345'}), (a15:Part15 {part_num: '456'}), + (a16:Part16 {part_num: '789'}), (a17:Part17 {part_num: '123'}), (a18:Part18 {part_num: '345'}), + (a19:Part19 {part_num: '456'}), (a20:Part20 {part_num: '789'}), (a21:Part21 {part_num: '123'}), + (a22:Part22 {part_num: '345'}), (a23:Part23 {part_num: '456'}), (a24:Part24 {part_num: '789'}), + (a25:Part25 {part_num: '123'}), (a26:Part26 {part_num: '345'}), (a27:Part27 {part_num: '456'}), + (a28:Part28 {part_num: '789'}), (a29:Part29 {part_num: '789'}), (a30:Part30 {part_num: '123'}), + (a31:Part31 {part_num: '345'}), (a32:Part32 {part_num: '456'}), (a33:Part33 {part_num: '789'}), + (a34:Part34 {part_num: '123'}), (a35:Part35 {part_num: '345'}), (a36:Part36 {part_num: '456'}), + (a37:Part37 {part_num: '789'}), (a38:Part38 {part_num: '123'}), (a39:Part39 {part_num: '345'}), + (a40:Part40 {part_num: '456'}), (a41:Part41 {part_num: '789'}), (a42:Part42 {part_num: '345'}), + (a43:Part43 {part_num: '456'}), (a44:Part44 {part_num: '789'}), (a45:Part45 {part_num: '456'}), + (a46:Part46 {part_num: '789'}), (a47:Part47 {part_num: '456'}), (a48:Part48 {part_num: '789'}), + (a49:Part49 {part_num: '789'}), (a50:Part50 {part_num: '456'}), (a51:Part51 {part_num: '789'}) + $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT count(*) FROM ag_label; + count +------- + 55 
+(1 row) + +SELECT drop_graph('issue_2245', true); +NOTICE: drop cascades to 53 other objects +DETAIL: drop cascades to table issue_2245._ag_label_vertex +drop cascades to table issue_2245._ag_label_edge +drop cascades to table issue_2245."Part1" +drop cascades to table issue_2245."Part2" +drop cascades to table issue_2245."Part3" +drop cascades to table issue_2245."Part4" +drop cascades to table issue_2245."Part5" +drop cascades to table issue_2245."Part6" +drop cascades to table issue_2245."Part7" +drop cascades to table issue_2245."Part8" +drop cascades to table issue_2245."Part9" +drop cascades to table issue_2245."Part10" +drop cascades to table issue_2245."Part11" +drop cascades to table issue_2245."Part12" +drop cascades to table issue_2245."Part13" +drop cascades to table issue_2245."Part14" +drop cascades to table issue_2245."Part15" +drop cascades to table issue_2245."Part16" +drop cascades to table issue_2245."Part17" +drop cascades to table issue_2245."Part18" +drop cascades to table issue_2245."Part19" +drop cascades to table issue_2245."Part20" +drop cascades to table issue_2245."Part21" +drop cascades to table issue_2245."Part22" +drop cascades to table issue_2245."Part23" +drop cascades to table issue_2245."Part24" +drop cascades to table issue_2245."Part25" +drop cascades to table issue_2245."Part26" +drop cascades to table issue_2245."Part27" +drop cascades to table issue_2245."Part28" +drop cascades to table issue_2245."Part29" +drop cascades to table issue_2245."Part30" +drop cascades to table issue_2245."Part31" +drop cascades to table issue_2245."Part32" +drop cascades to table issue_2245."Part33" +drop cascades to table issue_2245."Part34" +drop cascades to table issue_2245."Part35" +drop cascades to table issue_2245."Part36" +drop cascades to table issue_2245."Part37" +drop cascades to table issue_2245."Part38" +drop cascades to table issue_2245."Part39" +drop cascades to table issue_2245."Part40" +drop cascades to table issue_2245."Part41" 
+drop cascades to table issue_2245."Part42" +drop cascades to table issue_2245."Part43" +drop cascades to table issue_2245."Part44" +drop cascades to table issue_2245."Part45" +drop cascades to table issue_2245."Part46" +drop cascades to table issue_2245."Part47" +drop cascades to table issue_2245."Part48" +drop cascades to table issue_2245."Part49" +drop cascades to table issue_2245."Part50" +drop cascades to table issue_2245."Part51" +NOTICE: graph "issue_2245" has been dropped + drop_graph +------------ + +(1 row) + +-- this result should be the same as the one before the create_graph +SELECT count(*) FROM ag_label; + count +------- + 2 +(1 row) + +-- create the graph again +SELECT * FROM create_graph('issue_2245'); +NOTICE: graph "issue_2245" has been created + create_graph +-------------- + +(1 row) + +SELECT count(*) FROM ag_label; + count +------- + 4 +(1 row) + +-- dropping the graphs +SELECT drop_graph('issue_2245', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2245._ag_label_vertex +drop cascades to table issue_2245._ag_label_edge +NOTICE: graph "issue_2245" has been dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('graph', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table graph._ag_label_vertex diff --git a/regress/sql/catalog.sql b/regress/sql/catalog.sql index 85fc4e8ab..bb72c3495 100644 --- a/regress/sql/catalog.sql +++ b/regress/sql/catalog.sql @@ -193,5 +193,45 @@ SELECT raise_notice('graph1'); DROP FUNCTION raise_notice(TEXT); --- dropping the graph +-- +-- Fix issue 2245 - Creating more than 41 vlabels causes drop_graph to fail with +-- label (relation) cache corrupted +-- + +-- this result will change if another graph was created prior to this point. 
+SELECT count(*) FROM ag_label; + +SELECT * FROM create_graph('issue_2245'); +SELECT * FROM cypher('issue_2245', $$ + CREATE (a1:Part1 {part_num: '123'}), (a2:Part2 {part_num: '345'}), (a3:Part3 {part_num: '456'}), + (a4:Part4 {part_num: '789'}), (a5:Part5 {part_num: '123'}), (a6:Part6 {part_num: '345'}), + (a7:Part7 {part_num: '456'}), (a8:Part8 {part_num: '789'}), (a9:Part9 {part_num: '123'}), + (a10:Part10 {part_num: '345'}), (a11:Part11 {part_num: '456'}), (a12:Part12 {part_num: '789'}), + (a13:Part13 {part_num: '123'}), (a14:Part14 {part_num: '345'}), (a15:Part15 {part_num: '456'}), + (a16:Part16 {part_num: '789'}), (a17:Part17 {part_num: '123'}), (a18:Part18 {part_num: '345'}), + (a19:Part19 {part_num: '456'}), (a20:Part20 {part_num: '789'}), (a21:Part21 {part_num: '123'}), + (a22:Part22 {part_num: '345'}), (a23:Part23 {part_num: '456'}), (a24:Part24 {part_num: '789'}), + (a25:Part25 {part_num: '123'}), (a26:Part26 {part_num: '345'}), (a27:Part27 {part_num: '456'}), + (a28:Part28 {part_num: '789'}), (a29:Part29 {part_num: '789'}), (a30:Part30 {part_num: '123'}), + (a31:Part31 {part_num: '345'}), (a32:Part32 {part_num: '456'}), (a33:Part33 {part_num: '789'}), + (a34:Part34 {part_num: '123'}), (a35:Part35 {part_num: '345'}), (a36:Part36 {part_num: '456'}), + (a37:Part37 {part_num: '789'}), (a38:Part38 {part_num: '123'}), (a39:Part39 {part_num: '345'}), + (a40:Part40 {part_num: '456'}), (a41:Part41 {part_num: '789'}), (a42:Part42 {part_num: '345'}), + (a43:Part43 {part_num: '456'}), (a44:Part44 {part_num: '789'}), (a45:Part45 {part_num: '456'}), + (a46:Part46 {part_num: '789'}), (a47:Part47 {part_num: '456'}), (a48:Part48 {part_num: '789'}), + (a49:Part49 {part_num: '789'}), (a50:Part50 {part_num: '456'}), (a51:Part51 {part_num: '789'}) + $$) AS (result agtype); + +SELECT count(*) FROM ag_label; +SELECT drop_graph('issue_2245', true); + +-- this result should be the same as the one before the create_graph +SELECT count(*) FROM ag_label; + +-- create the graph 
again +SELECT * FROM create_graph('issue_2245'); +SELECT count(*) FROM ag_label; + +-- dropping the graphs +SELECT drop_graph('issue_2245', true); SELECT drop_graph('graph', true); diff --git a/src/backend/utils/cache/ag_cache.c b/src/backend/utils/cache/ag_cache.c index e3c4d0794..493ffcfa9 100644 --- a/src/backend/utils/cache/ag_cache.c +++ b/src/backend/utils/cache/ag_cache.c @@ -286,52 +286,34 @@ static void invalidate_graph_caches(Datum arg, int cache_id, uint32 hash_value) static void flush_graph_name_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, graph_name_cache_hash); - for (;;) + /* + * If the graph_name_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (graph_name_cache_hash) { - graph_name_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(graph_name_cache_hash, &entry->name, HASH_REMOVE, - NULL); - if (!removed) - { - ereport(ERROR, (errmsg_internal("graph (name) cache corrupted"))); - } + hash_destroy(graph_name_cache_hash); + graph_name_cache_hash = NULL; } + + /* recreate the graph_name_cache */ + create_graph_name_cache(); } static void flush_graph_namespace_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, graph_namespace_cache_hash); - for (;;) + /* + * If the graph_namespace_cache exists, destroy it. This will avoid any + * potential corruption issues. 
+ */ + if (graph_namespace_cache_hash) { - graph_namespace_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - - removed = hash_search(graph_namespace_cache_hash, &entry->namespace, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("graph (namespace) cache corrupted"))); - } + hash_destroy(graph_namespace_cache_hash); + graph_namespace_cache_hash = NULL; } + + /* recreate the graph_namespace_cache */ + create_graph_namespace_cache(); } graph_cache_data *search_graph_name_cache(const char *name) @@ -664,27 +646,18 @@ static void invalidate_label_name_graph_cache(Oid relid) static void flush_label_name_graph_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_name_graph_cache_hash); - for (;;) + /* + * If the label_name_graph_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (label_name_graph_cache_hash) { - label_name_graph_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_name_graph_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (name, graph) cache corrupted"))); - } + hash_destroy(label_name_graph_cache_hash); + label_name_graph_cache_hash = NULL; } + + /* recreate the label_name_graph_cache */ + create_label_name_graph_cache(); } static void invalidate_label_graph_oid_cache(Oid relid) @@ -722,27 +695,18 @@ static void invalidate_label_graph_oid_cache(Oid relid) static void flush_label_graph_oid_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_graph_oid_cache_hash); - for (;;) + /* + * If the label_graph_oid_cache exists, destroy it. This will avoid any + * potential corruption issues. 
+ */ + if (label_graph_oid_cache_hash) { - label_graph_oid_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_graph_oid_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (graph, id) cache corrupted"))); - } + hash_destroy(label_graph_oid_cache_hash); + label_graph_oid_cache_hash = NULL; } + + /* recreate the label_graph_oid_cache */ + create_label_graph_oid_cache(); } static void invalidate_label_relation_cache(Oid relid) @@ -765,27 +729,18 @@ static void invalidate_label_relation_cache(Oid relid) static void flush_label_relation_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_relation_cache_hash); - for (;;) + /* + * If the label_relation_cache exists, destroy it. This will avoid any + * potential corruption issues. + */ + if (label_relation_cache_hash) { - label_relation_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_relation_cache_hash, &entry->relation, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (relation) cache corrupted"))); - } + hash_destroy(label_relation_cache_hash); + label_relation_cache_hash = NULL; } + + /* recreate the label_relation_cache */ + create_label_relation_cache(); } static void invalidate_label_seq_name_graph_cache(Oid relid) @@ -823,27 +778,18 @@ static void invalidate_label_seq_name_graph_cache(Oid relid) static void flush_label_seq_name_graph_cache(void) { - HASH_SEQ_STATUS hash_seq; - - hash_seq_init(&hash_seq, label_seq_name_graph_cache_hash); - for (;;) + /* + * If the label_seq_name_graph_cache exists, destroy it. This will + * avoid any potential corruption issues by deleting entries. 
+ */ + if (label_seq_name_graph_cache_hash) { - label_seq_name_graph_cache_entry *entry; - void *removed; - - entry = hash_seq_search(&hash_seq); - if (!entry) - { - break; - } - removed = hash_search(label_seq_name_graph_cache_hash, &entry->key, - HASH_REMOVE, NULL); - if (!removed) - { - ereport(ERROR, - (errmsg_internal("label (seq_name, graph) cache corrupted"))); - } + hash_destroy(label_seq_name_graph_cache_hash); + label_seq_name_graph_cache_hash = NULL; } + + /* recreate the label_seq_name_graph_cache */ + create_label_seq_name_graph_cache(); } label_cache_data *search_label_name_graph_cache(const char *name, Oid graph) From 2f36b1c31cc4cd0d18cdf58fc3c5a8e81aab65a8 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 3 Dec 2025 22:17:09 +0500 Subject: [PATCH 06/25] Add index on id columns (#2117) - Whenever a label will be created, indices on id columns will be created by default. In case of vertex, a unique index on id column will be created, which will also serve as a unique constraint. In case of edge, a non-unique index on start_id and end_id columns will be created. - This change is expected to improve the performance of queries that involve joins. From some performance tests, it was observed that the performance of queries improved alot. - Loader was updated to insert tuples in indices as well. This has caused to slow the loader down a bit, but it was necessary. - A bug related to command ids in cypher_delete executor was also fixed. 
Resolved conflicts: regress/expected/cypher_match.out regress/expected/expr.out --- regress/expected/age_load.out | 14 -- regress/expected/cypher_match.out | 56 +++--- regress/expected/cypher_merge.out | 2 +- regress/expected/cypher_subquery.out | 2 +- regress/expected/cypher_vle.out | 24 +-- regress/expected/expr.out | 20 +-- regress/expected/graph_generation.out | 20 +-- regress/expected/index.out | 97 +++++++++-- regress/expected/map_projection.out | 2 +- regress/sql/age_load.sql | 6 - regress/sql/index.sql | 34 ++-- src/backend/commands/label_commands.c | 78 ++++++++- src/backend/executor/cypher_delete.c | 4 + src/backend/utils/load/ag_load_edges.c | 75 +-------- src/backend/utils/load/ag_load_labels.c | 215 +----------------------- src/backend/utils/load/age_load.c | 193 +++++++++++++++++---- src/include/utils/load/ag_load_labels.h | 5 - src/include/utils/load/age_load.h | 16 +- 18 files changed, 410 insertions(+), 453 deletions(-) diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index b638e636b..5f2bdab78 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -43,13 +43,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', (1 row) --- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; ?column? @@ -74,13 +67,6 @@ NOTICE: VLabel "City" has been created (1 row) --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; ?column? 
diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index ad5a07d05..a34f59196 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -79,8 +79,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex]::path [{"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex]::path + [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, 
"label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -115,8 +115,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -133,9 +133,9 @@ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -154,10 +154,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, 
"label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -250,8 +250,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Left Path Test @@ -308,8 +308,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Divergent Path Tests @@ -412,8 +412,8 @@ SELECT * 
FROM cypher('cypher_match', $$ $$) AS (i agtype); i --------------------------------------------------------------------------------------------------------------------------- - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -712,8 +712,8 @@ $$) AS (r0 agtype); {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (6 rows) SELECT * FROM cypher('cypher_match', $$ @@ -775,8 +775,8 @@ $$) AS (r1 agtype); {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 
1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge + {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge (12 rows) @@ -1055,8 +1055,8 @@ SELECT * FROM cypher('cypher_match', {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, 
"properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) SELECT * FROM cypher('cypher_match', @@ -1068,8 +1068,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) -- Property Constraint in EXISTS @@ -1123,8 +1123,8 @@ AS (u 
agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 2814749767106561, "properties": {}}::edge | {"id": 2814749767106561, "label": "loop", 
"properties": {"id": "initial"}}::vertex (7 rows) @@ -1156,8 +1156,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 
2814749767106561, "properties": {}}::edge | {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex (7 rows) @@ -2164,8 +2164,8 @@ SELECT * FROM cypher('cypher_match', $$ MATCH p=(u)-[]-()-[]-(u) RETURN p $$)as p ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path - [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 
4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path + [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path (4 rows) @@ -2421,8 +2421,8 @@ SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relation SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, 
"relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ CREATE () WITH * MATCH (x{n0:x.n1}) RETURN 0 $$) as (a agtype); @@ -3514,19 +3514,17 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ (1 row) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (y.id = _age_default_alias_0.end_id) - -> Seq Scan on "Product" y - -> Hash - -> Hash Join - Hash Cond: (x.id = _age_default_alias_0.start_id) - -> Seq Scan on "Customer" x - -> Hash - -> Seq Scan on bought _age_default_alias_0 - Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = '"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) -(10 rows) + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Seq Scan on bought _age_default_alias_0 + Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = '"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) + -> Index Only Scan using "Customer_pkey" on "Customer" x + Index Cond: (id = _age_default_alias_0.start_id) + -> Index Only Scan using "Product_pkey" on "Product" y + Index Cond: (id = _age_default_alias_0.end_id) +(8 rows) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); QUERY PLAN diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 238a4c472..56a23f513 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -655,8 +655,8 @@ $$) AS (name agtype, bornIn agtype, city agtype); name | bornin | city -------------------+--------------+----------------------------------------------------------------------------------------- "Rob Reiner" | "New York" | {"id": 1970324836974593, "label": "City", "properties": {"name": "New York"}}::vertex - "Martin Sheen" | "Ohio" | {"id": 1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex "Michael Douglas" | "New Jersey" | {"id": 1970324836974594, "label": "City", "properties": {"name": "New Jersey"}}::vertex + "Martin Sheen" | "Ohio" | {"id": 1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex (3 rows) --validate diff --git a/regress/expected/cypher_subquery.out b/regress/expected/cypher_subquery.out 
index ff5672bca..934549f82 100644 --- a/regress/expected/cypher_subquery.out +++ b/regress/expected/cypher_subquery.out @@ -668,8 +668,8 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person)-[]->(b) RETURN a $$) AS (result agtype); result ------------------------------------------------------------------------------------------------- - {"id": 844424930131975, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex {"id": 844424930131977, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex {"id": 844424930131977, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex (3 rows) diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 9cbb3420c..57f930d98 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -508,37 +508,37 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 
2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup 
edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 
1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, 
"properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, 
"packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 
1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 
2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) -- diff --git a/regress/expected/expr.out 
b/regress/expected/expr.out index 20c9e95f3..d0546d8a3 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -2645,9 +2645,9 @@ SELECT * FROM cypher('expr', $$ MATCH ()-[e]-() RETURN e $$) AS (expression agty expression --------------------------------------------------------------------------------------------------------------------------- {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) -- id() @@ -2657,9 +2657,9 @@ $$) AS (id agtype); id ------------------ 1407374883553282 - 1407374883553281 1407374883553282 1407374883553281 + 1407374883553281 (4 rows) SELECT * FROM cypher('expr', $$ @@ -2699,9 +2699,9 @@ $$) AS (start_id agtype); start_id ------------------ 1125899906842625 - 1125899906842626 1125899906842625 1125899906842626 + 1125899906842626 (4 rows) -- should return null @@ -2732,9 +2732,9 @@ $$) AS (end_id agtype); end_id ------------------ 1125899906842626 - 1125899906842627 1125899906842626 1125899906842627 + 1125899906842627 (4 rows) -- should return null @@ -2765,9 +2765,9 @@ $$) AS (id agtype, start_id agtype, startNode agtype); id | start_id | startnode ------------------+------------------+---------------------------------------------------------------------------------- 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", 
"properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex (4 rows) -- should return null @@ -2798,9 +2798,9 @@ $$) AS (id agtype, end_id agtype, endNode agtype); id | end_id | endnode ------------------+------------------+--------------------------------------------------------------------------------- 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (4 rows) -- should return null @@ -7496,10 +7496,10 @@ SELECT * FROM cypher('opt_forms', $$MATCH (u) RETURN *$$) AS (result agtype); SELECT * FROM cypher('opt_forms', $$MATCH (u)--(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); u | v ---+--- - 2 | 3 - 3 | 2 1 | 2 2 | 1 + 2 | 3 + 3 | 2 (4 rows) SELECT * FROM cypher('opt_forms', $$MATCH (u)-->(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); @@ -7686,12 +7686,12 @@ SELECT * FROM cypher('keys', $$MATCH (v) RETURN keys(v)$$) AS (vertex_keys agtyp SELECT * FROM cypher('keys', $$MATCH ()-[e]-() RETURN keys(e)$$) AS (edge_keys agtype); edge_keys ----------- - [] - [] ["song"] ["song"] + [] ["song"] ["song"] + [] (6 rows) SELECT * FROM cypher('keys', $$RETURN keys({a:1,b:'two',c:[1,2,3]})$$) AS (keys agtype); diff --git 
a/regress/expected/graph_generation.out b/regress/expected/graph_generation.out index 235052a08..ca511eafa 100644 --- a/regress/expected/graph_generation.out +++ b/regress/expected/graph_generation.out @@ -43,15 +43,15 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge + {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131971, "properties": {}}::edge - {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 
844424930131969, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge (10 rows) SELECT * FROM create_complete_graph('gp1',5,'edges','vertices'); @@ -140,25 +140,25 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge + {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131971, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", 
"end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842635, "label": "edges", "end_id": 844424930131975, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842636, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131974, "properties": {}}::edge + {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842637, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842640, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131975, "properties": {}}::edge - {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge + {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842638, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131974, "properties": {}}::edge {"id": 1125899906842641, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842643, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131976, "properties": {}}::edge + {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge {"id": 1125899906842645, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131969, "properties": {}}::edge (21 rows) diff --git a/regress/expected/index.out b/regress/expected/index.out index f911900ab..3ed7b1c33 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ 
-264,18 +264,22 @@ $$) as (n agtype); --- (0 rows) -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the incices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; + indexname | indexdef +-----------------------------+------------------------------------------------------------------------------------------------ + _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) + _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) + _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) + cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) + has_city_end_id_idx | CREATE INDEX has_city_end_id_idx 
ON cypher_index.has_city USING btree (end_id) + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) +(10 rows) + SET enable_mergejoin = ON; SET enable_hashjoin = OFF; SET enable_nestloop = OFF; @@ -288,6 +292,29 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Merge Join + Merge Cond: (_age_default_alias_0.id = e.start_id) + -> Merge Append + Sort Key: _age_default_alias_0.id + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Sort + Sort Key: e.start_id + -> Merge Join + Merge Cond: (a.id = e.end_id) + -> Index Only Scan using "Country_pkey" on "Country" a + -> Index Scan using has_city_end_id_idx on has_city e +(15 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -300,17 +327,53 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (_age_default_alias_0.id = e.start_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Hash + -> Hash Join + 
Hash Cond: (e.end_id = a.id) + -> Index Scan using has_city_end_id_idx on has_city e + -> Hash + -> Index Only Scan using "Country_pkey" on "Country" a +(14 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ - MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e $$) as (n agtype); - count -------- - 10 -(1 row) + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Nested Loop + -> Index Scan using has_city_start_id_idx on has_city e + -> Index Only Scan using "Country_pkey" on "Country" a + Index Cond: (id = e.end_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + Index Cond: (id = e.start_id) + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + Index Cond: (id = e.start_id) + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + Index Cond: (id = e.start_id) + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + Index Cond: (id = e.start_id) +(15 rows) SET enable_mergejoin = ON; SET enable_hashjoin = ON; diff --git a/regress/expected/map_projection.out b/regress/expected/map_projection.out index dcb7f0e76..f0c45c557 100644 --- a/regress/expected/map_projection.out +++ b/regress/expected/map_projection.out @@ -152,7 +152,7 @@ $$ $$) as (a agtype); a -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}, {"name": "Tom Hanks", "movies": [{"title": "Forrest Gump"}, {"title": "Finch"}, {"title": "The Circle"}]}] + [{"name": "Tom Hanks", "movies": [{"title": "Forrest Gump"}, 
{"title": "Finch"}, {"title": "The Circle"}]}, {"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}] (1 row) -- drop diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 425ca5417..180248bf1 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -34,9 +34,6 @@ SELECT * FROM cypher('agload_test_graph', $$CREATE (n:Country {__id__:1}) RETURN SELECT load_labels_from_file('agload_test_graph', 'Country', 'age_load/countries.csv', true); --- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; @@ -52,9 +49,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', SELECT load_labels_from_file('agload_test_graph', 'City', 'age_load/cities.csv', true); --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e. 
146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; diff --git a/regress/sql/index.sql b/regress/sql/index.sql index aac1dc40e..d9a4331a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -166,26 +166,8 @@ SELECT * FROM cypher('cypher_index', $$ (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"}) $$) as (n agtype); -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; - -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); - -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; - -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; - -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); - -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); - -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the incices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; @@ -196,6 +178,11 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -205,12 +192,17 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ - 
MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e $$) as (n agtype); diff --git a/src/backend/commands/label_commands.c b/src/backend/commands/label_commands.c index f603ec97c..c9fdfad89 100644 --- a/src/backend/commands/label_commands.c +++ b/src/backend/commands/label_commands.c @@ -79,6 +79,10 @@ static void range_var_callback_for_remove_relation(const RangeVar *rel, Oid rel_oid, Oid odl_rel_oid, void *arg); +static void create_index_on_column(char *schema_name, + char *rel_name, + char *colname, + bool unique); PG_FUNCTION_INFO_V1(age_is_valid_label_name); @@ -379,16 +383,24 @@ static void create_table_for_label(char *graph_name, char *label_name, * inheritance system. */ if (list_length(parents) != 0) + { create_stmt->tableElts = NIL; + } else if (label_type == LABEL_TYPE_EDGE) + { create_stmt->tableElts = create_edge_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else if (label_type == LABEL_TYPE_VERTEX) + { create_stmt->tableElts = create_vertex_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else + { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("undefined label type \'%c\'", label_type))); + } create_stmt->inhRelations = parents; create_stmt->partbound = NULL; @@ -409,7 +421,69 @@ static void create_table_for_label(char *graph_name, char *label_name, ProcessUtility(wrapper, "(generated CREATE TABLE command)", false, PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, NULL); - /* CommandCounterIncrement() is called in ProcessUtility() */ + + /* Create index on id columns */ + if (label_type == LABEL_TYPE_VERTEX) + { + create_index_on_column(schema_name, rel_name, "id", true); + } + else if (label_type == LABEL_TYPE_EDGE) + { + create_index_on_column(schema_name, rel_name, "start_id", false); + create_index_on_column(schema_name, rel_name, "end_id", false); + } +} + +static void create_index_on_column(char *schema_name, + char 
*rel_name, + char *colname, + bool unique) +{ + IndexStmt *index_stmt; + IndexElem *index_col; + PlannedStmt *index_wrapper; + + index_stmt = makeNode(IndexStmt); + index_col = makeNode(IndexElem); + index_col->name = colname; + index_col->expr = NULL; + index_col->indexcolname = NULL; + index_col->collation = InvalidOid; + index_col->opclass = list_make1(makeString("graphid_ops")); + index_col->opclassopts = NIL; + index_col->ordering = SORTBY_DEFAULT; + index_col->nulls_ordering = SORTBY_NULLS_DEFAULT; + + index_stmt->relation = makeRangeVar(schema_name, rel_name, -1); + index_stmt->accessMethod = "btree"; + index_stmt->tableSpace = NULL; + index_stmt->indexParams = list_make1(index_col); + index_stmt->options = NIL; + index_stmt->whereClause = NULL; + index_stmt->excludeOpNames = NIL; + index_stmt->idxcomment = NULL; + index_stmt->indexOid = InvalidOid; + index_stmt->unique = unique; + index_stmt->nulls_not_distinct = false; + index_stmt->primary = unique; + index_stmt->isconstraint = unique; + index_stmt->deferrable = false; + index_stmt->initdeferred = false; + index_stmt->transformed = false; + index_stmt->concurrent = false; + index_stmt->if_not_exists = false; + index_stmt->reset_default_tblspc = false; + + index_wrapper = makeNode(PlannedStmt); + index_wrapper->commandType = CMD_UTILITY; + index_wrapper->canSetTag = false; + index_wrapper->utilityStmt = (Node *)index_stmt; + index_wrapper->stmt_location = -1; + index_wrapper->stmt_len = 0; + + ProcessUtility(index_wrapper, "(generated CREATE INDEX command)", false, + PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, + NULL); } /* @@ -468,7 +542,7 @@ static List *create_vertex_table_elements(char *graph_name, char *label_name, /* "id" graphid PRIMARY KEY DEFAULT "ag_catalog"."_graphid"(...) 
*/ id = makeColumnDef(AG_VERTEX_COLNAME_ID, GRAPHIDOID, -1, InvalidOid); - id->constraints = list_make2(build_pk_constraint(), + id->constraints = list_make2(build_not_null_constraint(), build_id_default(graph_name, label_name, schema_name, seq_name)); diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index 7e10d9afc..f86c6126b 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -341,6 +341,10 @@ static void delete_entity(EState *estate, ResultRelInfo *resultRelInfo, } /* increment the command counter */ CommandCounterIncrement(); + + /* Update command id in estate */ + estate->es_snapshot->curcid = GetCurrentCommandId(false); + estate->es_output_cid = GetCurrentCommandId(false); } else if (lock_result != TM_Invisible && lock_result != TM_SelfModified) { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 30dc4761d..67049431c 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -22,11 +22,6 @@ #include "utils/load/ag_load_edges.h" #include "utils/load/csv.h" -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); - void edge_field_cb(void *field, size_t field_len, void *data) { @@ -131,7 +126,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) if (batch_state->num_tuples >= batch_state->max_tuples) { /* Insert the batch when it is full (i.e. 
BATCH_SIZE) */ - insert_batch(batch_state, cr->label_name, cr->graph_oid); + insert_batch(batch_state); batch_state->num_tuples = 0; } } @@ -223,7 +218,7 @@ int create_edges_from_csv_file(char *file_path, cr.load_as_agtype = load_as_agtype; /* Initialize the batch insert state */ - init_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + init_batch_insert(&cr.batch_state, label_name, graph_oid); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { @@ -238,7 +233,7 @@ int create_edges_from_csv_file(char *file_path, csv_fini(&p, edge_field_cb, edge_row_cb, &cr); /* Finish any remaining batch inserts */ - finish_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + finish_batch_insert(&cr.batch_state); if (ferror(fp)) { @@ -250,66 +245,4 @@ int create_edges_from_csv_file(char *file_path, free(cr.fields); csv_free(&p); return EXIT_SUCCESS; -} - -/* - * Initialize the batch insert state for edges. - */ -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - Relation relation; - int i; - - // Open a temporary relation to get the tuple descriptor - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); - - // Initialize the batch insert state - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - // Create slots - for (i = 0; i < BATCH_SIZE; i++) - { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - } - - table_close(relation, AccessShareLock); -} - -/* - * Finish the batch insert for edges. Insert the - * remaining tuples in the batch state and clean up. 
- */ -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - int i; - Relation relation; - - if ((*batch_state)->num_tuples > 0) - { - insert_batch(*batch_state, label_name, graph_oid); - (*batch_state)->num_tuples = 0; - } - - // Open a temporary relation to ensure resources are properly cleaned up - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); - - // Free slots - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); - } - - // Clean up batch state - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); -} +} \ No newline at end of file diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 2ab223346..4a04f3cd8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -24,18 +24,6 @@ #include "utils/load/ag_load_labels.h" #include "utils/load/csv.h" -static void setup_temp_table_for_vertex_ids(char *graph_name); -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid); -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid); - void vertex_field_cb(void *field, size_t field_len, void *data) { @@ -75,7 +63,6 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) graphid vertex_id; int64 entry_id; TupleTableSlot *slot; - TupleTableSlot *temp_id_slot; n_fields = cr->cur_field; @@ -114,11 +101,9 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) /* Get the 
appropriate slot from the batch state */ slot = batch_state->slots[batch_state->num_tuples]; - temp_id_slot = batch_state->temp_id_slots[batch_state->num_tuples]; /* Clear the slots contents */ ExecClearTuple(slot); - ExecClearTuple(temp_id_slot); /* Fill the values in the slot */ slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); @@ -129,20 +114,15 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) slot->tts_isnull[0] = false; slot->tts_isnull[1] = false; - temp_id_slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - temp_id_slot->tts_isnull[0] = false; - /* Make the slot as containing virtual tuple */ ExecStoreVirtualTuple(slot); - ExecStoreVirtualTuple(temp_id_slot); batch_state->num_tuples++; if (batch_state->num_tuples >= batch_state->max_tuples) { /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_vertex_batch(batch_state, cr->label_name, cr->graph_oid, - cr->temp_table_relid); + insert_batch(batch_state); batch_state->num_tuples = 0; } } @@ -202,7 +182,6 @@ int create_labels_from_csv_file(char *file_path, unsigned char options = 0; csv_vertex_reader cr; char *label_seq_name; - Oid temp_table_relid; if (csv_init(&p, options) != 0) { @@ -210,13 +189,6 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - if (!OidIsValid(temp_table_relid)) - { - setup_temp_table_for_vertex_ids(graph_name); - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - } - csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -243,7 +215,6 @@ int create_labels_from_csv_file(char *file_path, cr.id_field_exists = id_field_exists; cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); cr.load_as_agtype = load_as_agtype; - cr.temp_table_relid = temp_table_relid; if (cr.id_field_exists) { @@ -258,8 +229,7 @@ int create_labels_from_csv_file(char *file_path, } /* Initialize the batch 
insert state */ - init_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); + init_batch_insert(&cr.batch_state, label_name, graph_oid); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { @@ -274,8 +244,7 @@ int create_labels_from_csv_file(char *file_path, csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); /* Finish any remaining batch inserts */ - finish_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); + finish_batch_insert(&cr.batch_state); if (ferror(fp)) { @@ -288,180 +257,4 @@ int create_labels_from_csv_file(char *file_path, free(cr.fields); csv_free(&p); return EXIT_SUCCESS; -} - -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid) -{ - insert_batch_in_temp_table(batch_state, graph_oid, temp_table_relid); - insert_batch(batch_state, label_name, graph_oid); -} - -/* - * Create and populate a temporary table with vertex ids that are already - * present in the graph. This table will be used to check if the new vertex - * id generated by loader is a duplicate. - * Unique index is created to enforce uniqueness of the ids. - * - * We dont need this for loading edges since the ids are generated using - * sequence and are unique. - */ -static void setup_temp_table_for_vertex_ids(char *graph_name) -{ - char *create_as_query; - char *index_query; - - create_as_query = psprintf("CREATE TEMP TABLE IF NOT EXISTS %s AS " - "SELECT DISTINCT id FROM \"%s\".%s", - GET_TEMP_VERTEX_ID_TABLE(graph_name), graph_name, - AG_DEFAULT_LABEL_VERTEX); - - index_query = psprintf("CREATE UNIQUE INDEX ON %s (id)", - GET_TEMP_VERTEX_ID_TABLE(graph_name)); - SPI_connect(); - SPI_execute(create_as_query, false, 0); - SPI_execute(index_query, false, 0); - - SPI_finish(); -} - -/* - * Inserts batch of tuples into the temporary table. - * This function also updates the index to check for - * uniqueness of the ids. 
- */ -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid) -{ - int i; - EState *estate; - ResultRelInfo *resultRelInfo; - Relation rel; - List *result; - - rel = table_open(relid, RowExclusiveLock); - - /* Initialize executor state */ - estate = CreateExecutorState(); - - /* Initialize result relation information */ - resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, rel, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; - - /* Open the indices */ - ExecOpenIndices(resultRelInfo, false); - - /* Insert the batch into the temporary table */ - heap_multi_insert(rel, batch_state->temp_id_slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - - for (i = 0; i < batch_state->num_tuples; i++) - { - result = ExecInsertIndexTuples(resultRelInfo, batch_state->temp_id_slots[i], - estate, false, true, NULL, NIL, false); - /* Check if the unique cnstraint is violated */ - if (list_length(result) != 0) - { - Datum id; - bool isnull; - - id = slot_getattr(batch_state->temp_id_slots[i], 1, &isnull); - ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", - DATUM_GET_GRAPHID(id)), - errhint("Entry id %ld is already used", - get_graphid_entry_id(id)))); - } - } - /* Clean up and close the indices */ - ExecCloseIndices(resultRelInfo); - - FreeExecutorState(estate); - table_close(rel, RowExclusiveLock); - - CommandCounterIncrement(); -} - -/* - * Initialize the batch insert state for vertices. 
- */ -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; - - Relation temp_table_relation; - int i; - - /* Open a temporary relation to get the tuple descriptor */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); - - temp_table_relation = table_open(temp_table_relid, AccessShareLock); - - /* Initialize the batch insert state */ - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->temp_id_slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - /* Create slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - (*batch_state)->temp_id_slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(temp_table_relation), - &TTSOpsHeapTuple); - } - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); -} - -/* - * Finish the batch insert for vertices. Insert the - * remaining tuples in the batch state and clean up. 
- */ -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; - - Relation temp_table_relation; - int i; - - if ((*batch_state)->num_tuples > 0) - { - insert_vertex_batch(*batch_state, label_name, graph_oid, temp_table_relid); - (*batch_state)->num_tuples = 0; - } - - /* Open a temporary relation to ensure resources are properly cleaned up */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); - - temp_table_relation = table_open(temp_table_relid, AccessShareLock); - - /* Free slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); - ExecDropSingleTupleTableSlot((*batch_state)->temp_id_slots[i]); - } - - /* Clean up batch state */ - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null((*batch_state)->temp_id_slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); -} +} \ No newline at end of file diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 41233ff18..307ec335a 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,11 +18,14 @@ */ #include "postgres.h" +#include "catalog/indexing.h" +#include "executor/executor.h" #include "utils/json.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" +#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); @@ -217,18 +220,35 @@ void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, errmsg("label %s already exists as vertex label", label_name))); } + /* Open the relation */ + label_relation = table_open(get_label_relation(label_name, graph_oid), + 
RowExclusiveLock); + + /* Form the tuple */ values[0] = GRAPHID_GET_DATUM(edge_id); values[1] = GRAPHID_GET_DATUM(start_id); values[2] = GRAPHID_GET_DATUM(end_id); values[3] = AGTYPE_P_GET_DATUM((edge_properties)); - - label_relation = table_open(get_label_relation(label_name, graph_oid), - RowExclusiveLock); - tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. However, it is used here for convenience. + */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } @@ -246,46 +266,75 @@ void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, if (get_label_kind(label_name, graph_oid) == LABEL_KIND_EDGE) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("label %s already exists as edge label", label_name))); + errmsg("label %s already exists as edge label", + label_name))); } - values[0] = GRAPHID_GET_DATUM(vertex_id); - values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); - + /* Open the relation */ label_relation = table_open(get_label_relation(label_name, graph_oid), RowExclusiveLock); + + /* Form the tuple */ + values[0] = GRAPHID_GET_DATUM(vertex_id); + values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. 
However, it is used here for convenience. + */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid) +void insert_batch(batch_insert_state *batch_state) { - Relation label_relation; - BulkInsertState bistate; - Oid relid; - - // Get the relation OID - relid = get_label_relation(label_name, graph_oid); - - // Open the relation - label_relation = table_open(relid, RowExclusiveLock); - - // Prepare the BulkInsertState - bistate = GetBulkInsertState(); - - // Perform the bulk insert - heap_multi_insert(label_relation, batch_state->slots, - batch_state->num_tuples, GetCurrentCommandId(true), - 0, bistate); + List *result; + int i; - // Clean up - FreeBulkInsertState(bistate); - table_close(label_relation, RowExclusiveLock); + /* Insert the tuples */ + heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, + batch_state->slots, batch_state->num_tuples, + GetCurrentCommandId(true), 0, NULL); + + /* Insert index entries for the tuples */ + if (batch_state->resultRelInfo->ri_NumIndices > 0) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + result = ExecInsertIndexTuples(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate, false, + true, NULL, NIL, false); + + /* Check if the unique constraint is violated */ + if (list_length(result) != 0) + { + Datum id; + bool isnull; + + id = slot_getattr(batch_state->slots[i], 1, &isnull); + ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", + DATUM_GET_GRAPHID(id)), + errhint("Entry id %ld is already used", + get_graphid_entry_id(id)))); + } + } + } CommandCounterIncrement(); } @@ -475,3 +524,79 
@@ static int32 get_or_create_label(Oid graph_oid, char *graph_name, return label_id; } + +/* + * Initialize the batch insert state. + */ +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid) +{ + Relation relation; + Oid relid; + EState *estate; + ResultRelInfo *resultRelInfo; + int i; + + /* Open the relation */ + relid = get_label_relation(label_name, graph_oid); + relation = table_open(relid, RowExclusiveLock); + + /* Initialize executor state */ + estate = CreateExecutorState(); + + /* Initialize resultRelInfo */ + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); + estate->es_result_relations = &resultRelInfo; + + /* Open the indices */ + ExecOpenIndices(resultRelInfo, false); + + /* Initialize the batch insert state */ + *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); + (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->estate = estate; + (*batch_state)->resultRelInfo = resultRelInfo; + (*batch_state)->max_tuples = BATCH_SIZE; + (*batch_state)->num_tuples = 0; + + /* Create slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + (*batch_state)->slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(relation), + &TTSOpsHeapTuple); + } +} + +/* + * Finish the batch insert for vertices. Insert the + * tuples remaining in the batch state and clean up. 
+ */ +void finish_batch_insert(batch_insert_state **batch_state) +{ + int i; + + if ((*batch_state)->num_tuples > 0) + { + insert_batch(*batch_state); + (*batch_state)->num_tuples = 0; + } + + /* Free slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + } + + /* Clean up, close the indices and relation */ + ExecCloseIndices((*batch_state)->resultRelInfo); + table_close((*batch_state)->resultRelInfo->ri_RelationDesc, + RowExclusiveLock); + + /* Clean up batch state */ + FreeExecutorState((*batch_state)->estate); + pfree((*batch_state)->slots); + pfree(*batch_state); + *batch_state = NULL; +} \ No newline at end of file diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 3a70a5c05..b8ed1572e 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -24,10 +24,6 @@ #include "access/heapam.h" #include "utils/load/age_load.h" -#define AGE_VERTIX 1 -#define AGE_EDGE 2 - - struct counts { long unsigned fields; long unsigned allvalues; @@ -51,7 +47,6 @@ typedef struct { char *label_name; int label_id; Oid label_seq_relid; - Oid temp_table_relid; bool id_field_exists; bool load_as_agtype; int curr_seq_num; diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index b1335581b..72f11493d 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -30,16 +30,13 @@ #ifndef AGE_ENTITY_CREATOR_H #define AGE_ENTITY_CREATOR_H -#define TEMP_VERTEX_ID_TABLE_SUFFIX "_ag_vertex_ids" -#define GET_TEMP_VERTEX_ID_TABLE(graph_name) \ - psprintf("_%s%s", graph_name, TEMP_VERTEX_ID_TABLE_SUFFIX) - #define BATCH_SIZE 1000 -typedef struct +typedef struct batch_insert_state { + EState *estate; + ResultRelInfo *resultRelInfo; TupleTableSlot **slots; - TupleTableSlot **temp_id_slots; int num_tuples; int max_tuples; } batch_insert_state; @@ -57,7 +54,10 @@ void insert_vertex_simple(Oid 
graph_oid, char *label_name, graphid vertex_id, void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, agtype* end_properties); -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); + +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid); +void finish_batch_insert(batch_insert_state **batch_state); #endif /* AGE_ENTITY_CREATOR_H */ From 1d9df48caeb7ab584108034bb1d027cf05a1b6a1 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 9 Dec 2025 10:51:01 -0800 Subject: [PATCH 07/25] Fix Issue 2256: segmentation fault when calling coalesce function (#2259) Fixed issue 2256: A segmentation fault occurs when calling the coalesce function in PostgreSQL version 17. This likely predates 17 and includes other similar types of "functions". See issues 1124 (PR 1125) and 1303 (PR 1317) for more details. This issue is due to coalesce() being processed differently from other functions. Additionally, greatest() was found to exhibit the same behavior. They were added to the list of types to ignore during the cypher analyze phase. A few others were added: CaseExpr, XmlExpr, ArrayExpr, & RowExpr. Although, I wasn't able to find cases where these caused crashes. Added regression tests. modified: regress/expected/cypher.out modified: regress/sql/cypher.sql modified: src/backend/parser/cypher_analyze.c --- regress/expected/cypher.out | 16 ++++++++++++++++ regress/sql/cypher.sql | 7 +++++++ src/backend/parser/cypher_analyze.c | 26 +++++++++++++++++++------- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/regress/expected/cypher.out b/regress/expected/cypher.out index 31bafc6cf..53ea9f0c1 100644 --- a/regress/expected/cypher.out +++ b/regress/expected/cypher.out @@ -169,6 +169,22 @@ CREATE TABLE my_edges AS -- create a table of 4 columns, u, e, v, p. 
should be 5 rows CREATE TABLE my_detailed_paths AS (SELECT * FROM cypher('issue_1767', $$ MATCH p=(u)-[e]->(v) RETURN u,e,v,p $$) as (u agtype, e agtype, v agtype, p agtype)); +-- +-- Issue 2256: A segmentation fault occurs when calling the coalesce function +-- This also occurs with the greatest function too. +-- +SELECT * FROM coalesce(1, 0); + coalesce +---------- + 1 +(1 row) + +SELECT * FROM greatest(1, 0); + greatest +---------- + 1 +(1 row) + -- dump out the tables SELECT * FROM my_vertices; u diff --git a/regress/sql/cypher.sql b/regress/sql/cypher.sql index 7ded61ee6..090c7e704 100644 --- a/regress/sql/cypher.sql +++ b/regress/sql/cypher.sql @@ -94,6 +94,13 @@ CREATE TABLE my_edges AS CREATE TABLE my_detailed_paths AS (SELECT * FROM cypher('issue_1767', $$ MATCH p=(u)-[e]->(v) RETURN u,e,v,p $$) as (u agtype, e agtype, v agtype, p agtype)); +-- +-- Issue 2256: A segmentation fault occurs when calling the coalesce function +-- This also occurs with the greatest function too. +-- +SELECT * FROM coalesce(1, 0); +SELECT * FROM greatest(1, 0); + -- dump out the tables SELECT * FROM my_vertices; SELECT * FROM my_edges; diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index af3e83c87..a408eea6e 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -171,14 +171,20 @@ static bool convert_cypher_walker(Node *node, ParseState *pstate) * JsonConstructorExpr - wrapper over FuncExpr/Aggref/WindowFunc for * SQL/JSON constructors * - * These are a special case that needs to be ignored. + * Added the following, although only the first 2 caused crashes in tests - + * CoalesceExpr, MinMaxExpr, CaseExpr, XmlExpr, ArrayExpr, RowExpr + * + * These are all special case that needs to be ignored. 
* */ if (IsA(funcexpr, SQLValueFunction) - || IsA(funcexpr, CoerceViaIO) - || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) - || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) - || IsA(funcexpr, JsonConstructorExpr)) + || IsA(funcexpr, CoerceViaIO) + || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) + || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) + || IsA(funcexpr, JsonConstructorExpr) + || IsA(funcexpr, CoalesceExpr) || IsA(funcexpr, MinMaxExpr) + || IsA(funcexpr, CaseExpr) || IsA(funcexpr, XmlExpr) + || IsA(funcexpr, ArrayExpr) || IsA(funcexpr, RowExpr)) { return false; } @@ -346,14 +352,20 @@ static bool is_func_cypher(FuncExpr *funcexpr) * JsonConstructorExpr - wrapper over FuncExpr/Aggref/WindowFunc for * SQL/JSON constructors * - * These are a special case that needs to be ignored. + * Added the following, although only the first 2 caused crashes in tests - + * CoalesceExpr, MinMaxExpr, CaseExpr, XmlExpr, ArrayExpr, RowExpr + * + * These are all special case that needs to be ignored. * */ if (IsA(funcexpr, SQLValueFunction) || IsA(funcexpr, CoerceViaIO) || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) - || IsA(funcexpr, JsonConstructorExpr)) + || IsA(funcexpr, JsonConstructorExpr) + || IsA(funcexpr, CoalesceExpr) || IsA(funcexpr, MinMaxExpr) + || IsA(funcexpr, CaseExpr) || IsA(funcexpr, XmlExpr) + || IsA(funcexpr, ArrayExpr) || IsA(funcexpr, RowExpr)) { return false; } From 2ccdc6b951dc5e92e32ce32a41263c0b23e565e7 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 9 Dec 2025 11:15:26 -0800 Subject: [PATCH 08/25] Adjust 'could not find rte for' ERROR message (#2266) Adjusted the following type of error message. It was mentioned in issue 2263 as being incorrect, which it isn't. 
However, it did need some clarification added - ERROR: could not find rte for Added a HINT for additional clarity - HINT: variable does not exist within scope of usage For example: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) RETURN 1 ERROR: could not find rte for p0 LINE 3: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) ^ HINT: variable p0 does not exist within scope of usage Additionally, added pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET to transform_cypher_clause_as_subquery. Updated existing regression tests. Added regression tests from issue. modified: regress/expected/cypher_call.out modified: regress/expected/cypher_subquery.out modified: regress/expected/cypher_union.out modified: regress/expected/cypher_with.out modified: regress/expected/expr.out modified: regress/expected/list_comprehension.out modified: regress/expected/scan.out modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_expr.c --- regress/expected/cypher_call.out | 2 + regress/expected/cypher_subquery.out | 4 ++ regress/expected/cypher_union.out | 1 + regress/expected/cypher_with.out | 4 ++ regress/expected/expr.out | 66 +++++++++++++++++++++++++ regress/expected/list_comprehension.out | 2 + regress/expected/scan.out | 6 +++ regress/sql/expr.sql | 27 ++++++++++ src/backend/parser/cypher_clause.c | 1 + src/backend/parser/cypher_expr.c | 8 +-- 10 files changed, 118 insertions(+), 3 deletions(-) diff --git a/regress/expected/cypher_call.out b/regress/expected/cypher_call.out index bb1185b96..08f97ba41 100644 --- a/regress/expected/cypher_call.out +++ b/regress/expected/cypher_call.out @@ -125,6 +125,7 @@ SELECT * FROM cypher('cypher_call', $$CALL sqrt(64) YIELD sqrt WHERE a = 8 RETUR ERROR: could not find rte for a LINE 2: ...r('cypher_call', $$CALL sqrt(64) YIELD sqrt WHERE a = 8 RETU... 
^ +HINT: variable a does not exist within scope of usage /* MATCH CALL RETURN, should fail */ SELECT * FROM cypher('cypher_call', $$ MATCH (a) CALL sqrt(64) RETURN sqrt $$) as (sqrt agtype); ERROR: Procedure call inside a query does not support naming results implicitly @@ -171,6 +172,7 @@ SELECT * FROM cypher('cypher_call', $$ MATCH (a) CALL sqrt(64) YIELD sqrt WHERE ERROR: could not find rte for b LINE 1: ...all', $$ MATCH (a) CALL sqrt(64) YIELD sqrt WHERE b = 8 RETU... ^ +HINT: variable b does not exist within scope of usage /* CALL MATCH YIELD WHERE UPDATE/RETURN */ SELECT * FROM cypher('cypher_call', $$ CALL sqrt(64) YIELD sqrt WHERE sqrt > 1 CREATE ({n:'c'}) $$) as (a agtype); a diff --git a/regress/expected/cypher_subquery.out b/regress/expected/cypher_subquery.out index 934549f82..456b3a2c9 100644 --- a/regress/expected/cypher_subquery.out +++ b/regress/expected/cypher_subquery.out @@ -135,6 +135,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for c LINE 5: RETURN c ^ +HINT: variable c does not exist within scope of usage --union, no returns SELECT * FROM cypher('subquery', $$ MATCH (a:person) WHERE EXISTS { @@ -341,6 +342,7 @@ SELECT * FROM cypher('subquery', $$ RETURN 1, ERROR: could not find rte for a LINE 4: RETURN a ^ +HINT: variable a does not exist within scope of usage --- COUNT --count pattern subquery in where SELECT * FROM cypher('subquery', $$ MATCH (a:person) @@ -540,6 +542,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for b LINE 2: RETURN a.name, COUNT{MATCH (a) RETURN b} $$) ^ +HINT: variable b does not exist within scope of usage --incorrect nested variable reference SELECT * FROM cypher('subquery', $$ MATCH (a:person) RETURN a.name, COUNT{MATCH (a) @@ -549,6 +552,7 @@ SELECT * FROM cypher('subquery', $$ MATCH (a:person) ERROR: could not find rte for b LINE 4: RETURN b} $$) ^ +HINT: variable b does not exist within scope of usage --count nested with exists SELECT * 
FROM cypher('subquery', $$ MATCH (a:person) RETURN a.name, diff --git a/regress/expected/cypher_union.out b/regress/expected/cypher_union.out index 063354ddb..14fa56e67 100644 --- a/regress/expected/cypher_union.out +++ b/regress/expected/cypher_union.out @@ -141,6 +141,7 @@ SELECT * FROM cypher('cypher_union', $$MATCH (n) RETURN n UNION ALL MATCH (m) RE ERROR: could not find rte for n LINE 2: ..., $$MATCH (n) RETURN n UNION ALL MATCH (m) RETURN n$$) AS (r... ^ +HINT: variable n does not exist within scope of usage /* *UNION and UNION ALL, type casting */ diff --git a/regress/expected/cypher_with.out b/regress/expected/cypher_with.out index e5f82aa21..99ea320a0 100644 --- a/regress/expected/cypher_with.out +++ b/regress/expected/cypher_with.out @@ -267,6 +267,7 @@ $$) AS (a agtype, b agtype); ERROR: could not find rte for b LINE 4: RETURN m,b ^ +HINT: variable b does not exist within scope of usage SELECT * FROM cypher('cypher_with', $$ MATCH (m)-[]->(b) WITH m AS start_node,b AS end_node @@ -278,6 +279,7 @@ $$) AS (id agtype, node agtype); ERROR: could not find rte for end_node LINE 7: RETURN id(start_node),end_node.name ^ +HINT: variable end_node does not exist within scope of usage -- Clean up SELECT drop_graph('cypher_with', true); NOTICE: drop cascades to 4 other objects @@ -320,6 +322,7 @@ $$) AS (n agtype, d agtype); ERROR: could not find rte for d LINE 8: RETURN c,d ^ +HINT: variable d does not exist within scope of usage -- Issue 396 (should error out) SELECT * FROM cypher('graph',$$ CREATE (v),(u),(w), @@ -338,6 +341,7 @@ $$) as (a agtype,b agtype); ERROR: could not find rte for v LINE 4: RETURN v,path_length ^ +HINT: variable v does not exist within scope of usage -- Clean up SELECT drop_graph('graph', true); NOTICE: drop cascades to 6 other objects diff --git a/regress/expected/expr.out b/regress/expected/expr.out index d0546d8a3..4be3bf90f 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -3370,6 +3370,7 @@ $$) AS (toBooleanList 
agtype); ERROR: could not find rte for fail LINE 2: RETURN toBooleanList(fail) ^ +HINT: variable fail does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toBooleanList("fail") $$) AS (toBooleanList agtype); @@ -3513,6 +3514,7 @@ $$) AS (toFloatList agtype); ERROR: could not find rte for failed LINE 2: RETURN toFloatList([failed]) ^ +HINT: variable failed does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toFloatList("failed") $$) AS (toFloatList agtype); @@ -3892,12 +3894,14 @@ $$) AS (toStringList agtype); ERROR: could not find rte for b LINE 2: RETURN toStringList([['a', b]]) ^ +HINT: variable b does not exist within scope of usage SELECT * FROM cypher('expr', $$ RETURN toStringList([test]) $$) AS (toStringList agtype); ERROR: could not find rte for test LINE 2: RETURN toStringList([test]) ^ +HINT: variable test does not exist within scope of usage -- -- reverse(string) -- @@ -7923,6 +7927,7 @@ SELECT * FROM cypher('list', $$ RETURN tail(abc) $$) AS (tail agtype); ERROR: could not find rte for abc LINE 1: SELECT * FROM cypher('list', $$ RETURN tail(abc) $$) AS (tai... ^ +HINT: variable abc does not exist within scope of usage SELECT * FROM cypher('list', $$ RETURN tail() $$) AS (tail agtype); ERROR: function ag_catalog.age_tail() does not exist LINE 1: SELECT * FROM cypher('list', $$ RETURN tail() $$) AS (tail a... @@ -9011,9 +9016,70 @@ SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -505290721 (1 row) +-- +-- Issue 2263: AGE returns incorrect error message for EXISTS subquery outer variable reference +-- +-- NOTE: There isn't really anything incorrect about the message. However, +-- it could be more clear. 
+-- +SELECT * FROM create_graph('issue_2263'); +NOTICE: graph "issue_2263" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2263', $$ + CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for a +LINE 2: CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + ^ +HINT: variable a does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for p0 +LINE 2: CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + ^ +HINT: variable p0 does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIND [] AS a4 WITH DISTINCT NULL AS a5}}) + RETURN 1 +$$) AS (one agtype); +ERROR: could not find rte for r4 +LINE 2: CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIN... + ^ +HINT: variable r4 does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }}) +$$) AS (out agtype); +ERROR: could not find rte for x +LINE 2: ...TE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }... + ^ +HINT: variable x does not exist within scope of usage +SELECT * FROM cypher('issue_2263', $$ + CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) +$$) AS (out agtype); +ERROR: could not find rte for x +LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... 
+ ^ +HINT: variable x does not exist within scope of usage -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2263', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2263._ag_label_vertex +drop cascades to table issue_2263._ag_label_edge +NOTICE: graph "issue_2263" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_1988', true); NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table issue_1988._ag_label_vertex diff --git a/regress/expected/list_comprehension.out b/regress/expected/list_comprehension.out index 07f777707..5a3756422 100644 --- a/regress/expected/list_comprehension.out +++ b/regress/expected/list_comprehension.out @@ -569,10 +569,12 @@ SELECT * FROM cypher('list_comprehension', $$ RETURN [i IN range(0, 10, 2)],i $$ ERROR: could not find rte for i LINE 1: ..._comprehension', $$ RETURN [i IN range(0, 10, 2)],i $$) AS (... ^ +HINT: variable i does not exist within scope of usage SELECT * FROM cypher('list_comprehension', $$ RETURN [i IN range(0, 10, 2) WHERE i>5 | i^2], i $$) AS (result agtype, i agtype); ERROR: could not find rte for i LINE 1: ...$$ RETURN [i IN range(0, 10, 2) WHERE i>5 | i^2], i $$) AS (... 
^ +HINT: variable i does not exist within scope of usage -- Invalid list comprehension SELECT * FROM cypher('list_comprehension', $$ RETURN [1 IN range(0, 10, 2) WHERE 2>5] $$) AS (result agtype); ERROR: Syntax error at or near IN diff --git a/regress/expected/scan.out b/regress/expected/scan.out index d8105a053..46d5676d0 100644 --- a/regress/expected/scan.out +++ b/regress/expected/scan.out @@ -437,36 +437,42 @@ $$) AS t(id text); ERROR: could not find rte for _$09A_z LINE 2: RETURN _$09A_z ^ +HINT: variable _$09A_z does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN A $$) AS t(id text); ERROR: could not find rte for A LINE 2: RETURN A ^ +HINT: variable A does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN z $$) AS t(id text); ERROR: could not find rte for z LINE 2: RETURN z ^ +HINT: variable z does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN `$` $$) AS t(id text); ERROR: could not find rte for $ LINE 2: RETURN `$` ^ +HINT: variable $ does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN `0` $$) AS t(id text); ERROR: could not find rte for 0 LINE 2: RETURN `0` ^ +HINT: variable 0 does not exist within scope of usage SELECT * FROM cypher('scan', $$ RETURN ```` $$) AS t(id text); ERROR: could not find rte for ` LINE 2: RETURN ```` ^ +HINT: variable ` does not exist within scope of usage -- zero-length quoted identifier SELECT * FROM cypher('scan', $$ RETURN `` diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 83f21856e..466bace41 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -3634,9 +3634,36 @@ SELECT * FROM cypher('issue_1988', $$ SELECT agtype_access_operator(agtype_in('[null, null]')); SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); +-- +-- Issue 2263: AGE returns incorrect error message for EXISTS subquery outer variable reference +-- +-- NOTE: There isn't really anything incorrect about the message. 
However, +-- it could be more clear. +-- +SELECT * FROM create_graph('issue_2263'); +SELECT * FROM cypher('issue_2263', $$ + CREATE a=()-[:T]->(), p=({k:exists{return a}})-[:T]->() + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE p0=(n0), (n1{k:EXISTS{WITH p0}}) + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE ()-[r4 :T6]->(), ({k2:COUNT{WITH r4.k AS a3 UNWIND [] AS a4 WITH DISTINCT NULL AS a5}}) + RETURN 1 +$$) AS (one agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE (x), ({a1:EXISTS { RETURN COUNT(0) AS a2, keys(x) AS a4 }}) +$$) AS (out agtype); +SELECT * FROM cypher('issue_2263', $$ + CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) +$$) AS (out agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); SELECT * FROM drop_graph('expanded_map', true); diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 971f8c3d2..5c5024cf7 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -6315,6 +6315,7 @@ transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, pstate->p_expr_kind == EXPR_KIND_OTHER || pstate->p_expr_kind == EXPR_KIND_WHERE || pstate->p_expr_kind == EXPR_KIND_SELECT_TARGET || + pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET || pstate->p_expr_kind == EXPR_KIND_FROM_SUBSELECT); /* diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 993957c83..5f4de86b9 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -423,9 +423,11 @@ static Node *transform_ColumnRef(cypher_parsestate *cpstate, ColumnRef *cref) else { ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("could not find rte for %s", colname), - parser_errposition(pstate, cref->location))); + 
(errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("could not find rte for %s", colname), + errhint("variable %s does not exist within scope of usage", + colname), + parser_errposition(pstate, cref->location))); } if (node == NULL) From 419eb13cc2923f04d12a337356855429d1139607 Mon Sep 17 00:00:00 2001 From: Aleksey Konovkin Date: Tue, 9 Dec 2025 22:49:05 +0300 Subject: [PATCH 09/25] Fix possible memory and file descriptors leaks (#2258) - Used postgres memory allocation functions instead of standard ones. - Wrapped main loop of csv loader in PG_TRY block for better error handling. --- src/backend/utils/adt/age_global_graph.c | 6 +- src/backend/utils/adt/agtype.c | 39 ++++---- src/backend/utils/load/ag_load_edges.c | 91 ++++++++++-------- src/backend/utils/load/ag_load_labels.c | 115 ++++++++++++----------- src/include/utils/agtype.h | 1 + 5 files changed, 135 insertions(+), 117 deletions(-) diff --git a/src/backend/utils/adt/age_global_graph.c b/src/backend/utils/adt/age_global_graph.c index 6f30060ae..c34e51ee3 100644 --- a/src/backend/utils/adt/age_global_graph.c +++ b/src/backend/utils/adt/age_global_graph.c @@ -1237,12 +1237,10 @@ Datum age_delete_global_graphs(PG_FUNCTION_ARGS) { char *graph_name = NULL; - graph_name = strndup(agtv_temp->val.string.val, - agtv_temp->val.string.len); + graph_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); success = delete_specific_GRAPH_global_contexts(graph_name); - - free(graph_name); } else { diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index be838cbd0..02fc3221c 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -181,6 +181,17 @@ static agtype_value *agtype_build_map_as_agtype_value(FunctionCallInfo fcinfo); agtype_value *agtype_composite_to_agtype_value_binary(agtype *a); static agtype_value *tostring_helper(Datum arg, Oid type, char *msghdr); + +void *repalloc_check(void *ptr, size_t len) +{ + if (ptr != NULL) + { + return repalloc(ptr, 
len); + } + + return palloc(len); +} + /* * Due to how pfree can be implemented, it may not check for a passed NULL. This * wrapper does just that, it will only call pfree is the pointer passed is not @@ -5580,7 +5591,7 @@ static char *get_label_name(const char *graph_name, graphid element_graphid) result = NameStr(*DatumGetName(heap_getattr(tuple, Anum_ag_label_name, tupdesc, &column_is_null))); /* duplicate it */ - result = strdup(result); + result = pstrdup(result); /* end the scan and close the relation */ systable_endscan(scan_desc); @@ -5673,8 +5684,8 @@ Datum age_startnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5708,8 +5719,6 @@ Datum age_startnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, start_id); - free(label_name); - return result; } @@ -5738,8 +5747,8 @@ Datum age_endnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5773,8 +5782,6 @@ Datum age_endnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, end_id); - free(label_name); - return result; } @@ -6401,11 +6408,10 @@ Datum age_tofloat(PG_FUNCTION_ARGS) NumericGetDatum(agtv_value->val.numeric))); else if (agtv_value->type == AGTV_STRING) { - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + 
agtv_value->val.string.len); result = float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); if (!is_valid) PG_RETURN_NULL(); } @@ -6703,8 +6709,8 @@ Datum age_tointeger(PG_FUNCTION_ARGS) { char *endptr; /* we need a null terminated cstring */ - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + agtv_value->val.string.len); /* convert it if it is a regular integer string */ result = strtoi64(string, &endptr, 10); @@ -6718,7 +6724,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) f = float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); /* * If the conversions failed or it's a special float value, * return null. @@ -6731,10 +6736,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) result = (int64) f; } - else - { - free(string); - } } else { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 67049431c..931c6e0dc 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -36,8 +36,8 @@ void edge_field_cb(void *field, size_t field_len, void *data) if (cr->cur_field == cr->alloc) { cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); + cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); + cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); if (cr->fields == NULL) { cr->error = 1; @@ -48,7 +48,7 @@ void edge_field_cb(void *field, size_t field_len, void *data) } cr->fields_len[cr->cur_field] = field_len; cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char*)field, field_len); + cr->fields[cr->cur_field] = pnstrdup((char*)field, field_len); cr->cur_field += 1; } @@ -78,13 +78,13 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) { cr->header_num = cr->cur_field; 
cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); + cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); + cr->header = palloc((sizeof (char*) * cr->cur_field)); for (i = 0; icur_field; i++) { cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); + cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); } } else @@ -133,7 +133,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) for (i = 0; i < n_fields; ++i) { - free(cr->fields[i]); + pfree_if_not_null(cr->fields[i]); } if (cr->error) @@ -192,6 +192,10 @@ int create_edges_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } + p.malloc_func = palloc; + p.realloc_func = repalloc_check; + p.free_func = pfree_if_not_null; + csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -202,47 +206,52 @@ int create_edges_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) + PG_TRY(); { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + label_seq_name = get_label_seq_relation_name(label_name); + + memset((void*)&cr, 0, sizeof(csv_edge_reader)); + cr.alloc = 128; + 
cr.fields = palloc(sizeof(char *) * cr.alloc); + cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); + cr.header_row_length = 0; + cr.curr_row_length = 0; + cr.graph_name = graph_name; + cr.graph_oid = graph_oid; + cr.label_name = label_name; + cr.label_id = label_id; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + cr.load_as_agtype = load_as_agtype; + + /* Initialize the batch insert state */ + init_batch_insert(&cr.batch_state, label_name, graph_oid); + + while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + if (csv_parse(&p, buf, bytes_read, edge_field_cb, + edge_row_cb, &cr) != bytes_read) + { + ereport(ERROR, (errmsg("Error while parsing file: %s\n", + csv_strerror(csv_error(&p))))); + } } - } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); + csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + /* Finish any remaining batch inserts */ + finish_batch_insert(&cr.batch_state); - if (ferror(fp)) + if (ferror(fp)) + { + ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); + } + } + PG_FINALLY(); { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); + fclose(fp); + csv_free(&p); } + PG_END_TRY(); - fclose(fp); - - free(cr.fields); - csv_free(&p); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 4a04f3cd8..1e86bbda4 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -39,8 +39,8 @@ void vertex_field_cb(void *field, size_t field_len, void *data) if (cr->cur_field == cr->alloc) { cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); + cr->fields = repalloc_check(cr->fields, sizeof(char *) * 
cr->alloc); + cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); if (cr->fields == NULL) { cr->error = 1; @@ -51,7 +51,7 @@ void vertex_field_cb(void *field, size_t field_len, void *data) } cr->fields_len[cr->cur_field] = field_len; cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char *) field, field_len); + cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); cr->cur_field += 1; } @@ -70,13 +70,13 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) { cr->header_num = cr->cur_field; cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); + cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); + cr->header = palloc((sizeof (char*) * cr->cur_field)); for (i = 0; icur_field; i++) { cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); + cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); } } else @@ -129,7 +129,7 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) for (i = 0; i < n_fields; ++i) { - free(cr->fields[i]); + pfree_if_not_null(cr->fields[i]); } if (cr->error) @@ -189,6 +189,10 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } + p.malloc_func = palloc; + p.realloc_func = repalloc_check; + p.free_func = pfree_if_not_null; + csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -199,62 +203,67 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - 
cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + PG_TRY(); { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. - * - * We cant use currval because it will error out if nextval was - * not called before in the session. - */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + label_seq_name = get_label_seq_relation_name(label_name); + + memset((void*)&cr, 0, sizeof(csv_vertex_reader)); + + cr.alloc = 2048; + cr.fields = palloc(sizeof(char *) * cr.alloc); + cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); + cr.header_row_length = 0; + cr.curr_row_length = 0; + cr.graph_name = graph_name; + cr.graph_oid = graph_oid; + cr.label_name = label_name; + cr.label_id = label_id; + cr.id_field_exists = id_field_exists; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + cr.load_as_agtype = load_as_agtype; + + if (cr.id_field_exists) + { + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + * + * We cant use currval because it will error out if nextval was + * not called before in the session. 
+ */ + cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); + } - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* Initialize the batch insert state */ + init_batch_insert(&cr.batch_state, label_name, graph_oid); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + if (csv_parse(&p, buf, bytes_read, vertex_field_cb, + vertex_row_cb, &cr) != bytes_read) + { + ereport(ERROR, (errmsg("Error while parsing file: %s\n", + csv_strerror(csv_error(&p))))); + } } - } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); + csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + /* Finish any remaining batch inserts */ + finish_batch_insert(&cr.batch_state); - if (ferror(fp)) + if (ferror(fp)) + { + ereport(ERROR, (errmsg("Error while reading file %s\n", + file_path))); + } + } + PG_FINALLY(); { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); + fclose(fp); + csv_free(&p); } + PG_END_TRY(); - fclose(fp); - - free(cr.fields); - csv_free(&p); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 486775320..ab2ba08cc 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -556,6 +556,7 @@ void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* value); void pfree_agtype_in_state(agtype_in_state* value); void pfree_if_not_null(void *ptr); +void *repalloc_check(void *ptr, size_t len); agtype_value *agtype_value_from_cstring(char *str, int len); /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void); From b9d35d5f4c8ef8b10ffebe3de1e3a17a8e8b7c15 Mon Sep 
17 00:00:00 2001 From: John Gemignani Date: Wed, 10 Dec 2025 09:08:36 -0800 Subject: [PATCH 10/25] Fix ORDER BY alias resolution with AS in Cypher queries (#2269) NOTE: This PR was partially created with AI tools and reviewed by a human. ORDER BY clauses failed when referencing column aliases from RETURN: MATCH (p:Person) RETURN p.age AS age ORDER BY age DESC ERROR: could not find rte for age Added SQL-99 compliant alias matching to find_target_list_entry() that checks if ORDER BY identifier matches a target list alias before attempting expression transformation. This enables standard SQL behavior for sorting by aliased columns with DESC/DESCENDING/ASC/ASCENDING. Updated regression tests. Added regression tests. modified: regress/expected/cypher_match.out modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/parser/cypher_clause.c --- regress/expected/cypher_match.out | 24 ++++---- regress/expected/expr.out | 88 ++++++++++++++++++++++++++++++ regress/sql/expr.sql | 30 ++++++++++ src/backend/parser/cypher_clause.c | 26 +++++++++ 4 files changed, 156 insertions(+), 12 deletions(-) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index a34f59196..ea425e463 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -1655,10 +1655,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (u agtype, m agtype, l agtype); u | m | l ------------+---------------+------------ - "someone" | "opt_match_e" | "somebody" - "somebody" | "opt_match_e" | "someone" "anybody" | "opt_match_e" | "nobody" "nobody" | "opt_match_e" | "anybody" + "somebody" | "opt_match_e" | "someone" + "someone" | "opt_match_e" | "somebody" (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -1670,8 +1670,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (n agtype, r agtype, p agtype, m agtype, s agtype, q agtype); n | r | p | m | s | q 
-----------+---------------+------------+-----------+---------------+------------ - "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" "anybody" | "opt_match_e" | "nobody" | "someone" | "opt_match_e" | "somebody" + "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -1684,18 +1684,18 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (n agtype, r agtype, p agtype, m agtype, s agtype, q agtype); n | r | p | m | s | q ------------+---------------+------------+------------+---------------+------------ - "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" - "someone" | | | "somebody" | | - "someone" | | | "nobody" | | - "somebody" | | | "someone" | | - "somebody" | | | "anybody" | | - "somebody" | | | "nobody" | | "anybody" | "opt_match_e" | "nobody" | "someone" | "opt_match_e" | "somebody" - "anybody" | | | "somebody" | | "anybody" | | | "nobody" | | - "nobody" | | | "someone" | | - "nobody" | | | "somebody" | | + "anybody" | | | "somebody" | | "nobody" | | | "anybody" | | + "nobody" | | | "somebody" | | + "nobody" | | | "someone" | | + "somebody" | | | "anybody" | | + "somebody" | | | "nobody" | | + "somebody" | | | "someone" | | + "someone" | "opt_match_e" | "somebody" | "anybody" | "opt_match_e" | "nobody" + "someone" | | | "nobody" | | + "someone" | | | "somebody" | | (12 rows) -- Tests to catch match following optional match logic diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 4be3bf90f..926a958d6 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -6949,6 +6949,94 @@ $$) AS (i agtype); {"key": "value"} (9 rows) +-- +-- Test ORDER BY with AS +-- +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'John', age: 38}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jill', age: 23}) $$) AS (result agtype); + result +-------- +(0 rows) 
+ +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Ion', age: 34}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Mary', age: 57}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jerry', age: 34}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name ASC +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name DESC +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Mary" | 57 + "John" | 38 + "Jill" | 23 + "Jerry" | 34 + "Ion" | 34 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age ASC, name DESCENDING +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Jill" | 23 + "Jerry" | 34 + "Ion" | 34 + "John" | 38 + "Mary" | 57 +(5 rows) + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age DESC, name ASCENDING +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Mary" | 57 + "John" | 38 + "Ion" | 34 + "Jerry" | 34 + "Jill" | 23 +(5 rows) + --CASE SELECT create_graph('case_statement'); NOTICE: graph "case_statement" has been created diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 466bace41..7bf1f26b2 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql 
@@ -2823,6 +2823,7 @@ SELECT * FROM cypher('order_by', $$CREATE ({i: false})$$) AS (result agtype); SELECT * FROM cypher('order_by', $$CREATE ({i: {key: 'value'}})$$) AS (result agtype); SELECT * FROM cypher('order_by', $$CREATE ({i: [1]})$$) AS (result agtype); + SELECT * FROM cypher('order_by', $$ MATCH (u) RETURN u.i @@ -2835,6 +2836,35 @@ SELECT * FROM cypher('order_by', $$ ORDER BY u.i DESC $$) AS (i agtype); +-- +-- Test ORDER BY with AS +-- +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'John', age: 38}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jill', age: 23}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Ion', age: 34}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Mary', age: 57}) $$) AS (result agtype); +SELECT * FROM cypher('order_by', $$ CREATE ({name: 'Jerry', age: 34}) $$) AS (result agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name ASC +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY name DESC +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age ASC, name DESCENDING +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('order_by', $$ + MATCH (u) WHERE EXISTS(u.name) RETURN u.name AS name, u.age AS age ORDER BY age DESC, name ASCENDING +$$) AS (name agtype, age agtype); + --CASE SELECT create_graph('case_statement'); SELECT * FROM cypher('case_statement', $$CREATE ({id: 1, i: 1, j: null})-[:connected_to {id: 1, k:0}]->({id: 2, i: 'a', j: 'b'})$$) AS (result agtype); diff --git 
a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 5c5024cf7..acc52349d 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2296,6 +2296,32 @@ static TargetEntry *find_target_list_entry(cypher_parsestate *cpstate, ListCell *lt; TargetEntry *te; + /* + * If the ORDER BY item is a simple identifier, check if it matches + * an alias in the target list. This implements SQL99-compliant + * alias matching for ORDER BY clauses. + */ + if (IsA(node, ColumnRef)) + { + ColumnRef *cref = (ColumnRef *)node; + + if (list_length(cref->fields) == 1) + { + char *name = strVal(linitial(cref->fields)); + + /* Try to match an alias in the target list */ + foreach (lt, *target_list) + { + te = lfirst(lt); + + if (te->resname != NULL && strcmp(te->resname, name) == 0) + { + return te; + } + } + } + } + expr = transform_cypher_expr(cpstate, node, expr_kind); foreach (lt, *target_list) From 7432ac630eb5bca2eed0ae4b8bc0de84e17bd8bd Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 11 Dec 2025 07:32:13 -0800 Subject: [PATCH 11/25] Update grammar file for maintainability (#2270) Consolidated duplicate code, added helper functions, and reviewed the grammar file for issues. NOTE: I used an AI tool to review and cleanup the grammar file. I have reviewed all of the work it did. Improvements: 1. Added KEYWORD_STRDUP macro to eliminate hardcoded string lengths 2. Consolidated EXPLAIN statement handling into make_explain_stmt helper 3. Extracted WITH clause validation into validate_return_item_aliases helper 4. 
Created make_default_return_node helper for subquery return-less logic Benefits: - Reduced code duplication by ~150 lines - Improved maintainability with helper functions - Eliminated manual string length calculations (error-prone) All 29 existing regression tests pass modified: src/backend/parser/cypher_gram.y --- src/backend/parser/cypher_gram.y | 303 ++++++++++++++----------------- 1 file changed, 140 insertions(+), 163 deletions(-) diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 0bafefe1f..5ba1e6354 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -41,6 +41,9 @@ #define YYMALLOC palloc #define YYFREE pfree + +/* Helper macro for keyword string duplication */ +#define KEYWORD_STRDUP(kw) pnstrdup((kw), strlen(kw)) %} %locations @@ -276,6 +279,11 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, Node *where, Node *mapping_expr, int location); +/* helper functions */ +static ExplainStmt *make_explain_stmt(List *options); +static void validate_return_item_aliases(List *items, ag_scanner_t scanner); +static cypher_return *make_default_return_node(int location); + %} %% @@ -300,81 +308,59 @@ stmt: * Throw syntax error in this case. 
*/ if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); + } extra->result = $1; extra->extra = NULL; } | EXPLAIN cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $2; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = NIL; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt(NIL); } | EXPLAIN VERBOSE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $3; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make1(makeDefElem("verbose", NULL, @2));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make1(makeDefElem("verbose", NULL, @2))); } | EXPLAIN ANALYZE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } extra->result = $3; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make1(makeDefElem("analyze", NULL, @2));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make1(makeDefElem("analyze", NULL, @2))); } | EXPLAIN ANALYZE VERBOSE cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) yyerror(&yylloc, scanner, extra, "syntax error"); - extra->result = $4; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = list_make2(makeDefElem("analyze", NULL, @2), - makeDefElem("verbose", NULL, @3));; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt( + list_make2(makeDefElem("analyze", NULL, @2), + makeDefElem("verbose", NULL, @3))); } | EXPLAIN '(' utility_option_list ')' cypher_stmt semicolon_opt { - ExplainStmt *estmt = NULL; - if (yychar != YYEOF) + { yyerror(&yylloc, scanner, extra, "syntax error"); - + } 
extra->result = $5; - - estmt = makeNode(ExplainStmt); - estmt->query = NULL; - estmt->options = $3; - extra->extra = (Node *)estmt; + extra->extra = (Node *)make_explain_stmt($3); } ; @@ -652,57 +638,20 @@ single_subquery: single_subquery_no_return: subquery_part_init reading_clause_list { - ColumnRef *cr; - ResTarget *rt; cypher_return *n; /* * since subqueries allow return-less clauses, we add a * return node manually to reflect that syntax */ - cr = makeNode(ColumnRef); - cr->fields = list_make1(makeNode(A_Star)); - cr->location = @1; - - rt = makeNode(ResTarget); - rt->name = NULL; - rt->indirection = NIL; - rt->val = (Node *)cr; - rt->location = @1; - - n = make_ag_node(cypher_return); - n->distinct = false; - n->items = list_make1((Node *)rt); - n->order_by = NULL; - n->skip = NULL; - n->limit = NULL; - + n = make_default_return_node(@1); $$ = list_concat($1, lappend($2, n)); - } - | subquery_pattern + | subquery_pattern { - ColumnRef *cr; - ResTarget *rt; cypher_return *n; - cr = makeNode(ColumnRef); - cr->fields = list_make1(makeNode(A_Star)); - cr->location = @1; - - rt = makeNode(ResTarget); - rt->name = NULL; - rt->indirection = NIL; - rt->val = (Node *)cr; - rt->location = @1; - - n = make_ag_node(cypher_return); - n->distinct = false; - n->items = list_make1((Node *)rt); - n->order_by = NULL; - n->skip = NULL; - n->limit = NULL; - + n = make_default_return_node(@1); $$ = lappend(list_make1($1), n); } ; @@ -958,24 +907,10 @@ limit_opt: with: WITH DISTINCT return_item_list order_by_opt skip_opt limit_opt where_opt { - ListCell *li; cypher_with *n; /* check expressions are aliased */ - foreach(li, $3) - { - ResTarget *item = lfirst(li); - - /* variable does not have to be aliased */ - if (IsA(item->val, ColumnRef) || item->name) - continue; - - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("expression item must be aliased"), - errhint("Items can be aliased by using AS."), - ag_scanner_errposition(item->location, scanner))); - } + 
validate_return_item_aliases($3, scanner); n = make_ag_node(cypher_with); n->distinct = true; @@ -987,27 +922,12 @@ with: $$ = (Node *)n; } - | WITH return_item_list order_by_opt skip_opt limit_opt - where_opt + | WITH return_item_list order_by_opt skip_opt limit_opt where_opt { - ListCell *li; cypher_with *n; /* check expressions are aliased */ - foreach (li, $2) - { - ResTarget *item = lfirst(li); - - /* variable does not have to be aliased */ - if (IsA(item->val, ColumnRef) || item->name) - continue; - - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("expression item must be aliased"), - errhint("Items can be aliased by using AS."), - ag_scanner_errposition(item->location, scanner))); - } + validate_return_item_aliases($2, scanner); n = make_ag_node(cypher_with); n->distinct = false; @@ -2449,58 +2369,58 @@ qual_op: */ safe_keywords: - ALL { $$ = pnstrdup($1, 3); } - | ANALYZE { $$ = pnstrdup($1, 7); } - | AND { $$ = pnstrdup($1, 3); } - | AS { $$ = pnstrdup($1, 2); } - | ASC { $$ = pnstrdup($1, 3); } - | ASCENDING { $$ = pnstrdup($1, 9); } - | BY { $$ = pnstrdup($1, 2); } - | CALL { $$ = pnstrdup($1, 4); } - | CASE { $$ = pnstrdup($1, 4); } - | COALESCE { $$ = pnstrdup($1, 8); } - | CONTAINS { $$ = pnstrdup($1, 8); } - | COUNT { $$ = pnstrdup($1 ,5); } - | CREATE { $$ = pnstrdup($1, 6); } - | DELETE { $$ = pnstrdup($1, 6); } - | DESC { $$ = pnstrdup($1, 4); } - | DESCENDING { $$ = pnstrdup($1, 10); } - | DETACH { $$ = pnstrdup($1, 6); } - | DISTINCT { $$ = pnstrdup($1, 8); } - | ELSE { $$ = pnstrdup($1, 4); } - | ENDS { $$ = pnstrdup($1, 4); } - | EXISTS { $$ = pnstrdup($1, 6); } - | EXPLAIN { $$ = pnstrdup($1, 7); } - | IN { $$ = pnstrdup($1, 2); } - | IS { $$ = pnstrdup($1, 2); } - | LIMIT { $$ = pnstrdup($1, 6); } - | MATCH { $$ = pnstrdup($1, 6); } - | MERGE { $$ = pnstrdup($1, 6); } - | NOT { $$ = pnstrdup($1, 3); } - | OPERATOR { $$ = pnstrdup($1, 8); } - | OPTIONAL { $$ = pnstrdup($1, 8); } - | OR { $$ = pnstrdup($1, 2); } - | ORDER { $$ = 
pnstrdup($1, 5); } - | REMOVE { $$ = pnstrdup($1, 6); } - | RETURN { $$ = pnstrdup($1, 6); } - | SET { $$ = pnstrdup($1, 3); } - | SKIP { $$ = pnstrdup($1, 4); } - | STARTS { $$ = pnstrdup($1, 6); } - | THEN { $$ = pnstrdup($1, 4); } - | UNION { $$ = pnstrdup($1, 5); } - | WHEN { $$ = pnstrdup($1, 4); } - | VERBOSE { $$ = pnstrdup($1, 7); } - | WHERE { $$ = pnstrdup($1, 5); } - | WITH { $$ = pnstrdup($1, 4); } - | XOR { $$ = pnstrdup($1, 3); } - | YIELD { $$ = pnstrdup($1, 5); } + ALL { $$ = KEYWORD_STRDUP($1); } + | ANALYZE { $$ = KEYWORD_STRDUP($1); } + | AND { $$ = KEYWORD_STRDUP($1); } + | AS { $$ = KEYWORD_STRDUP($1); } + | ASC { $$ = KEYWORD_STRDUP($1); } + | ASCENDING { $$ = KEYWORD_STRDUP($1); } + | BY { $$ = KEYWORD_STRDUP($1); } + | CALL { $$ = KEYWORD_STRDUP($1); } + | CASE { $$ = KEYWORD_STRDUP($1); } + | COALESCE { $$ = KEYWORD_STRDUP($1); } + | CONTAINS { $$ = KEYWORD_STRDUP($1); } + | COUNT { $$ = KEYWORD_STRDUP($1); } + | CREATE { $$ = KEYWORD_STRDUP($1); } + | DELETE { $$ = KEYWORD_STRDUP($1); } + | DESC { $$ = KEYWORD_STRDUP($1); } + | DESCENDING { $$ = KEYWORD_STRDUP($1); } + | DETACH { $$ = KEYWORD_STRDUP($1); } + | DISTINCT { $$ = KEYWORD_STRDUP($1); } + | ELSE { $$ = KEYWORD_STRDUP($1); } + | ENDS { $$ = KEYWORD_STRDUP($1); } + | EXISTS { $$ = KEYWORD_STRDUP($1); } + | EXPLAIN { $$ = KEYWORD_STRDUP($1); } + | IN { $$ = KEYWORD_STRDUP($1); } + | IS { $$ = KEYWORD_STRDUP($1); } + | LIMIT { $$ = KEYWORD_STRDUP($1); } + | MATCH { $$ = KEYWORD_STRDUP($1); } + | MERGE { $$ = KEYWORD_STRDUP($1); } + | NOT { $$ = KEYWORD_STRDUP($1); } + | OPERATOR { $$ = KEYWORD_STRDUP($1); } + | OPTIONAL { $$ = KEYWORD_STRDUP($1); } + | OR { $$ = KEYWORD_STRDUP($1); } + | ORDER { $$ = KEYWORD_STRDUP($1); } + | REMOVE { $$ = KEYWORD_STRDUP($1); } + | RETURN { $$ = KEYWORD_STRDUP($1); } + | SET { $$ = KEYWORD_STRDUP($1); } + | SKIP { $$ = KEYWORD_STRDUP($1); } + | STARTS { $$ = KEYWORD_STRDUP($1); } + | THEN { $$ = KEYWORD_STRDUP($1); } + | UNION { $$ = 
KEYWORD_STRDUP($1); } + | WHEN { $$ = KEYWORD_STRDUP($1); } + | VERBOSE { $$ = KEYWORD_STRDUP($1); } + | WHERE { $$ = KEYWORD_STRDUP($1); } + | WITH { $$ = KEYWORD_STRDUP($1); } + | XOR { $$ = KEYWORD_STRDUP($1); } + | YIELD { $$ = KEYWORD_STRDUP($1); } ; conflicted_keywords: - END_P { $$ = pnstrdup($1, 5); } - | FALSE_P { $$ = pnstrdup($1, 7); } - | NULL_P { $$ = pnstrdup($1, 6); } - | TRUE_P { $$ = pnstrdup($1, 6); } + END_P { $$ = KEYWORD_STRDUP($1); } + | FALSE_P { $$ = KEYWORD_STRDUP($1); } + | NULL_P { $$ = KEYWORD_STRDUP($1); } + | TRUE_P { $$ = KEYWORD_STRDUP($1); } ; %% @@ -3384,7 +3304,7 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, /* * Build an ARRAY sublink and attach list_comp as sub-select, - * it will be transformed in to query tree by us and reattached for + * it will be transformed in to query tree by us and reattached for * pg to process. */ sub = makeNode(SubLink); @@ -3396,3 +3316,60 @@ static Node *build_list_comprehension_node(Node *var, Node *expr, return (Node *) node_to_agtype((Node *)sub, "agtype[]", location); } + +/* Helper function to create an ExplainStmt node */ +static ExplainStmt *make_explain_stmt(List *options) +{ + ExplainStmt *estmt = makeNode(ExplainStmt); + estmt->query = NULL; + estmt->options = options; + return estmt; +} + +/* Helper function to validate that return items are properly aliased */ +static void validate_return_item_aliases(List *items, ag_scanner_t scanner) +{ + ListCell *li; + + foreach(li, items) + { + ResTarget *item = lfirst(li); + + /* variable does not have to be aliased */ + if (IsA(item->val, ColumnRef) || item->name) + continue; + + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("expression item must be aliased"), + errhint("Items can be aliased by using AS."), + ag_scanner_errposition(item->location, scanner))); + } +} + +/* Helper function to create a default return node (RETURN *) */ +static cypher_return *make_default_return_node(int location) +{ + ColumnRef *cr; 
+ ResTarget *rt; + cypher_return *n; + + cr = makeNode(ColumnRef); + cr->fields = list_make1(makeNode(A_Star)); + cr->location = location; + + rt = makeNode(ResTarget); + rt->name = NULL; + rt->indirection = NIL; + rt->val = (Node *)cr; + rt->location = location; + + n = make_ag_node(cypher_return); + n->distinct = false; + n->items = list_make1((Node *)rt); + n->order_by = NULL; + n->skip = NULL; + n->limit = NULL; + + return n; +} From d4e9b9cc36ed61b31e1ec20be6de734f6f0d6398 Mon Sep 17 00:00:00 2001 From: jsell-rh Date: Thu, 11 Dec 2025 10:56:37 -0500 Subject: [PATCH 12/25] Convert string to raw string to remove invalid escape sequence warning (#2267) - Changed '\s' to r'\s' --- drivers/python/age/age.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/python/age/age.py b/drivers/python/age/age.py index 817cc6e5a..b1aa82158 100644 --- a/drivers/python/age/age.py +++ b/drivers/python/age/age.py @@ -26,7 +26,7 @@ _EXCEPTION_NoConnection = NoConnection() _EXCEPTION_GraphNotSet = GraphNotSet() -WHITESPACE = re.compile('\s') +WHITESPACE = re.compile(r'\s') class AgeDumper(psycopg.adapt.Dumper): @@ -233,3 +233,4 @@ def cypher(self, cursor:psycopg.cursor, cypherStmt:str, cols:list=None, params:t # def queryCypher(self, cypherStmt:str, columns:list=None , params:tuple=None) -> psycopg.cursor : # return queryCypher(self.connection, self.graphName, cypherStmt, columns, params) + From d8fc867903e5d9f823f1ac5fa067720f8c7c447a Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Thu, 11 Dec 2025 21:51:12 +0500 Subject: [PATCH 13/25] Migrate python driver configuration to pyproject.toml (#2272) - Add pyproject.toml with package configuration - Simplify setup.py to minimal backward-compatible wrapper. - Updated CI workflow and .gitignore. - Resolves warning about using setup.py directly. 
--- .github/workflows/python-driver.yaml | 4 +-- .gitignore | 2 ++ drivers/python/README.md | 2 +- drivers/python/pyproject.toml | 48 ++++++++++++++++++++++++++++ drivers/python/setup.py | 28 +++------------- 5 files changed, 58 insertions(+), 26 deletions(-) create mode 100644 drivers/python/pyproject.toml diff --git a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index ef6cf96a1..860cbcf25 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -24,7 +24,7 @@ jobs: - name: Set up python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install pre-requisites run: | @@ -33,7 +33,7 @@ jobs: - name: Build run: | - python setup.py install + pip install . - name: Test run: | diff --git a/.gitignore b/.gitignore index a8e809dda..03923b03e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,7 @@ age--*.*.*.sql !age--*--*sql __pycache__ **/__pycache__ +**/.venv +**/apache_age_python.egg-info drivers/python/build diff --git a/drivers/python/README.md b/drivers/python/README.md index 184652275..749b44bfb 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -62,7 +62,7 @@ python -m unittest -v test_agtypes.py ### Build from source ``` -python setup.py install +pip install . ``` ### For more information about [Apache AGE](https://age.apache.org/) diff --git a/drivers/python/pyproject.toml b/drivers/python/pyproject.toml new file mode 100644 index 000000000..18112381c --- /dev/null +++ b/drivers/python/pyproject.toml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "apache-age-python" +version = "0.0.7" +description = "Python driver support for Apache AGE" +readme = "README.md" +requires-python = ">=3.9" +license = "Apache-2.0" +keywords = ["Graph Database", "Apache AGE", "PostgreSQL"] +authors = [ + {name = "Ikchan Kwon, Apache AGE", email = "dev-subscribe@age.apache.org"} +] +classifiers = [ + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", +] +dependencies = [ + "psycopg", + "antlr4-python3-runtime==4.11.1", +] + +[project.urls] +Homepage = "https://github.com/apache/age/tree/master/drivers/python" +Download = "https://github.com/apache/age/releases" + +[tool.setuptools] +packages = ["age", "age.gen", "age.networkx"] diff --git a/drivers/python/setup.py b/drivers/python/setup.py index 1da49d9cb..d0eed26be 100644 --- a/drivers/python/setup.py +++ b/drivers/python/setup.py @@ -13,28 +13,10 @@ # specific language governing permissions and limitations # under the License. -from setuptools import setup, find_packages -from age import VERSION +# This setup.py is maintained for backward compatibility. +# All package configuration is in pyproject.toml. For installation, +# use: pip install . 
-with open("README.md", "r", encoding='utf8') as fh: - long_description = fh.read() +from setuptools import setup -setup( - name = 'apache-age-python', - version = '0.0.7', - description = 'Python driver support for Apache AGE', - long_description=long_description, - long_description_content_type="text/markdown", - author = 'Ikchan Kwon, Apache AGE', - author_email = 'dev-subscribe@age.apache.org', - url = 'https://github.com/apache/age/tree/master/drivers/python', - download_url = 'https://github.com/apache/age/releases' , - license = 'Apache2.0', - install_requires = [ 'psycopg', 'antlr4-python3-runtime==4.11.1'], - packages = ['age', 'age.gen','age.networkx'], - keywords = ['Graph Database', 'Apache AGE', 'PostgreSQL'], - python_requires = '>=3.9', - classifiers = [ - 'Programming Language :: Python :: 3.9' - ] -) +setup() From 3d54a1ce78011f4ac712c36d1e9f704f1f7f5438 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 16 Dec 2025 08:33:28 -0800 Subject: [PATCH 14/25] Restrict age_load commands (#2274) This PR applies restrictions to the following age_load commands - load_labels_from_file() load_edges_from_file() They are now tied to a specific root directory and are required to have a specific file extension to eliminate any attempts to force them to access any other files. Nothing else has changed with the actual command formats or parameters, only that they work out of the /tmp/age directory and only access files with an extension of .csv. Added regression tests and updated the location of the csv files for those regression tests. 
modified: regress/expected/age_load.out modified: regress/sql/age_load.sql modified: src/backend/utils/load/age_load.c --- regress/expected/age_load.out | 44 +++++++++++++++++- regress/sql/age_load.sql | 38 +++++++++++++++- src/backend/utils/load/age_load.c | 76 ++++++++++++++++++++++++++++--- 3 files changed, 149 insertions(+), 9 deletions(-) diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 5f2bdab78..55d1ff1d6 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -16,7 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; SET search_path TO ag_catalog; -- Create a country using CREATE clause @@ -401,6 +403,43 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN prop {"bool": "false", "string": "nUll", "numeric": "3.14"} (6 rows) +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +ERROR: file path must not be NULL +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); +ERROR: file path must not be NULL +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +ERROR: file name cannot be zero length +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); +ERROR: file name cannot be zero length +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_vertices.csv] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_edges.csv] +SELECT 
load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_vertices.txt] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_edges.txt] +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: You can only load files with extension [.csv]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: You can only load files with extension [.csv]. +-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +ERROR: You can only load files located in [/tmp/age/]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); +ERROR: You can only load files located in [/tmp/age/]. +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table agload_conversion._ag_label_vertex @@ -415,3 +454,6 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- End +-- diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 180248bf1..cefcfb4ca 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -17,7 +17,9 @@ * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! 
cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; @@ -160,4 +162,38 @@ SELECT create_elabel('agload_conversion','Edges2'); SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); + +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); + +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); + +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! 
touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); + +-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); + +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); + +-- +-- End +-- diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 307ec335a..c7cf0677f 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -31,6 +31,62 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); +static char *build_safe_filename(char *name); + +#define AGE_BASE_CSV_DIRECTORY "/tmp/age/" +#define AGE_CSV_FILE_EXTENSION ".csv" + +static char *build_safe_filename(char *name) +{ + int length; + char path[PATH_MAX]; + char *resolved; + + if (name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be NULL"))); + + } + + length = strlen(name); + + if (length == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be zero length"))); + + } + + snprintf(path, sizeof(path), "%s%s", AGE_BASE_CSV_DIRECTORY, name); + + resolved = realpath(path, NULL); + + if (resolved == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("File or path does not exist [%s]", path))); + } + + if (strncmp(resolved, AGE_BASE_CSV_DIRECTORY, + strlen(AGE_BASE_CSV_DIRECTORY)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files located in [%s].", + 
AGE_BASE_CSV_DIRECTORY))); + } + + length = strlen(resolved) - 4; + if (strncmp(resolved+length, AGE_CSV_FILE_EXTENSION, + strlen(AGE_CSV_FILE_EXTENSION)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files with extension [%s].", + AGE_CSV_FILE_EXTENSION))); + } + + return resolved; +} agtype *create_empty_agtype(void) { @@ -344,7 +400,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) { Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; @@ -373,7 +429,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); id_field_exists = PG_GETARG_BOOL(3); load_as_agtype = PG_GETARG_BOOL(4); @@ -385,7 +441,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_name_str = AG_DEFAULT_LABEL_VERTEX; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, @@ -394,6 +450,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } @@ -403,7 +462,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; @@ -431,7 +490,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); load_as_agtype = PG_GETARG_BOOL(3); graph_name_str = NameStr(*graph_name); @@ -442,7 +501,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_name_str = 
AG_DEFAULT_LABEL_EDGE; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, @@ -450,6 +509,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } @@ -599,4 +661,4 @@ void finish_batch_insert(batch_insert_state **batch_state) pfree((*batch_state)->slots); pfree(*batch_state); *batch_state = NULL; -} \ No newline at end of file +} From 4d13998cc04c3dfa40a04d055748ccd224c44082 Mon Sep 17 00:00:00 2001 From: Arthur Nascimento Date: Tue, 6 Jan 2026 13:36:49 -0300 Subject: [PATCH 15/25] Makefile: fix race condition on cypher_gram_def.h (#2273) The file cypher_gram.c generates cypher_gram_def.h, which is directly necessary for cypher_parser.o and cypher_keywords.o and their respective .bc files. But that direct dependency is not reflected in the Makefile, which only had the indirect dependency of .o on .c. So on high parallel builds, the .h may not have been generated by bison yet. Additionally, the .bc files should have the same dependencies as the .o files, but those are lacking. Here is an example output where the .bc file fails to build, as it was running concurrently with the bison instance that was about to finalize cypher_gram_def.h: In file included from src/backend/parser/cypher_parser.c:24: clang-17 -Wno-ignored-attributes -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-unused-command-line-argument -Wno-compound-token-split-by-macro -O2 -I.//src/include -I.//src/include/parser -I. 
-I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -flto=thin -emit-llvm -c -o src/backend/parser/cypher_parser.bc src/backend/parser/cypher_parser.c .//src/include/parser/cypher_gram.h:65:10: fatal error: 'parser/cypher_gram_def.h' file not found 65 | #include "parser/cypher_gram_def.h" | ^~~~~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. make: *** [/usr/pgsql-17/lib/pgxs/src/makefiles/../../src/Makefile.global:1085: src/backend/parser/cypher_parser.bc] Error 1 make: *** Waiting for unfinished jobs.... gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wimplicit-fallthrough=3 -Wshadow=compatible-local -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -Wno-format-truncation -O2 -g -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables -fPIC -fvisibility=hidden -I.//src/include -I.//src/include/parser -I. -I./ -I/usr/pgsql-17/include/server -I/usr/pgsql-17/include/internal -D_GNU_SOURCE -I/usr/include -I/usr/include/libxml2 -c -o src/backend/catalog/ag_label.o src/backend/catalog/ag_label.c /usr/bin/bison -Wno-deprecated --defines=src/include/parser/cypher_gram_def.h -o src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.y Previously, cypher_parser.o was missing the dependency, so it could start before cypher_gram_def.h was available: Considering target file 'src/backend/parser/cypher_parser.o'. File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. 
As well as cypher_parser.bc, missing the dependency on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. Now cypher_parser.o correctly depends on cypher_gram_def.h: Considering target file 'src/backend/parser/cypher_parser.o'. File 'src/backend/parser/cypher_parser.o' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.o'. Must remake target 'src/backend/parser/cypher_parser.o'. And cypher_parser.bc correctly depends on cypher_gram_def.h as well: Considering target file 'src/backend/parser/cypher_parser.bc'. File 'src/backend/parser/cypher_parser.bc' does not exist. Considering target file 'src/backend/parser/cypher_parser.c'. File 'src/backend/parser/cypher_parser.c' was considered already. Considering target file 'src/backend/parser/cypher_gram.c'. File 'src/backend/parser/cypher_gram.c' was considered already. Considering target file 'src/include/parser/cypher_gram_def.h'. File 'src/include/parser/cypher_gram_def.h' was considered already. Finished prerequisites of target file 'src/backend/parser/cypher_parser.bc'. Must remake target 'src/backend/parser/cypher_parser.bc'. 
--- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e73f3e68..0392d8b85 100644 --- a/Makefile +++ b/Makefile @@ -147,8 +147,10 @@ src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c +src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h $(age_sql): @cat $(SQLS) > $@ From 43f6ca45425e1b8e7f5115e12dd7d962edfbd7c7 Mon Sep 17 00:00:00 2001 From: M15terHyde <59905806+M15terHyde@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:52:33 -0600 Subject: [PATCH 16/25] Revise README for Python driver updates (#2298) Updated README to from psycopg2 to psycopg3 (psycopg) --- drivers/python/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/python/README.md b/drivers/python/README.md index 749b44bfb..e64f9de67 100644 --- a/drivers/python/README.md +++ b/drivers/python/README.md @@ -28,11 +28,11 @@ AGType parser and driver support for [Apache AGE](https://age.apache.org/), grap ### Features * Unmarshal AGE result data(AGType) to Vertex, Edge, Path -* Cypher query support for Psycopg2 PostgreSQL driver (enables to use cypher queries directly) +* Cypher query support for Psycopg3 PostgreSQL driver (enables to use cypher queries directly) ### Prerequisites * over Python 3.9 -* This module runs on [psycopg2](https://www.psycopg.org/) and 
[antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) +* This module runs on [psycopg3](https://www.psycopg.org/) and [antlr4-python3](https://pypi.org/project/antlr4-python3-runtime/) ``` sudo apt-get update sudo apt-get install python3-dev libpq-dev @@ -80,7 +80,7 @@ SET search_path = ag_catalog, "$user", public; ``` ### Usage -* If you are not familiar with Psycopg2 driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) +* If you are not familiar with Psycopg driver : Go to [Jupyter Notebook : Basic Sample](samples/apache-age-basic.ipynb) * Simpler way to access Apache AGE [AGE Sample](samples/apache-age-note.ipynb) in Samples. * Agtype converting samples: [Agtype Sample](samples/apache-age-agtypes.ipynb) in Samples. @@ -119,7 +119,7 @@ Here the following value required Insert From networkx directed graph into an AGE database. #### Parameters -- `connection` (psycopg2.connect): Connection object to the AGE database. +- `connection` (psycopg.connect): Connection object to the AGE database. - `G` (networkx.DiGraph): Networkx directed graph to be converted and inserted. @@ -152,7 +152,7 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. #### Parameters -- `connection` (psycopg2.connect): Connection object to the PostgreSQL database. +- `connection` (psycopg.connect): Connection object to the PostgreSQL database. - `graphName` (str): Name of the graph. - `G` (None | nx.DiGraph): Optional Networkx directed graph. If provided, the data will be added to this graph. - `query` (str | None): Optional Cypher query to retrieve data from the database. @@ -167,3 +167,4 @@ Converts data from a Apache AGE graph database into a Networkx directed graph. 
# Call the function to convert data into a Networkx graph graph = age_to_networkx(connection, graphName="MyGraph" ) ``` + From 5c6dbc862f92f8e5f7961accead42521770d0e78 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:27:47 -0800 Subject: [PATCH 17/25] Fix Issue 2289: handle empty list in IN expression (#2294) NOTE: This PR was created with AI tools and a human. When evaluating 'x IN []' with an empty list, the transform_AEXPR_IN function would return NULL because no expressions were processed. This caused a 'cache lookup failed for type 0' error downstream. This fix adds an early check for the empty list case: - 'x IN []' returns false (nothing can be in an empty list) Additional NOTE: Cypher does not have 'NOT IN' syntax. To check if a value is NOT in a list, use 'NOT (x IN list)'. The NOT operator will invert the false from an empty list to true as expected. The fix returns a boolean constant directly, avoiding the NULL result that caused the type lookup failure. Added regression tests. 
modified: regress/expected/expr.out modified: regress/sql/expr.sql modified: src/backend/parser/cypher_expr.c --- regress/expected/expr.out | 72 ++++++++++++++++++++++++++++++++ regress/sql/expr.sql | 23 ++++++++++ src/backend/parser/cypher_expr.c | 30 ++++++++++++- 3 files changed, 123 insertions(+), 2 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 926a958d6..6d9341451 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -319,6 +319,50 @@ $$RETURN 1 IN [[null]]$$) AS r(c boolean); f (1 row) +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); + c +--- + f +(1 row) + +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); + c +--- + t +(1 row) + -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -9155,9 +9199,37 @@ ERROR: could not find rte for x LINE 2: ...({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL)... 
^ HINT: variable x does not exist within scope of usage +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +NOTICE: graph "issue_2289" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + v +------- + false +(1 row) + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table issue_2289._ag_label_vertex +drop cascades to table issue_2289._ag_label_edge +NOTICE: graph "issue_2289" has been dropped + drop_graph +------------ + +(1 row) + SELECT * FROM drop_graph('issue_2263', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table issue_2263._ag_label_vertex diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 7bf1f26b2..445e2d237 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -157,6 +157,20 @@ SELECT * FROM cypher('expr', $$RETURN 1 in [[1]]$$) AS r(c boolean); SELECT * FROM cypher('expr', $$RETURN 1 IN [[null]]$$) AS r(c boolean); +-- empty list: x IN [] should always return false +SELECT * FROM cypher('expr', +$$RETURN 1 IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN 'a' IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN null IN []$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN [1,2,3] IN []$$) AS r(c boolean); +-- NOT (x IN []) should always return true +SELECT * FROM cypher('expr', +$$RETURN NOT (1 IN [])$$) AS r(c boolean); +SELECT * FROM cypher('expr', +$$RETURN NOT ('a' IN [])$$) AS r(c boolean); -- should error - ERROR: object of IN must be a list SELECT * FROM cypher('expr', $$RETURN null IN 'str' $$) AS r(c boolean); @@ -3690,9 +3704,18 @@ SELECT * FROM cypher('issue_2263', $$ CREATE x = (), ({ a0:COUNT { MATCH () WHERE CASE WHEN true THEN (x IS NULL) END RETURN 0 } }) 
$$) AS (out agtype); +-- +-- Issue 2289: 1 IN [] causes cache lookup failed for type 0 +-- +-- Additional test cases were added above to the IN operator +-- +SELECT * FROM create_graph('issue_2289'); +SELECT * FROM cypher('issue_2289', $$ RETURN (1 IN []) AS v $$) AS (v agtype); + -- -- Cleanup -- +SELECT * FROM drop_graph('issue_2289', true); SELECT * FROM drop_graph('issue_2263', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 5f4de86b9..fc0335def 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -600,6 +600,34 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) Assert(is_ag_node(a->rexpr, cypher_list)); + rexpr = (cypher_list *)a->rexpr; + + /* + * Handle empty list case: x IN [] is always false, x NOT IN [] is always true. + * We need to check this before processing to avoid returning NULL result + * which causes "cache lookup failed for type 0" error. + */ + if (rexpr->elems == NIL || list_length((List *)rexpr->elems) == 0) + { + Datum bool_value; + Const *const_result; + + /* If operator is <> (NOT IN), result is true; otherwise (IN) result is false */ + if (strcmp(strVal(linitial(a->name)), "<>") == 0) + { + bool_value = BoolGetDatum(true); + } + else + { + bool_value = BoolGetDatum(false); + } + + const_result = makeConst(BOOLOID, -1, InvalidOid, sizeof(bool), + bool_value, false, true); + + return (Node *)const_result; + } + /* If the operator is <>, combine with AND not OR. 
*/ if (strcmp(strVal(linitial(a->name)), "<>") == 0) { @@ -614,8 +642,6 @@ static Node *transform_AEXPR_IN(cypher_parsestate *cpstate, A_Expr *a) rexprs = rvars = rnonvars = NIL; - rexpr = (cypher_list *)a->rexpr; - foreach(l, (List *) rexpr->elems) { Node *rexpr = transform_cypher_expr_recurse(cpstate, lfirst(l)); From 88d2048f2eeb983e4e71db4806fe25da81de59c6 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:55:36 -0800 Subject: [PATCH 18/25] Fix and improve index.sql regression test coverage (#2300) NOTE: This PR was created with AI tools and a human. - Remove unused copy command (leftover from deleted agload_test_graph test) - Replace broken Section 4 that referenced non-existent graph with comprehensive WHERE clause tests covering string, int, bool, and float properties with AND/OR/NOT operators - Add EXPLAIN tests to verify index usage: - Section 3: Validate GIN indices (load_city_gin_idx, load_country_gin_idx) show Bitmap Index Scan for property matching - Section 4: Validate all expression indices (city_country_code_idx, city_id_idx, city_west_coast_idx, country_life_exp_idx) show Index Scan for WHERE clause filtering All indices now have EXPLAIN verification confirming they are used as expected. modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 290 ++++++++++++++++++++++++++++++++++--- regress/sql/index.sql | 174 ++++++++++++++++++++-- 2 files changed, 436 insertions(+), 28 deletions(-) diff --git a/regress/expected/index.out b/regress/expected/index.out index 3ed7b1c33..9faead660 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; SET enable_mergejoin = ON; @@ -385,6 +384,19 @@ CREATE INDEX load_city_gin_idx ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on "City" c + Recheck Cond: (properties @> '{"city_id": 1}'::agtype) + -> Bitmap Index Scan on load_city_gin_idx + Index Cond: (properties @> '{"city_id": 1}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) RETURN c @@ -418,6 +430,19 @@ $$) as (n agtype); {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex (4 rows) +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------------------- + Bitmap Heap Scan on "Country" c + Recheck Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) + -> Bitmap Index Scan on load_country_gin_idx + Index Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -441,26 +466,259 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- 
Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Scan using city_country_code_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(2 rows) + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" + "Los Angeles" + "Seattle" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + name +------ +(0 rows) + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); - count -------- - 0 +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Index Scan using city_id_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"city_id"'::agtype]) = '1'::agtype) +(2 rows) + +-- Test WHERE with indexed integer property +SELECT * FROM 
cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" (1 row) -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + name +------------- + "Vancouver" +(1 row) + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" +(2 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Monterrey" + "Tijuana" +(2 rows) + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); -ERROR: graph "agload_test_graph" does not exist -LINE 1: SELECT COUNT(*) FROM cypher('agload_test_graph', $$ - ^ +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) +(2 rows) + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as 
(name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" + "Vancouver" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +--------------- + "New York" + "Toronto" + "Montreal" + "Mexico City" + "Monterrey" + "Tijuana" +(6 rows) + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" +(3 rows) + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "New York" + "Vancouver" +(2 rows) + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" +(1 row) + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Index Scan using country_life_exp_idx on "Country" c + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"life_expectancy"'::agtype]) > '80.0'::agtype) +(2 rows) + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + 
RETURN c.name +$$) as (name agtype); + name +---------- + "Canada" +(1 row) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Mexico" +(1 row) + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- @@ -478,5 +736,3 @@ NOTICE: graph "cypher_index" has been dropped (1 row) -SELECT drop_graph('agload_test_graph', true); -ERROR: graph "agload_test_graph" does not exist diff --git a/regress/sql/index.sql b/regress/sql/index.sql index d9a4331a4..96e7dd81a 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -17,8 +17,6 @@ * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load - LOAD 'age'; SET search_path TO ag_catalog; @@ -219,6 +217,11 @@ ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) @@ -235,6 +238,12 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c $$) as (n agtype); +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -250,23 +259,166 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); + +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name 
agtype); + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); + +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); +$$) as (plan agtype); -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); + +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); +$$) as (plan agtype); + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + 
+-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); + +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- SELECT drop_graph('cypher_index', true); -SELECT drop_graph('agload_test_graph', true); From 3b7510e83598dd82f547f33fe22d1d20c3ad8b9b Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 10 Jan 2026 08:37:54 -0800 
Subject: [PATCH 19/25] Fix and improve index.sql addendum (#2301) NOTE: This PR was created with the help of AI tools and a human. Added additional requested regression tests - *EXPLAIN for pattern with WHERE clause *EXPLAIN for pattern with filters on both country and city modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 34 ++++++++++++++++++++++++++++++++++ regress/sql/index.sql | 14 ++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/regress/expected/index.out b/regress/expected/index.out index 9faead660..745cab269 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -626,6 +626,19 @@ $$) as (name agtype); "Tijuana" (6 rows) +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(3 rows) + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -700,6 +713,27 @@ $$) as (name agtype); "Mexico" (1 row) +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Index Scan using city_west_coast_idx on "City" city + 
Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + -> Bitmap Heap Scan on has_city _age_default_alias_0 + Recheck Cond: (start_id = city.id) + -> Bitmap Index Scan on has_city_start_id_idx + Index Cond: (start_id = city.id) + -> Index Scan using "Country_pkey" on "Country" country + Index Cond: (id = _age_default_alias_0.end_id) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"CA"'::agtype) +(11 rows) + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) diff --git a/regress/sql/index.sql b/regress/sql/index.sql index 96e7dd81a..a6e075c70 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -357,6 +357,13 @@ SELECT * FROM cypher('cypher_index', $$ ORDER BY a.city_id $$) as (name agtype); +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -404,6 +411,13 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c.name $$) as (name agtype); +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) From 4dd05d4a4215b63d7937effe5f0d5dc129c4ff57 Mon Sep 17 00:00:00 2001 From: Jean-Paul Abbuehl Date: Mon, 12 Jan 2026 21:10:08 +0100 Subject: [PATCH 20/25] feat: Add 32-bit platform support for graphid type (#2286) * feat: Add 32-bit platform support for graphid type This 
enables AGE to work on 32-bit platforms including WebAssembly (WASM). Problem: - graphid is int64 (8 bytes) with PASSEDBYVALUE - On 32-bit systems, Datum is only 4 bytes - PostgreSQL rejects pass-by-value types larger than Datum Solution: - Makefile-only change (no C code modifications) - When SIZEOF_DATUM=4 is passed to make, strip PASSEDBYVALUE from the generated SQL - If not specified, normal 64-bit behavior is preserved (PASSEDBYVALUE kept) This change is backward compatible: - 64-bit systems continue using pass-by-value - 32-bit systems now work with pass-by-reference Motivation: PGlite (PostgreSQL compiled to WebAssembly) uses 32-bit pointers and requires this patch to run AGE. Tested on: - 64-bit Linux (regression tests pass) - 32-bit WebAssembly via PGlite (all operations work) Co-authored-by: abbuehlj --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0392d8b85..17f2ed653 100644 --- a/Makefile +++ b/Makefile @@ -138,6 +138,10 @@ PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4) +# When SIZEOF_DATUM=4, PASSEDBYVALUE is stripped from graphid type for pass-by-reference. +# If not specified, normal 64-bit behavior is used (PASSEDBYVALUE preserved). 
+ src/backend/parser/cypher_keywords.o: src/include/parser/cypher_kwlist_d.h src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_KEYWORDLIST_DEPS) @@ -152,8 +156,14 @@ src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/includ src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h -$(age_sql): +# Strip PASSEDBYVALUE on 32-bit (SIZEOF_DATUM=4) for graphid pass-by-reference +$(age_sql): $(SQLS) @cat $(SQLS) > $@ +ifeq ($(SIZEOF_DATUM),4) + @echo "32-bit build: removing PASSEDBYVALUE from graphid type" + @sed 's/^ PASSEDBYVALUE,$$/ -- PASSEDBYVALUE removed for 32-bit (see Makefile)/' $@ > $@.tmp && mv $@.tmp $@ + @grep -q 'PASSEDBYVALUE removed for 32-bit' $@ || { echo "Error: PASSEDBYVALUE replacement failed in $@"; exit 1; } +endif src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes From ce5d3a10e9de41bbe3774c75223a371ae8f34b61 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 16 Jan 2026 15:12:22 -0800 Subject: [PATCH 21/25] Optimize vertex/edge field access with direct array indexing (#2302) NOTE: This PR was created using AI tools and a human. Leverage deterministic key ordering from uniqueify_agtype_object() to access vertex/edge fields in O(1) instead of O(log n) binary search. 
Fields are sorted by key length, giving fixed positions: - Vertex: id(0), label(1), properties(2) - Edge: id(0), label(1), end_id(2), start_id(3), properties(4) Changes: - Add field index constants and accessor macros to agtype.h - Update age_id(), age_start_id(), age_end_id(), age_label(), age_properties() to use direct field access - Add fill_agtype_value_no_copy() for read-only scalar extraction without memory allocation - Add compare_agtype_scalar_containers() fast path for scalar comparison - Update hash_agtype_value(), equals_agtype_scalar_value(), and compare_agtype_scalar_values() to use direct field access macros - Add fast path in get_one_agtype_from_variadic_args() bypassing extract_variadic_args() for single argument case - Add comprehensive regression test (30 tests) Performance impact: Improves ORDER BY, hash joins, aggregations, and Cypher functions (id, start_id, end_id, label, properties) on vertices and edges. All previous regression tests were not impacted. Additional regression test added to enhance coverage. 
modified: Makefile new file: regress/expected/direct_field_access.out new file: regress/sql/direct_field_access.sql modified: src/backend/utils/adt/agtype.c modified: src/backend/utils/adt/agtype_util.c modified: src/include/utils/agtype.h --- Makefile | 3 +- regress/expected/direct_field_access.out | 535 +++++++++++++++++++++++ regress/sql/direct_field_access.sql | 319 ++++++++++++++ src/backend/utils/adt/agtype.c | 136 +++++- src/backend/utils/adt/agtype_util.c | 237 +++++++++- src/include/utils/agtype.h | 103 +++++ 6 files changed, 1304 insertions(+), 29 deletions(-) create mode 100644 regress/expected/direct_field_access.out create mode 100644 regress/sql/direct_field_access.sql diff --git a/Makefile b/Makefile index 17f2ed653..ffad7d6af 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ REGRESS = scan \ name_validation \ jsonb_operators \ list_comprehension \ - map_projection + map_projection \ + direct_field_access ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/direct_field_access.out b/regress/expected/direct_field_access.out new file mode 100644 index 000000000..0a059cdd9 --- /dev/null +++ b/regress/expected/direct_field_access.out @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('direct_access'); +NOTICE: graph "direct_access" has been created + create_graph +-------------- + +(1 row) + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +-------+------+------+------- + false | true | true | false +(1 row) + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 
row) + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + lt | gt +------+------ + true | true +(1 row) + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + lt | gt | eq | ne +------+------+------+------ + true | true | true | true +(1 row) + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + eq | ne +----+---- + | +(1 row) + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + lt | gt | eq +------+------+------ + true | true | true +(1 row) + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. 
+-- +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + n +--- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + n +--- + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1 +(9 rows) + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + s +---------- + "apple" + "banana" + "cherry" + "date" +(4 rows) + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + f +------ + 1.41 + 1.73 + 2.71 + 3.14 +(4 rows) + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + b +------- + false + false + true + true +(4 rows) + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. 
+-- +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + max_vertex +---------------------------------------------------------------------------------------------- + {"id": 844424930131973, "label": "Person", "properties": {"age": 32, "name": "Eve"}}::vertex +(1 row) + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + min_vertex +------------------------------------------------------------------------------------------------ + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex +(1 row) + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + max_edge +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131973, "properties": {"since": 2022}}::edge +(1 row) + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + min_edge 
+----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge +(1 row) + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" + "Eve" +(5 rows) + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + name +----------- + "Eve" + "Diana" + "Charlie" + "Bob" + "Alice" +(5 rows) + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + since +------- + 2020 + 2019 + 2021 + 2018 + 2022 +(5 rows) + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + a_name | b_name +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Alice" | "Diana" + "Alice" | "Eve" + "Bob" | "Charlie" + "Bob" | "Diana" + "Bob" | "Eve" + "Charlie" | "Diana" + "Charlie" | "Eve" + "Diana" | "Eve" +(10 rows) + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. 
+-- +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + eq | ne | lt | gt +------+------+------+------ + true | true | true | true +(1 row) + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + add | sub | mul | div +-----+-----+-----+----- + 15 | 5 | 50 | 2 +(1 row) + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + upper | lower | sz +---------+---------+---- + "HELLO" | "world" | 4 +(1 row) + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + int_val | float_val | str_val +---------+-----------+--------- + 42 | 3.14 | "42" +(1 row) + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. 
+-- +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + vertex_id +----------------- + 844424930131969 +(1 row) + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + edge_id +------------------ + 1125899906842625 +(1 row) + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + start_id | alice_id +-----------------+----------------- + 844424930131969 | 844424930131969 +(1 row) + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + end_id | bob_id +-----------------+----------------- + 844424930131970 | 844424930131970 +(1 row) + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + vertex_label +-------------- + "Person" +(1 row) + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + edge_label +------------ + "KNOWS" +(1 row) + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + vertex_props 
+------------------------------ + {"age": 30, "name": "Alice"} +(1 row) + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + edge_props +----------------- + {"since": 2020} +(1 row) + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + a_id | a_label | a_name | r_id | r_start | r_end | r_label | r_since | b_id | b_label | b_name +-----------------+----------+---------+------------------+-----------------+-----------------+---------+---------+-----------------+----------+-------- + 844424930131969 | "Person" | "Alice" | 1125899906842625 | 844424930131969 | 844424930131970 | "KNOWS" | 2020 | 844424930131970 | "Person" | "Bob" +(1 row) + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + eq | lt +------+------ + true | true +(1 row) + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + eq +------ + true +(1 row) + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + big_gt | neg_lt +--------+-------- + true | 
true +(1 row) + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq agtype); + lt | eq +------+------ + true | true +(1 row) + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + zero_eq +--------- + true +(1 row) + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table direct_access._ag_label_vertex +drop cascades to table direct_access._ag_label_edge +drop cascades to table direct_access."Person" +drop cascades to table direct_access."KNOWS" +NOTICE: graph "direct_access" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/direct_field_access.sql b/regress/sql/direct_field_access.sql new file mode 100644 index 000000000..c8060be4a --- /dev/null +++ b/regress/sql/direct_field_access.sql @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Direct Field Access Optimizations Test + * + * Tests for optimizations that directly access agtype fields without + * using the full iterator machinery or binary search: + * + * 1. 
fill_agtype_value_no_copy() - Read-only access without memory allocation + * 2. compare_agtype_scalar_containers() - Fast path for scalar comparisons + * 3. Direct pairs[0] access for vertex/edge id comparison + * 4. Fast path in get_one_agtype_from_variadic_args() + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('direct_access'); + +-- +-- Section 1: Scalar Comparison Fast Path Tests +-- +-- These tests exercise the compare_agtype_scalar_containers() fast path +-- which uses fill_agtype_value_no_copy() for read-only comparisons. +-- + +-- Integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 2, 2 > 1, 1 = 1, 1 <> 2 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 100 < 50, 100 > 50, 100 = 100, 100 <> 100 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Float comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.5 < 2.5, 2.5 > 1.5, 1.5 = 1.5, 1.5 <> 2.5 +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- String comparisons (tests no-copy string pointer) +SELECT * FROM cypher('direct_access', $$ + RETURN 'abc' < 'abd', 'abd' > 'abc', 'abc' = 'abc', 'abc' <> 'abd' +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +SELECT * FROM cypher('direct_access', $$ + RETURN 'hello world' < 'hello worlds', 'test' > 'TEST' +$$) AS (lt agtype, gt agtype); + +-- Boolean comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN false < true, true > false, true = true, false <> true +$$) AS (lt agtype, gt agtype, eq agtype, ne agtype); + +-- Null comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN null = null, null <> null +$$) AS (eq agtype, ne agtype); + +-- Mixed numeric type comparisons (integer vs float) +SELECT * FROM cypher('direct_access', $$ + RETURN 1 < 1.5, 2.0 > 1, 1.0 = 1 +$$) AS (lt agtype, gt agtype, eq agtype); + +-- Numeric type comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 1.234::numeric < 
1.235::numeric, + 1.235::numeric > 1.234::numeric, + 1.234::numeric = 1.234::numeric +$$) AS (lt agtype, gt agtype, eq agtype); + +-- +-- Section 2: ORDER BY Tests (exercises comparison fast path) +-- +-- ORDER BY uses compare_agtype_containers_orderability which now has +-- a fast path for scalar comparisons. +-- + +-- Integer ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n +$$) AS (n agtype); + +SELECT * FROM cypher('direct_access', $$ + UNWIND [5, 3, 8, 1, 9, 2, 7, 4, 6] AS n + RETURN n ORDER BY n DESC +$$) AS (n agtype); + +-- String ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND ['banana', 'apple', 'cherry', 'date'] AS s + RETURN s ORDER BY s +$$) AS (s agtype); + +-- Float ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [3.14, 2.71, 1.41, 1.73] AS f + RETURN f ORDER BY f +$$) AS (f agtype); + +-- Boolean ORDER BY +SELECT * FROM cypher('direct_access', $$ + UNWIND [true, false, true, false] AS b + RETURN b ORDER BY b +$$) AS (b agtype); + +-- +-- Section 3: Vertex/Edge Direct ID Access Tests +-- +-- These tests exercise the direct pairs[0] access optimization for +-- extracting graphid from vertices and edges during comparison. 
+-- + +-- Create test data +SELECT * FROM cypher('direct_access', $$ + CREATE (a:Person {name: 'Alice', age: 30}), + (b:Person {name: 'Bob', age: 25}), + (c:Person {name: 'Charlie', age: 35}), + (d:Person {name: 'Diana', age: 28}), + (e:Person {name: 'Eve', age: 32}), + (a)-[:KNOWS {since: 2020}]->(b), + (b)-[:KNOWS {since: 2019}]->(c), + (c)-[:KNOWS {since: 2021}]->(d), + (d)-[:KNOWS {since: 2018}]->(e), + (e)-[:KNOWS {since: 2022}]->(a) +$$) AS (result agtype); + +-- Test max() on vertices (uses compare_agtype_scalar_values with AGTV_VERTEX) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN max(p) +$$) AS (max_vertex agtype); + +-- Test min() on vertices +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN min(p) +$$) AS (min_vertex agtype); + +-- Test max() on edges (uses compare_agtype_scalar_values with AGTV_EDGE) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN max(r) +$$) AS (max_edge agtype); + +-- Test min() on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN min(r) +$$) AS (min_edge agtype); + +-- ORDER BY on vertices (uses direct id comparison) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p +$$) AS (name agtype); + +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person) + RETURN p.name ORDER BY p DESC +$$) AS (name agtype); + +-- ORDER BY on edges +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN r.since ORDER BY r +$$) AS (since agtype); + +-- Vertex comparison in WHERE +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person), (b:Person) + WHERE a < b + RETURN a.name, b.name +$$) AS (a_name agtype, b_name agtype); + +-- +-- Section 4: Fast Path for get_one_agtype_from_variadic_args +-- +-- These tests exercise the fast path that bypasses extract_variadic_args +-- when the argument is already agtype. 
+-- + +-- Direct agtype comparison operators (use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 42 = 42, 42 <> 43, 42 < 100, 42 > 10 +$$) AS (eq agtype, ne agtype, lt agtype, gt agtype); + +-- Arithmetic operators (also use the fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN 10 + 5, 10 - 5, 10 * 5, 10 / 5 +$$) AS (add agtype, sub agtype, mul agtype, div agtype); + +-- String functions that take agtype args +SELECT * FROM cypher('direct_access', $$ + RETURN toUpper('hello'), toLower('WORLD'), size('test') +$$) AS (upper agtype, lower agtype, sz agtype); + +-- Type checking functions +SELECT * FROM cypher('direct_access', $$ + RETURN toInteger('42'), toFloat('3.14'), toString(42) +$$) AS (int_val agtype, float_val agtype, str_val agtype); + +-- +-- Section 5: Direct Field Access for Accessor Functions +-- +-- These tests exercise the direct field access macros in id(), start_id(), +-- end_id(), label(), and properties() functions. +-- + +-- Test id() on vertices (uses AGTYPE_VERTEX_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN id(p) +$$) AS (vertex_id agtype); + +-- Test id() on edges (uses AGTYPE_EDGE_GET_ID macro - index 0) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN id(r) +$$) AS (edge_id agtype); + +-- Test start_id() on edges (uses AGTYPE_EDGE_GET_START_ID macro - index 3) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN start_id(r), id(a) +$$) AS (start_id agtype, alice_id agtype); + +-- Test end_id() on edges (uses AGTYPE_EDGE_GET_END_ID macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN end_id(r), id(b) +$$) AS (end_id agtype, bob_id agtype); + +-- Test label() on vertices (uses AGTYPE_VERTEX_GET_LABEL macro - index 1) +SELECT * 
FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN label(p) +$$) AS (vertex_label agtype); + +-- Test label() on edges (uses AGTYPE_EDGE_GET_LABEL macro - index 1) +SELECT * FROM cypher('direct_access', $$ + MATCH ()-[r:KNOWS]->() + RETURN DISTINCT label(r) +$$) AS (edge_label agtype); + +-- Test properties() on vertices (uses AGTYPE_VERTEX_GET_PROPERTIES macro - index 2) +SELECT * FROM cypher('direct_access', $$ + MATCH (p:Person {name: 'Alice'}) + RETURN properties(p) +$$) AS (vertex_props agtype); + +-- Test properties() on edges (uses AGTYPE_EDGE_GET_PROPERTIES macro - index 4) +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'}) + RETURN properties(r) +$$) AS (edge_props agtype); + +-- Combined accessor test - verify all fields are accessible +SELECT * FROM cypher('direct_access', $$ + MATCH (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person) + RETURN id(a), label(a), properties(a).name, + id(r), start_id(r), end_id(r), label(r), properties(r).since, + id(b), label(b), properties(b).name +$$) AS (a_id agtype, a_label agtype, a_name agtype, + r_id agtype, r_start agtype, r_end agtype, r_label agtype, r_since agtype, + b_id agtype, b_label agtype, b_name agtype); + +-- +-- Section 6: Mixed Comparisons and Edge Cases +-- + +-- Array comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN [1,2,3] = [1,2,3], [1,2,3] < [1,2,4] +$$) AS (eq agtype, lt agtype); + +-- Object comparisons (should NOT use scalar fast path) +SELECT * FROM cypher('direct_access', $$ + RETURN {a:1, b:2} = {a:1, b:2} +$$) AS (eq agtype); + +-- Large integer comparisons +SELECT * FROM cypher('direct_access', $$ + RETURN 9223372036854775807 > 9223372036854775806, + -9223372036854775808 < -9223372036854775807 +$$) AS (big_gt agtype, neg_lt agtype); + +-- Empty string comparison +SELECT * FROM cypher('direct_access', $$ + RETURN '' < 'a', '' = '' +$$) AS (lt agtype, eq 
agtype); + +-- Special float values +SELECT * FROM cypher('direct_access', $$ + RETURN 0.0 = -0.0 +$$) AS (zero_eq agtype); + +-- +-- Cleanup +-- +SELECT drop_graph('direct_access', true); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 02fc3221c..f2458a30b 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -5409,10 +5409,24 @@ Datum age_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("id() argument must be a vertex, an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: id is at a fixed index for both + * vertex and edge objects due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_ID(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_ID(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("id() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5447,10 +5461,11 @@ Datum age_start_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("start_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "start_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: start_id is at index 3 for edge + * objects due to key length sorting (id=0, label=1, end_id=2, start_id=3). 
+ */ + agtv_result = AGTYPE_EDGE_GET_START_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -5485,10 +5500,11 @@ Datum age_end_id(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("end_id() argument must be an edge or null"))); - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "end_id"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_INTEGER); + /* + * Direct field access optimization: end_id is at index 2 for edge + * objects due to key length sorting (id=0, label=1, end_id=2). + */ + agtv_result = AGTYPE_EDGE_GET_END_ID(agtv_object); PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -6038,10 +6054,25 @@ Datum age_properties(PG_FUNCTION_ARGS) errmsg("properties() argument must be a vertex, an edge or null"))); } - agtv_result = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_object, "properties"); - - Assert(agtv_result != NULL); - Assert(agtv_result->type = AGTV_OBJECT); + /* + * Direct field access optimization: properties is at index 2 for vertex + * (id=0, label=1, properties=2) and index 4 for edge (id=0, label=1, + * end_id=2, start_id=3, properties=4) due to key length sorting. + */ + if (agtv_object->type == AGTV_VERTEX) + { + agtv_result = AGTYPE_VERTEX_GET_PROPERTIES(agtv_object); + } + else if (agtv_object->type == AGTV_EDGE) + { + agtv_result = AGTYPE_EDGE_GET_PROPERTIES(agtv_object); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("properties() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result)); } @@ -7170,8 +7201,24 @@ Datum age_label(PG_FUNCTION_ARGS) } - /* extract the label agtype value from the vertex or edge */ - label = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv_value, "label"); + /* + * Direct field access optimization: label is at a fixed index for both + * vertex and edge objects due to key length sorting. 
+ */ + if (agtv_value->type == AGTV_VERTEX) + { + label = AGTYPE_VERTEX_GET_LABEL(agtv_value); + } + else if (agtv_value->type == AGTV_EDGE) + { + label = AGTYPE_EDGE_GET_LABEL(agtv_value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label() unexpected argument type"))); + } PG_RETURN_POINTER(agtype_value_to_agtype(label)); } @@ -10507,6 +10554,59 @@ agtype *get_one_agtype_from_variadic_args(FunctionCallInfo fcinfo, Oid *types = NULL; agtype *agtype_result = NULL; + /* + * Fast path optimization: For non-variadic calls where the argument + * is already an agtype, we can avoid the overhead of extract_variadic_args + * which allocates three arrays. This is the common case for most agtype + * comparison and arithmetic operators. + */ + if (!get_fn_expr_variadic(fcinfo->flinfo)) + { + int total_args = PG_NARGS(); + int actual_nargs = total_args - variadic_offset; + + /* Verify expected number of arguments */ + if (actual_nargs != expected_nargs) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("number of args %d does not match expected %d", + actual_nargs, expected_nargs))); + } + + /* Check for SQL NULL */ + if (PG_ARGISNULL(variadic_offset)) + { + return NULL; + } + + /* Check if the argument is already an agtype */ + if (get_fn_expr_argtype(fcinfo->flinfo, variadic_offset) == AGTYPEOID) + { + agtype_container *agtc; + + agtype_result = DATUM_GET_AGTYPE_P(PG_GETARG_DATUM(variadic_offset)); + agtc = &agtype_result->root; + + /* + * Is this a scalar (scalars are stored as one element arrays)? + * If so, test for agtype NULL. + */ + if (AGTYPE_CONTAINER_IS_SCALAR(agtc) && + AGTE_IS_NULL(agtc->children[0])) + { + return NULL; + } + + return agtype_result; + } + + /* + * Not an agtype, need to convert. Fall through to use + * extract_variadic_args for type conversion handling. 
+ */ + } + + /* Standard path using extract_variadic_args */ nargs = extract_variadic_args(fcinfo, variadic_offset, false, &args, &types, &nulls); /* throw an error if the number of args is not the expected number */ diff --git a/src/backend/utils/adt/agtype_util.c b/src/backend/utils/adt/agtype_util.c index 01a965cdd..b39723413 100644 --- a/src/backend/utils/adt/agtype_util.c +++ b/src/backend/utils/adt/agtype_util.c @@ -41,6 +41,14 @@ #include "utils/agtype_ext.h" +/* + * Extended type header macros - must match definitions in agtype_ext.c. + * These are used for deserializing extended agtype values (INTEGER, FLOAT, + * VERTEX, EDGE, PATH) from their binary representation. + */ +#define AGT_HEADER_TYPE uint32 +#define AGT_HEADER_SIZE sizeof(AGT_HEADER_TYPE) + /* * Maximum number of elements in an array (or key/value pairs in an object). * This is limited by two things: the size of the agtentry array must fit @@ -56,6 +64,11 @@ static void fill_agtype_value(agtype_container *container, int index, char *base_addr, uint32 offset, agtype_value *result); +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result); +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b); static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b); static agtype *convert_to_agtype(agtype_value *val); static void convert_agtype_value(StringInfo buffer, agtentry *header, @@ -264,6 +277,24 @@ int compare_agtype_containers_orderability(agtype_container *a, agtype_iterator *itb; int res = 0; + /* + * Fast path optimization for scalar values. + * + * The most common case in ORDER BY and comparison operations is comparing + * scalar values (integers, strings, floats, etc.). For these cases, we can + * avoid the overhead of the full iterator machinery by directly extracting + * and comparing the scalar values. 
+ * + * This provides significant performance improvement because: + * 1. We avoid allocating two agtype_iterator structures + * 2. We avoid the iterator state machine overhead + * 3. We use no-copy extraction where possible + */ + if (AGTYPE_CONTAINER_IS_SCALAR(a) && AGTYPE_CONTAINER_IS_SCALAR(b)) + { + return compare_agtype_scalar_containers(a, b); + } + ita = agtype_iterator_init(a); itb = agtype_iterator_init(b); @@ -751,6 +782,173 @@ static void fill_agtype_value(agtype_container *container, int index, } } +/* + * A helper function to fill in an agtype_value WITHOUT making deep copies. + * This is used for read-only comparison operations where the agtype_value + * will not outlive the container data. The caller MUST NOT free the + * agtype_value content or use it after the container is freed. + * + * This function provides significant performance improvements for comparison + * operations by avoiding palloc/memcpy for strings and numerics. + * + * Note: For AGTV_STRING, val.string.val points directly into container data. + * Note: For AGTV_NUMERIC, val.numeric points directly into container data. + * Note: Extended types (VERTEX, EDGE, PATH) still require deserialization, + * so they use the standard fill_agtype_value path. 
+ */ +static void fill_agtype_value_no_copy(agtype_container *container, int index, + char *base_addr, uint32 offset, + agtype_value *result) +{ + agtentry entry = container->children[index]; + + if (AGTE_IS_NULL(entry)) + { + result->type = AGTV_NULL; + } + else if (AGTE_IS_STRING(entry)) + { + result->type = AGTV_STRING; + /* Point directly into the container data - no copy */ + result->val.string.val = base_addr + offset; + result->val.string.len = get_agtype_length(container, index); + } + else if (AGTE_IS_NUMERIC(entry)) + { + result->type = AGTV_NUMERIC; + /* Point directly into the container data - no copy */ + result->val.numeric = (Numeric)(base_addr + INTALIGN(offset)); + } + else if (AGTE_IS_AGTYPE(entry)) + { + /* + * For extended types (INTEGER, FLOAT, VERTEX, EDGE, PATH), we need + * to deserialize. INTEGER and FLOAT don't allocate, but composite + * types (VERTEX, EDGE, PATH) do. For simple scalar comparisons, + * we handle INTEGER and FLOAT directly here. + */ + char *base = base_addr + INTALIGN(offset); + AGT_HEADER_TYPE agt_header = *((AGT_HEADER_TYPE *)base); + + switch (agt_header) + { + case AGT_HEADER_INTEGER: + result->type = AGTV_INTEGER; + result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE)); + break; + + case AGT_HEADER_FLOAT: + result->type = AGTV_FLOAT; + result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE)); + break; + + default: + /* + * For VERTEX, EDGE, PATH - use standard deserialization. + * These are composite types that require full parsing. 
+ */ + ag_deserialize_extended_type(base_addr, offset, result); + break; + } + } + else if (AGTE_IS_BOOL_TRUE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = true; + } + else if (AGTE_IS_BOOL_FALSE(entry)) + { + result->type = AGTV_BOOL; + result->val.boolean = false; + } + else + { + Assert(AGTE_IS_CONTAINER(entry)); + result->type = AGTV_BINARY; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = + (agtype_container *)(base_addr + INTALIGN(offset)); + result->val.binary.len = get_agtype_length(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Fast path comparison for scalar agtype containers. + * + * This function compares two scalar containers directly without the overhead + * of the full iterator machinery. It extracts the scalar values using no-copy + * fill and compares them directly. + * + * Returns: negative if a < b, 0 if a == b, positive if a > b + */ +static int compare_agtype_scalar_containers(agtype_container *a, + agtype_container *b) +{ + agtype_value va; + agtype_value vb; + char *base_addr_a; + char *base_addr_b; + int result; + bool need_free_a = false; + bool need_free_b = false; + + Assert(AGTYPE_CONTAINER_IS_SCALAR(a)); + Assert(AGTYPE_CONTAINER_IS_SCALAR(b)); + + /* Scalars are stored as single-element arrays */ + base_addr_a = (char *)&a->children[1]; + base_addr_b = (char *)&b->children[1]; + + /* Use no-copy fill to avoid allocations for simple types */ + fill_agtype_value_no_copy(a, 0, base_addr_a, 0, &va); + fill_agtype_value_no_copy(b, 0, base_addr_b, 0, &vb); + + /* + * Check if we need to free the values after comparison. + * Only VERTEX, EDGE, and PATH types allocate memory in no-copy mode. + */ + if (va.type == AGTV_VERTEX || va.type == AGTV_EDGE || va.type == AGTV_PATH) + { + need_free_a = true; + } + if (vb.type == AGTV_VERTEX || vb.type == AGTV_EDGE || vb.type == AGTV_PATH) + { + need_free_b = true; + } + + /* + * Compare the scalar values. 
If types match or are numeric compatible, + * use scalar comparison. Otherwise, use type-based ordering. + */ + if ((va.type == vb.type) || + ((va.type == AGTV_INTEGER || va.type == AGTV_FLOAT || + va.type == AGTV_NUMERIC) && + (vb.type == AGTV_INTEGER || vb.type == AGTV_FLOAT || + vb.type == AGTV_NUMERIC))) + { + result = compare_agtype_scalar_values(&va, &vb); + } + else + { + /* Type-defined order */ + result = (get_type_sort_priority(va.type) < + get_type_sort_priority(vb.type)) ? -1 : 1; + } + + /* Free any allocated memory from composite types */ + if (need_free_a) + { + pfree_agtype_value_content(&va); + } + if (need_free_b) + { + pfree_agtype_value_content(&vb); + } + + return result; +} + /* * Push agtype_value into agtype_parse_state. * @@ -1597,7 +1795,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_VERTEX: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_VERTEX_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1606,7 +1805,8 @@ void agtype_hash_scalar_value_extended(const agtype_value *scalar_val, case AGTV_EDGE: { graphid id; - agtype_value *id_agt = GET_AGTYPE_VALUE_OBJECT_VALUE(scalar_val, "id"); + agtype_value *id_agt; + id_agt = AGTYPE_EDGE_GET_ID(scalar_val); id = id_agt->val.int_value; tmp = DatumGetUInt64(DirectFunctionCall2( hashint8extended, Float8GetDatum(id), UInt64GetDatum(seed))); @@ -1704,8 +1904,8 @@ static bool equals_agtype_scalar_value(agtype_value *a, agtype_value *b) case AGTV_VERTEX: { graphid a_graphid, b_graphid; - a_graphid = a->val.object.pairs[0].value.val.int_value; - b_graphid = b->val.object.pairs[0].value.val.int_value; + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; return a_graphid == b_graphid; } @@ -1790,16 +1990,33 @@ int 
compare_agtype_scalar_values(agtype_value *a, agtype_value *b) return compare_two_floats_orderability(a->val.float_value, b->val.float_value); case AGTV_VERTEX: - case AGTV_EDGE: { - agtype_value *a_id, *b_id; graphid a_graphid, b_graphid; - a_id = GET_AGTYPE_VALUE_OBJECT_VALUE(a, "id"); - b_id = GET_AGTYPE_VALUE_OBJECT_VALUE(b, "id"); + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_VERTEX_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_VERTEX_GET_ID(b)->val.int_value; + + if (a_graphid == b_graphid) + { + return 0; + } + else if (a_graphid > b_graphid) + { + return 1; + } + else + { + return -1; + } + } + case AGTV_EDGE: + { + graphid a_graphid, b_graphid; - a_graphid = a_id->val.int_value; - b_graphid = b_id->val.int_value; + /* Direct field access optimization using macros defined in agtype.h. */ + a_graphid = AGTYPE_EDGE_GET_ID(a)->val.int_value; + b_graphid = AGTYPE_EDGE_GET_ID(b)->val.int_value; if (a_graphid == b_graphid) { diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index ab2ba08cc..ec9125073 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -322,6 +322,109 @@ enum agtype_value_type AGTV_BINARY }; +/* + * Direct field access indices for vertex and edge objects. + * + * Vertex and edge objects are serialized with keys sorted by length first, + * then lexicographically (via uniqueify_agtype_object). This means field + * positions are deterministic and can be accessed directly without binary + * search, providing O(1) access instead of O(log n). 
+ * + * Vertex keys by length: "id"(2), "label"(5), "properties"(10) + * Edge keys by length: "id"(2), "label"(5), "end_id"(6), "start_id"(8), "properties"(10) + */ +#define VERTEX_FIELD_ID 0 +#define VERTEX_FIELD_LABEL 1 +#define VERTEX_FIELD_PROPERTIES 2 +#define VERTEX_NUM_FIELDS 3 + +#define EDGE_FIELD_ID 0 +#define EDGE_FIELD_LABEL 1 +#define EDGE_FIELD_END_ID 2 +#define EDGE_FIELD_START_ID 3 +#define EDGE_FIELD_PROPERTIES 4 +#define EDGE_NUM_FIELDS 5 + +/* + * Macros for direct field access from vertex/edge agtype_value objects. + * These avoid the binary search overhead of GET_AGTYPE_VALUE_OBJECT_VALUE. + * Validation is integrated - macros will error if field count is incorrect. + * Uses GCC statement expressions to allow validation within expressions. + */ +#define AGTYPE_VERTEX_GET_ID(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_ID].value; \ + }) +#define AGTYPE_VERTEX_GET_LABEL(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_LABEL].value; \ + }) +#define AGTYPE_VERTEX_GET_PROPERTIES(v) \ + ({ \ + if ((v)->val.object.num_pairs != VERTEX_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid vertex structure: expected %d fields, found %d", \ + VERTEX_NUM_FIELDS, (v)->val.object.num_pairs))); \ + &(v)->val.object.pairs[VERTEX_FIELD_PROPERTIES].value; \ + }) + +#define AGTYPE_EDGE_GET_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d 
fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_ID].value; \ + }) +#define AGTYPE_EDGE_GET_LABEL(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_LABEL].value; \ + }) +#define AGTYPE_EDGE_GET_END_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_END_ID].value; \ + }) +#define AGTYPE_EDGE_GET_START_ID(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_START_ID].value; \ + }) +#define AGTYPE_EDGE_GET_PROPERTIES(e) \ + ({ \ + if ((e)->val.object.num_pairs != EDGE_NUM_FIELDS) \ + ereport(ERROR, \ + (errcode(ERRCODE_DATA_CORRUPTED), \ + errmsg("invalid edge structure: expected %d fields, found %d", \ + EDGE_NUM_FIELDS, (e)->val.object.num_pairs))); \ + &(e)->val.object.pairs[EDGE_FIELD_PROPERTIES].value; \ + }) + /* * agtype_value: In-memory representation of agtype. This is a convenient * deserialized representation, that can easily support using the "val" From 7112a9a69a16cc23121b6141867705003751c0cb Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 17 Jan 2026 03:18:10 -0800 Subject: [PATCH 22/25] Upgrade Jest to v29 for node: protocol compatibility (#2307) Note: This PR was created with AI tools and a human. 
The pg-connection-string module (dependency of pg) now uses the node: protocol prefix for built-in modules (e.g., require('node:process')). Jest 26 does not support this syntax, causing test failures. Changes: - Upgrade jest from ^26.6.3 to ^29.7.0 - Upgrade ts-jest from ^26.5.1 to ^29.4.6 - Upgrade @types/jest from ^26.0.20 to ^29.5.14 - Update typescript to ^4.9.5 This also resolves 19 npm audit vulnerabilities (17 moderate, 2 high) that existed in the older Jest 26 dependency tree. modified: drivers/nodejs/package.json --- drivers/nodejs/package.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nodejs/package.json b/drivers/nodejs/package.json index 9f88bc2ba..6be11c780 100644 --- a/drivers/nodejs/package.json +++ b/drivers/nodejs/package.json @@ -33,7 +33,7 @@ "pg": ">=6.0.0" }, "devDependencies": { - "@types/jest": "^26.0.20", + "@types/jest": "^29.5.14", "@types/pg": "^7.14.10", "@typescript-eslint/eslint-plugin": "^4.22.1", "@typescript-eslint/parser": "^4.22.1", @@ -44,8 +44,8 @@ "eslint-plugin-jest": "^24.3.6", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.3.1", - "jest": "^26.6.3", - "ts-jest": "^26.5.1", - "typescript": "^4.1.5" + "jest": "^29.7.0", + "ts-jest": "^29.4.6", + "typescript": "^4.9.5" } } From 49b186eac216a6466a7be8df266495120a2feae7 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 18 Jan 2026 10:02:55 -0800 Subject: [PATCH 23/25] Fix Issue 1884: Ambiguous column reference (#2306) Fix Issue 1884: Ambiguous column reference and invalid AGT header errors. Note: This PR was created with AI tools and a human, or 2. 
This commit addresses two related bugs that occur when using SET to store graph elements (vertices, edges, paths) as property values: Issue 1884 - "column reference is ambiguous" error: When a Cypher query uses the same variable in both the SET expression RHS and the RETURN clause (e.g., SET n.prop = n RETURN n), PostgreSQL would report "column reference is ambiguous" because the variable appeared in multiple subqueries without proper qualification. Solution: The fix for this issue was already in place through the target entry naming scheme that qualifies column references. "Invalid AGT header value" offset error: When deserializing nested VERTEX, EDGE, or PATH values stored in properties, the system would fail with errors like "Invalid AGT header value: 0x00000041". This occurred because ag_serialize_extended_type() did not include alignment padding (padlen) in the agtentry length calculation for these types, while fill_agtype_value() uses INTALIGN() when reading, causing offset mismatch. Solution: Modified ag_serialize_extended_type() in agtype_ext.c to include padlen in the agtentry length for VERTEX, EDGE, and PATH cases, matching the existing pattern used for INTEGER, FLOAT, and NUMERIC types: *agtentry = AGTENTRY_IS_AGTYPE | (padlen + (AGTENTRY_OFFLENMASK & ...)); This ensures the serialized length accounts for alignment padding, allowing correct deserialization of nested graph elements. Appropriate regression tests were added to verify the fixes. 
Co-authored-by: Zainab Saad <105385638+Zainab-Saad@users.noreply.github.com> modified: regress/expected/cypher_set.out modified: regress/sql/cypher_set.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/utils/adt/agtype_ext.c --- regress/expected/cypher_set.out | 266 +++++++++++++++++++++++++++++ regress/sql/cypher_set.sql | 164 ++++++++++++++++++ src/backend/parser/cypher_clause.c | 19 ++- src/backend/utils/adt/agtype_ext.c | 8 +- 4 files changed, 451 insertions(+), 6 deletions(-) diff --git a/regress/expected/cypher_set.out b/regress/expected/cypher_set.out index 1d24a7f9b..239234ed6 100644 --- a/regress/expected/cypher_set.out +++ b/regress/expected/cypher_set.out @@ -988,6 +988,245 @@ SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); --- (0 rows) +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. 
"Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); +NOTICE: graph "issue_1884" has been created + create_graph +-------------- + +(1 row) + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1", "self": {"id": 844424930131969, "label": "TestA1", "properties": {"name": "A1"}}::vertex}}::vertex +(1 row) + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + a | e | b 
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "A"}}::vertex | {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge | {"id": 1125899906842626, "label": "TestA2", "properties": {"edge": {"id": 1407374883553281, "label": "LINK", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {"w": 1}}::edge, "name": "B"}}::vertex +(1 row) + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + e +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 1970324836974593, "label": "REL", "end_id": 1688849860263938, "start_id": 1688849860263937, "properties": {"dst": {"id": 1688849860263938, "label": "TestA3", "properties": {"name": "Y"}}::vertex, "src": {"id": 1688849860263937, "label": "TestA3", "properties": {"name": "X"}}::vertex}}::edge +(1 row) + +-- 
============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1, "copy": {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex}}::vertex +(1 row) + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + copy +------------------------------------------------------------------------------- + {"id": 2251799813685249, "label": "TestB1", "properties": {"val": 1}}::vertex +(1 row) + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + a +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2533274790395905, "label": "TestB2", "properties": {"name": "start", "myEdge": {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge}}::vertex +(1 row) + +-- Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + edge 
+-------------------------------------------------------------------------------------------------------------------------------------- + {"id": 2814749767106561, "label": "B2REL", "end_id": 2533274790395906, "start_id": 2533274790395905, "properties": {"x": 100}}::edge +(1 row) + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex +(1 row) + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3096224743817217, "label": "TestB3", "properties": {"myPath": [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path}}::vertex +(1 row) + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + path 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 3096224743817217, "label": "TestB3", "properties": {}}::vertex, {"id": 3377699720527873, "label": "B3REL", "end_id": 3096224743817218, "start_id": 3096224743817217, "properties": {}}::edge, {"id": 3096224743817218, "label": "TestB3", "properties": {}}::vertex]::path +(1 row) + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"arr": [{"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex], "tag": "arrtest"}}::vertex +(1 row) + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + elem +--------------------------------------------------------------------------------------- + {"id": 3659174697238529, "label": "TestC1", "properties": {"tag": "arrtest"}}::vertex +(1 row) + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + result 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"obj": {"node": {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex}, "tag": "maptest"}}::vertex +(1 row) + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + node +--------------------------------------------------------------------------------------- + {"id": 3940649673949185, "label": "TestC2", "properties": {"tag": "maptest"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4222124650659841, "label": "TestD1", "properties": {"ref": {"id": 4222124650659841, "label": "TestD1", "properties": {"name": "merged"}}::vertex, "name": "merged"}}::vertex +(1 row) + +-- Test D2: CREATE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4503599627370497, "label": "TestD2", "properties": {"ref": {"id": 4503599627370497, "label": "TestD2", "properties": {"name": 
"created"}}::vertex, "name": "created"}}::vertex +(1 row) + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + result +---------------------------------------------------------------------------------------------------------------------------------------- + {"id": 4785074604081153, "label": "TestE1", "properties": {"myId": 4785074604081153, "name": "functest", "myLabel": "TestE1"}}::vertex +(1 row) + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + a +----------------------------------------------------------------------- + {"id": 5066549580791809, "label": "TestE2", "properties": {}}::vertex +(1 row) + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + a +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"id": 5066549580791809, "label": "TestE2", "properties": {"pathRels": [{"id": 5348024557502465, "label": "E2REL", "end_id": 5066549580791810, "start_id": 5066549580791809, "properties": {}}::edge], "pathNodes": [{"id": 5066549580791809, 
"label": "TestE2", "properties": {}}::vertex, {"id": 5066549580791810, "label": "TestE2", "properties": {}}::vertex]}}::vertex +(1 row) + -- -- Clean up -- @@ -1038,6 +1277,33 @@ NOTICE: graph "issue_1634" has been dropped (1 row) +SELECT drop_graph('issue_1884', true); +NOTICE: drop cascades to 19 other objects +DETAIL: drop cascades to table issue_1884._ag_label_vertex +drop cascades to table issue_1884._ag_label_edge +drop cascades to table issue_1884."TestA1" +drop cascades to table issue_1884."TestA2" +drop cascades to table issue_1884."LINK" +drop cascades to table issue_1884."TestA3" +drop cascades to table issue_1884."REL" +drop cascades to table issue_1884."TestB1" +drop cascades to table issue_1884."TestB2" +drop cascades to table issue_1884."B2REL" +drop cascades to table issue_1884."TestB3" +drop cascades to table issue_1884."B3REL" +drop cascades to table issue_1884."TestC1" +drop cascades to table issue_1884."TestC2" +drop cascades to table issue_1884."TestD1" +drop cascades to table issue_1884."TestD2" +drop cascades to table issue_1884."TestE1" +drop cascades to table issue_1884."TestE2" +drop cascades to table issue_1884."E2REL" +NOTICE: graph "issue_1884" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/sql/cypher_set.sql b/regress/sql/cypher_set.sql index a2667153d..e745d5d6e 100644 --- a/regress/sql/cypher_set.sql +++ b/regress/sql/cypher_set.sql @@ -379,6 +379,169 @@ SELECT * FROM cypher('issue_1634', $$ MERGE (v:PERSION {id: '1'}) SELECT * FROM cypher('issue_1634', $$ MATCH (u) DELETE (u) $$) AS (u agtype); +-- +-- Issue 1884: column reference is ambiguous when using same variable in +-- SET expression and RETURN clause +-- +-- These tests cover: +-- 1. "column reference is ambiguous" error when variable is used in both +-- SET expression RHS (e.g., SET n.prop = n) and RETURN clause +-- 2. 
"Invalid AGT header value" error caused by incorrect offset calculation +-- when nested VERTEX/EDGE/PATH values are serialized in properties +-- +-- Tests use isolated data to keep output manageable and avoid cumulative nesting +-- +SELECT * FROM create_graph('issue_1884'); + +-- ============================================================================ +-- Test Group A: Basic "column reference is ambiguous" fix (Issue 1884) +-- ============================================================================ + +-- Test A1: Core issue - SET n.prop = n with RETURN n (the original bug) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestA1 {name: 'A1'}) + SET n.self = n + RETURN n +$$) AS (result agtype); + +-- Test A2: Multiple variables in SET and RETURN +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA2 {name: 'A'})-[e:LINK {w: 1}]->(b:TestA2 {name: 'B'}) + SET a.edge = e, b.edge = e + RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +-- Test A3: SET edge property to node reference +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestA3 {name: 'X'})-[e:REL]->(b:TestA3 {name: 'Y'}) + SET e.src = a, e.dst = b + RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- Test Group B: Nested VERTEX/EDGE/PATH serialization (offset error fix) +-- ============================================================================ + +-- Test B1: Vertex nested in vertex property (tests VERTEX serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestB1 {val: 1}) + SET n.copy = n + RETURN n +$$) AS (result agtype); + +-- Verify nested vertex can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB1) + RETURN n.copy +$$) AS (copy agtype); + +-- Test B2: Edge nested in node property (tests EDGE serialization) +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB2 {name: 'start'})-[e:B2REL {x: 100}]->(b:TestB2 {name: 'end'}) + SET a.myEdge = e + RETURN a +$$) AS (a agtype); + +-- 
Verify nested edge can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB2 {name: 'start'}) + RETURN n.myEdge +$$) AS (edge agtype); + +-- Test B3: Path nested in node property (tests PATH serialization) +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestB3)-[e:B3REL]->(b:TestB3) + RETURN a +$$) AS (a agtype); + +-- Then match the path and set it (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestB3)-[e:B3REL]->(b:TestB3) + SET a.myPath = p + RETURN a +$$) AS (a agtype); + +-- Verify nested path can be read back +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestB3) + WHERE n.myPath IS NOT NULL + RETURN n.myPath +$$) AS (path agtype); + +-- ============================================================================ +-- Test Group C: Nested structures in arrays and maps +-- ============================================================================ + +-- Test C1: Vertex in array +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC1 {tag: 'arrtest'}) + SET n.arr = [n] + RETURN n +$$) AS (result agtype); + +-- Verify array with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC1) + RETURN n.arr[0] +$$) AS (elem agtype); + +-- Test C2: Vertex in map +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestC2 {tag: 'maptest'}) + SET n.obj = {node: n} + RETURN n +$$) AS (result agtype); + +-- Verify map with nested vertex +SELECT * FROM cypher('issue_1884', $$ + MATCH (n:TestC2) + RETURN n.obj.node +$$) AS (node agtype); + +-- ============================================================================ +-- Test Group D: MERGE and CREATE with self-reference +-- ============================================================================ + +-- Test D1: MERGE with SET self-reference +SELECT * FROM cypher('issue_1884', $$ + MERGE (n:TestD1 {name: 'merged'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- Test D2: CREATE with SET self-reference 
+SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestD2 {name: 'created'}) + SET n.ref = n + RETURN n +$$) AS (result agtype); + +-- ============================================================================ +-- Test Group E: Functions with variable references +-- ============================================================================ + +-- Test E1: id() and label() functions +SELECT * FROM cypher('issue_1884', $$ + CREATE (n:TestE1 {name: 'functest'}) + SET n.myId = id(n), n.myLabel = label(n) + RETURN n +$$) AS (result agtype); + +-- Test E2: nodes() and relationships() with path +-- First create the pattern +SELECT * FROM cypher('issue_1884', $$ + CREATE (a:TestE2)-[e:E2REL]->(b:TestE2) + RETURN a +$$) AS (a agtype); + +-- Then match the path and extract nodes/relationships (MATCH only sees committed data) +SELECT * FROM cypher('issue_1884', $$ + MATCH p = (a:TestE2)-[e:E2REL]->(b:TestE2) + SET a.pathNodes = nodes(p), a.pathRels = relationships(p) + RETURN a +$$) AS (a agtype); + -- -- Clean up -- @@ -387,6 +550,7 @@ DROP FUNCTION set_test; SELECT drop_graph('cypher_set', true); SELECT drop_graph('cypher_set_1', true); SELECT drop_graph('issue_1634', true); +SELECT drop_graph('issue_1884', true); -- -- End diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index acc52349d..9960acd7b 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -71,6 +71,7 @@ #define AGE_VARNAME_MERGE_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"merge_clause" #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id" #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause" +#define AGE_VARNAME_SET_VALUE AGE_DEFAULT_VARNAME_PREFIX"set_value" /* * In the transformation stage, we need to track @@ -1911,10 +1912,24 @@ cypher_update_information *transform_cypher_set_item_list( ((cypher_map*)set_item->expr)->keep_null = set_item->is_add; } - /* create target entry for the new property value */ + /* + * Create target 
entry for the new property value. + * + * We use a hidden variable name (AGE_VARNAME_SET_VALUE) for the + * SET expression value to prevent column name conflicts. This is + * necessary when the same variable is used on both the LHS and RHS + * of a SET clause (e.g., SET n.prop = n). Without this, the column + * name derived from the expression (e.g., "n") would duplicate the + * existing column name from the MATCH clause, causing a "column + * reference is ambiguous" error in subsequent clauses like RETURN. + * + * The hidden variable name will be filtered out by expand_pnsi_attrs + * when the targetlist is expanded for subsequent clauses. + */ item->prop_position = (AttrNumber)pstate->p_next_resno; target_item = transform_cypher_item(cpstate, set_item->expr, NULL, - EXPR_KIND_SELECT_TARGET, NULL, + EXPR_KIND_SELECT_TARGET, + AGE_VARNAME_SET_VALUE, false); if (nodeTag(target_item->expr) == T_Aggref) diff --git a/src/backend/utils/adt/agtype_ext.c b/src/backend/utils/adt/agtype_ext.c index 8fc6600d1..7a0ea991d 100644 --- a/src/backend/utils/adt/agtype_ext.c +++ b/src/backend/utils/adt/agtype_ext.c @@ -89,7 +89,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -109,7 +109,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); break; } @@ -129,7 +129,7 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry, object_ae += pad_buffer_to_int(buffer); *agtentry = AGTENTRY_IS_AGTYPE | - ((AGTENTRY_OFFLENMASK & (int)object_ae) + AGT_HEADER_SIZE); + (padlen + (AGTENTRY_OFFLENMASK & 
(int)object_ae) + AGT_HEADER_SIZE); break; } @@ -175,7 +175,7 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset, break; default: - elog(ERROR, "Invalid AGT header value."); + ereport(ERROR, (errmsg("Invalid AGT header value: 0x%08x", agt_header))); } } From 365681257b687e10fb3d819980624c8951ffbeb9 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Mon, 19 Jan 2026 22:21:02 +0500 Subject: [PATCH 24/25] Replace libcsv with pg COPY for csv loading (#2310) - Commit also adds permission checks - Resolves a critical memory spike issue on loading large file - Use pg's COPY infrastructure (BeginCopyFrom, NextCopyFromRawFields) for 64KB buffered CSV parsing instead of libcsv - Add byte based flush threshold (64KB) matching COPY behavior for memory safety - Use heap_multi_insert with BulkInsertState for optimized batch inserts - Add per batch memory context to prevent memory growth during large loads - Remove libcsv dependency (libcsv.c, csv.h) - Improves loading performance by 15-20% - No previous regression tests were impacted - Added regression tests for permissions/rls Assisted-by: AI Resolved conflict with ExecInitRangeTable --- Makefile | 1 - regress/expected/age_load.out | 189 ++++++++ regress/expected/index.out | 12 +- regress/sql/age_load.sql | 125 ++++++ regress/sql/index.sql | 2 +- src/backend/utils/load/ag_load_edges.c | 388 +++++++++-------- src/backend/utils/load/ag_load_labels.c | 381 ++++++++-------- src/backend/utils/load/age_load.c | 248 ++++++++++- src/backend/utils/load/libcsv.c | 549 ------------------------ src/include/utils/load/ag_load_edges.h | 52 +-- src/include/utils/load/ag_load_labels.h | 50 +-- src/include/utils/load/age_load.h | 27 +- src/include/utils/load/csv.h | 108 ----- 13 files changed, 1000 insertions(+), 1132 deletions(-) delete mode 100644 src/backend/utils/load/libcsv.c delete mode 100644 src/include/utils/load/csv.h diff --git a/Makefile b/Makefile index ffad7d6af..a8faa2bb8 100644 --- a/Makefile +++ 
b/Makefile @@ -69,7 +69,6 @@ OBJS = src/backend/age.o \ src/backend/utils/load/ag_load_labels.o \ src/backend/utils/load/ag_load_edges.o \ src/backend/utils/load/age_load.o \ - src/backend/utils/load/libcsv.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 55d1ff1d6..1f76c31ce 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -454,6 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Test security and permissions +-- +SELECT create_graph('agload_security'); +NOTICE: graph "agload_security" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_security', 'SecEdge'); +NOTICE: ELabel "SecEdge" has been created + create_elabel +--------------- + +(1 row) + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. 
+RESET ROLE; +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied for table Person1 +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied for table SecEdge +RESET ROLE; +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +RESET ROLE; +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 3: Row-Level Security (RLS) +-- +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; +-- Switch to load_test_user +SET ROLE 
load_test_user; +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +RESET ROLE; +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 4: Constraint checking (CHECK constraint) +-- +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: new row for relation "Person1" violates check constraint "check_bool_true" +DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...). 
+-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true" +DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}). +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_security._ag_label_vertex +drop cascades to table agload_security._ag_label_edge +drop cascades to table agload_security."Person1" +drop cascades to table agload_security."Person2" +drop cascades to table agload_security."SecEdge" +NOTICE: graph "agload_security" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/expected/index.out b/regress/expected/index.out index 745cab269..ec62bf57d 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -264,19 +264,19 @@ $$) as (n agtype); (0 rows) -- 
Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; indexname | indexdef -----------------------------+------------------------------------------------------------------------------------------------ + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) - _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) - idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) - Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) - has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) - City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) (10 rows) SET enable_mergejoin = ON; diff --git a/regress/sql/age_load.sql 
b/regress/sql/age_load.sql index cefcfb4ca..976f050af 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -194,6 +194,131 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t -- SELECT drop_graph('agload_conversion', true); +-- +-- Test security and permissions +-- + +SELECT create_graph('agload_security'); +SELECT create_vlabel('agload_security', 'Person1'); +SELECT create_vlabel('agload_security', 'Person2'); +SELECT create_elabel('agload_security', 'SecEdge'); + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; + +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; + +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT 
SELECT ON ag_catalog.ag_graph TO load_test_user; + +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 3: Row-Level Security (RLS) +-- + +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; + +-- Switch to load_test_user +SET ROLE load_test_user; + +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +RESET ROLE; + +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; + +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 4: Constraint checking (CHECK constraint) +-- + +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 
'Person1', 'age_load/conversion_vertices.csv', true); + +-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; + +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); + -- -- End -- diff --git a/regress/sql/index.sql b/regress/sql/index.sql index a6e075c70..d4a4b24a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -165,7 +165,7 @@ SELECT * FROM cypher('cypher_index', $$ $$) as (n agtype); -- Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 931c6e0dc..c05bf3352 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -16,50 +16,30 @@ * specific language governing permissions 
and limitations * under the License. */ - #include "postgres.h" -#include "utils/load/ag_load_edges.h" -#include "utils/load/csv.h" +#include "access/heapam.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" -void edge_field_cb(void *field, size_t field_len, void *data) -{ - - csv_edge_reader *cr = (csv_edge_reader*)data; - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char*)field, field_len); - cr->cur_field += 1; -} +#include "utils/load/ag_load_edges.h" -/* Parser calls this function when it detects end of a row */ -void edge_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single edge row from COPY's raw fields. + * Edge CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] 
+ */ +static void process_edge_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + Oid graph_oid, bool load_as_agtype, + batch_insert_state *batch_state) { - - csv_edge_reader *cr = (csv_edge_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - - size_t i, n_fields; int64 start_id_int; graphid start_vertex_graph_id; int start_vertex_type_id; @@ -72,104 +52,92 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) int64 entry_id; TupleTableSlot *slot; - n_fields = cr->cur_field; + char *start_vertex_type; + char *end_vertex_type; + agtype *edge_properties; - if (cr->row == 0) - { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); + /* Generate edge ID */ + entry_id = nextval_internal(label_seq_relid, true); + edge_id = make_graphid(label_id, entry_id); - for (i = 0; icur_field; i++) - { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - edge_id = make_graphid(cr->label_id, entry_id); - - start_id_int = strtol(cr->fields[0], NULL, 10); - start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); - end_id_int = strtol(cr->fields[2], NULL, 10); - end_vertex_type_id = get_label_id(cr->fields[3], cr->graph_oid); - - start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); - end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - - /* Clear the slots contents */ - ExecClearTuple(slot); - - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); - slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); - slot->tts_values[2] = 
GRAPHID_GET_DATUM(end_vertex_graph_id); - slot->tts_values[3] = AGTYPE_P_GET_DATUM( - create_agtype_from_list_i( - cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; - slot->tts_isnull[2] = false; - slot->tts_isnull[3] = false; - - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - batch_state->num_tuples++; - - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + /* Trim whitespace from vertex type names */ + start_vertex_type = trim_whitespace(fields[1]); + end_vertex_type = trim_whitespace(fields[3]); - for (i = 0; i < n_fields; ++i) - { - pfree_if_not_null(cr->fields[i]); - } + /* Parse start vertex info */ + start_id_int = strtol(fields[0], NULL, 10); + start_vertex_type_id = get_label_id(start_vertex_type, graph_oid); - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } + /* Parse end vertex info */ + end_id_int = strtol(fields[2], NULL, 10); + end_vertex_type_id = get_label_id(end_vertex_type, graph_oid); - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; -} + /* Create graphids for start and end vertices */ + start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); + end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); -static int is_space(unsigned char c) -{ - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Build the agtype properties */ + edge_properties = create_agtype_from_list_i(header, fields, + nfields, 4, load_as_agtype); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = 
GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM(edge_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + + batch_state->buffered_bytes += VARSIZE(edge_properties); + batch_state->num_tuples++; + + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - return 0; + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } } -static int is_term(unsigned char c) +/* + * Create COPY options for CSV parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; + + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load edges from CSV file using pg's COPY infrastructure. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -177,79 +145,133 @@ int create_edges_from_csv_file(char *file_path, int label_id, bool load_as_agtype) { + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Edge Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); + + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); + + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_edge_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + PG_TRY(); { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. 
+ */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) + { + if (is_first_row) + { + int i; - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } - PG_TRY(); - { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_edge_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + graph_oid, 
load_as_agtype, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 1e86bbda4..5b11f68b8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,155 +17,114 @@ * under the License. 
*/ #include "postgres.h" -#include "executor/spi.h" + +#include "access/heapam.h" +#include "access/table.h" #include "catalog/namespace.h" +#include "commands/copy.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/csv.h" - -void vertex_field_cb(void *field, size_t field_len, void *data) -{ - - csv_vertex_reader *cr = (csv_vertex_reader *) data; - - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); - cr->cur_field += 1; -} -void vertex_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single vertex row from COPY's raw fields. + * Vertex CSV format: [id,] [properties...] 
+ */ +static void process_vertex_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + bool id_field_exists, bool load_as_agtype, + int64 *curr_seq_num, + batch_insert_state *batch_state) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - size_t i, n_fields; graphid vertex_id; int64 entry_id; TupleTableSlot *slot; + agtype *vertex_properties; - n_fields = cr->cur_field; - - if (cr->row == 0) + /* Generate or use provided entry_id */ + if (id_field_exists) { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); - - for (i = 0; icur_field; i++) + entry_id = strtol(fields[0], NULL, 10); + if (entry_id > *curr_seq_num) { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); + /* This is needed to ensure the sequence is up-to-date */ + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(label_seq_relid), + Int64GetDatum(entry_id)); + *curr_seq_num = entry_id; } } else { - if (cr->id_field_exists) - { - entry_id = strtol(cr->fields[0], NULL, 10); - if (entry_id > cr->curr_seq_num) - { - DirectFunctionCall2(setval_oid, - ObjectIdGetDatum(cr->label_seq_relid), - Int64GetDatum(entry_id)); - cr->curr_seq_num = entry_id; - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - } + entry_id = nextval_internal(label_seq_relid, true); + } - vertex_id = make_graphid(cr->label_id, entry_id); + vertex_id = make_graphid(label_id, entry_id); - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; - /* Clear the slots contents */ - ExecClearTuple(slot); + /* Clear the slots contents */ + 
ExecClearTuple(slot); - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - slot->tts_values[1] = AGTYPE_P_GET_DATUM( - create_agtype_from_list(cr->header, cr->fields, - n_fields, entry_id, - cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; + /* Build the agtype properties */ + vertex_properties = create_agtype_from_list(header, fields, + nfields, entry_id, + load_as_agtype); - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM(vertex_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; - batch_state->num_tuples++; + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + batch_state->buffered_bytes += VARSIZE(vertex_properties); + batch_state->num_tuples++; - for (i = 0; i < n_fields; ++i) + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - pfree_if_not_null(cr->fields[i]); + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } - - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } - - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; } -static int is_space(unsigned char c) +/* + * Create COPY options for csv parsing. + * Returns a List of DefElem nodes. 
+ */ +static List *create_copy_options(void) { - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; -} -static int is_term(unsigned char c) -{ - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load vertex labels from csv file using pg's COPY infrastructure. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -174,96 +133,146 @@ int create_labels_from_csv_file(char *file_path, bool id_field_exists, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_vertex_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + int64 curr_seq_num = 0; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + if (id_field_exists) { - ereport(ERROR, - (errmsg("Failed to initialize 
csv parser\n"))); + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + */ + curr_seq_num = nextval_internal(label_seq_relid, true); } - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); PG_TRY(); { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. + */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist - NULL means all columns */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. 
- * - * We cant use currval because it will error out if nextval was - * not called before in the session. - */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + if (is_first_row) + { + int i; - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } + + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_vertex_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + id_field_exists, load_as_agtype, + &curr_seq_num, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + 
table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index c7cf0677f..e4f10d7e4 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,24 +18,81 @@ */ #include "postgres.h" + +#include "access/heapam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" #include "catalog/indexing.h" +#include "catalog/pg_authid.h" #include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "parser/parse_relation.h" +#include "utils/acl.h" #include "utils/json.h" +#include "utils/rel.h" +#include "utils/rls.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" -#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); static char *build_safe_filename(char *name); +static void check_file_read_permission(void); +static void check_table_permissions(Oid relid); +static void check_rls_for_load(Oid relid); #define AGE_BASE_CSV_DIRECTORY "/tmp/age/" #define AGE_CSV_FILE_EXTENSION ".csv" +/* + * Trim leading and trailing whitespace from a string. + * Returns a newly allocated string with whitespace removed. + * Returns empty string for NULL input. 
+ */ +char *trim_whitespace(const char *str) +{ + const char *start; + const char *end; + size_t len; + + if (str == NULL) + { + return pstrdup(""); + } + + /* Find first non-whitespace character */ + start = str; + while (*start && (*start == ' ' || *start == '\t' || + *start == '\n' || *start == '\r')) + { + start++; + } + + /* If string is all whitespace, return empty string */ + if (*start == '\0') + { + return pstrdup(""); + } + + /* Find last non-whitespace character */ + end = str + strlen(str) - 1; + while (end > start && (*end == ' ' || *end == '\t' || + *end == '\n' || *end == '\r')) + { + end--; + } + + /* Copy the trimmed string */ + len = end - start + 1; + return pnstrdup(start, len); +} + static char *build_safe_filename(char *name) { int length; @@ -88,6 +145,51 @@ static char *build_safe_filename(char *name) return resolved; } +/* + * Check if the current user has permission to read server files. + * Only users with the pg_read_server_files role can load from files. + */ +static void check_file_read_permission(void) +{ + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to LOAD from a file"), + errdetail("Only roles with privileges of the \"%s\" role may LOAD from a file.", + "pg_read_server_files"))); + } +} + +/* + * Check if the current user has INSERT permission on the target table. + */ +static void check_table_permissions(Oid relid) +{ + AclResult aclresult; + + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_INSERT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, get_rel_name(relid)); + } +} + +/* + * Check if RLS is enabled on the target table. + * CSV loading is not supported with row-level security. 
+ */ +static void check_rls_for_load(Oid relid) +{ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOAD from file is not supported with row-level security"), + errhint("Use Cypher CREATE clause instead."))); + } +} + agtype *create_empty_agtype(void) { agtype* out; @@ -118,6 +220,14 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) char *new_csv_val; agtype_value *res; + /* Handle NULL or empty input - return null agtype value */ + if (csv_val == NULL || csv_val[0] == '\0') + { + res = palloc(sizeof(agtype_value)); + res->type = AGTV_NULL; + return res; + } + if (!json_validate(cstring_to_text(csv_val), false, false)) { /* wrap the string with double-quote */ @@ -175,18 +285,40 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, for (i = 0; itype = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -228,18 +360,40 @@ agtype* create_agtype_from_list_i(char **header, char **fields, for (i = start_index; i < fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + 
value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -362,11 +516,24 @@ void insert_batch(batch_insert_state *batch_state) List *result; int i; + /* Check constraints for each tuple before inserting */ + if (batch_state->resultRelInfo->ri_RelationDesc->rd_att->constr) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + ExecConstraints(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate); + } + } + /* Insert the tuples */ heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, batch_state->slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - + GetCurrentCommandId(true), + TABLE_INSERT_SKIP_FSM, /* Skip free space map for bulk */ + batch_state->bistate); /* Use bulk insert state */ + /* Insert index entries for the tuples */ if (batch_state->resultRelInfo->ri_NumIndices > 0) { @@ -405,6 +572,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool id_field_exists; bool load_as_agtype; @@ -427,6 +595,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -447,6 +618,11 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_VERTEX); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); @@ -459,7 +635,6 @@ Datum 
load_labels_from_file(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(load_edges_from_file); Datum load_edges_from_file(PG_FUNCTION_ARGS) { - Name graph_name; Name label_name; text* file_name; @@ -467,6 +642,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool load_as_agtype; @@ -488,6 +664,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -507,6 +686,11 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_EDGE); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); @@ -597,19 +781,42 @@ void init_batch_insert(batch_insert_state **batch_state, Oid relid; EState *estate; ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + RTEPermissionInfo *perminfo; + List *range_table = NIL; + List *perminfos = NIL; int i; - /* Open the relation */ + /* Get the relation OID */ relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, RowExclusiveLock); /* Initialize executor state */ estate = CreateExecutorState(); - /* Initialize resultRelInfo */ + /* Create range table entry for ExecConstraints */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; + rte->rellockmode = RowExclusiveLock; + rte->perminfoindex = 1; + range_table = list_make1(rte); + + /* Create permission info */ + perminfo = makeNode(RTEPermissionInfo); + perminfo->relid = relid; + 
perminfo->requiredPerms = ACL_INSERT; + perminfos = list_make1(perminfo); + + /* Initialize range table in executor state */ + ExecInitRangeTable(estate, range_table, perminfos, NULL); + + /* Initialize resultRelInfo - this opens the relation */ resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; + ExecInitResultRelation(estate, resultRelInfo, 1); + + /* Get relation from resultRelInfo (opened by ExecInitResultRelation) */ + relation = resultRelInfo->ri_RelationDesc; /* Open the indices */ ExecOpenIndices(resultRelInfo, false); @@ -619,8 +826,9 @@ void init_batch_insert(batch_insert_state **batch_state, (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); (*batch_state)->estate = estate; (*batch_state)->resultRelInfo = resultRelInfo; - (*batch_state)->max_tuples = BATCH_SIZE; (*batch_state)->num_tuples = 0; + (*batch_state)->buffered_bytes = 0; + (*batch_state)->bistate = GetBulkInsertState(); /* Create slots */ for (i = 0; i < BATCH_SIZE; i++) @@ -651,12 +859,14 @@ void finish_batch_insert(batch_insert_state **batch_state) ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); } - /* Clean up, close the indices and relation */ - ExecCloseIndices((*batch_state)->resultRelInfo); - table_close((*batch_state)->resultRelInfo->ri_RelationDesc, - RowExclusiveLock); + /* Free BulkInsertState */ + FreeBulkInsertState((*batch_state)->bistate); + + /* Close result relations and range table relations */ + ExecCloseResultRelations((*batch_state)->estate); + ExecCloseRangeTableRelations((*batch_state)->estate); - /* Clean up batch state */ + /* Clean up executor state */ FreeExecutorState((*batch_state)->estate); pfree((*batch_state)->slots); pfree(*batch_state); diff --git a/src/backend/utils/load/libcsv.c b/src/backend/utils/load/libcsv.c deleted file mode 100644 index f0e8b46be..000000000 --- a/src/backend/utils/load/libcsv.c +++ /dev/null @@ 
-1,549 +0,0 @@ -/* -libcsv - parse and write csv data -Copyright (C) 2008 Robert Gamble - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include - -#if __STDC_VERSION__ >= 199901L -# include -#else - /* C89 doesn't have stdint.h or SIZE_MAX */ -# define SIZE_MAX ((size_t)-1) -#endif - -#include "utils/load/csv.h" - -#define VERSION "3.0.3" - -#define ROW_NOT_BEGUN 0 -#define FIELD_NOT_BEGUN 1 -#define FIELD_BEGUN 2 -#define FIELD_MIGHT_HAVE_ENDED 3 - -/* - Explanation of states - ROW_NOT_BEGUN There have not been any fields encountered for this row - FIELD_NOT_BEGUN There have been fields but we are currently not in one - FIELD_BEGUN We are in a field - FIELD_MIGHT_HAVE_ENDED - We encountered a double quote inside a quoted field, the - field is either ended or the quote is literal -*/ - -#define MEM_BLK_SIZE 128 - -#define SUBMIT_FIELD(p) \ - do { \ - if (!quoted) \ - entry_pos -= spaces; \ - if (p->options & CSV_APPEND_NULL) \ - ((p)->entry_buf[entry_pos]) = '\0'; \ - if (cb1 && (p->options & CSV_EMPTY_IS_NULL) && !quoted && entry_pos == 0) \ - cb1(NULL, entry_pos, data); \ - else if (cb1) \ - cb1(p->entry_buf, entry_pos, data); \ - pstate = FIELD_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_ROW(p, c) \ - do { \ - if (cb2) \ - cb2(c, data); \ - pstate = 
ROW_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) - -static const char *csv_errors[] = {"success", - "error parsing data while strict checking enabled", - "memory exhausted while increasing buffer size", - "data size too large", - "invalid status code"}; - -int -csv_error(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Return the current status of the parser */ - return p->status; -} - -const char * -csv_strerror(int status) -{ - /* Return a textual description of status */ - if (status >= CSV_EINVALID || status < 0) - return csv_errors[CSV_EINVALID]; - else - return csv_errors[status]; -} - -int -csv_get_opts(const struct csv_parser *p) -{ - /* Return the currently set options of parser */ - if (p == NULL) - return -1; - - return p->options; -} - -int -csv_set_opts(struct csv_parser *p, unsigned char options) -{ - /* Set the options */ - if (p == NULL) - return -1; - - p->options = options; - return 0; -} - -int -csv_init(struct csv_parser *p, unsigned char options) -{ - /* Initialize a csv_parser object returns 0 on success, -1 on error */ - if (p == NULL) - return -1; - - p->entry_buf = NULL; - p->pstate = ROW_NOT_BEGUN; - p->quoted = 0; - p->spaces = 0; - p->entry_pos = 0; - p->entry_size = 0; - p->status = 0; - p->options = options; - p->quote_char = CSV_QUOTE; - p->delim_char = CSV_COMMA; - p->is_space = NULL; - p->is_term = NULL; - p->blk_size = MEM_BLK_SIZE; - p->malloc_func = NULL; - p->realloc_func = realloc; - p->free_func = free; - - return 0; -} - -void -csv_free(struct csv_parser *p) -{ - /* Free the entry_buffer of csv_parser object */ - if (p == NULL) - return; - - if (p->entry_buf && p->free_func) - p->free_func(p->entry_buf); - - p->entry_buf = NULL; - p->entry_size = 0; - - return; -} - -int -csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - int quoted; - int pstate; - 
size_t spaces; - size_t entry_pos; - - if (p == NULL) - return -1; - - /* Finalize parsing. Needed, for example, when file does not end in a newline */ - quoted = p->quoted; - pstate = p->pstate; - spaces = p->spaces; - entry_pos = p->entry_pos; - - if ((pstate == FIELD_BEGUN) && p->quoted && (p->options & CSV_STRICT) && (p->options & CSV_STRICT_FINI)) { - /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ - p->status = CSV_EPARSE; - return -1; - } - - switch (pstate) { - case FIELD_MIGHT_HAVE_ENDED: - p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ - entry_pos = p->entry_pos; - /*lint -fallthrough */ - case FIELD_NOT_BEGUN: - case FIELD_BEGUN: - /* Unnecessary: - quoted = p->quoted, pstate = p->pstate; - spaces = p->spaces, entry_pos = p->entry_pos; - */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, -1); - break; - case ROW_NOT_BEGUN: /* Already ended properly */ - ; - } - - /* Reset parser */ - p->spaces = p->quoted = p->entry_pos = p->status = 0; - p->pstate = ROW_NOT_BEGUN; - - return 0; -} - -void -csv_set_delim(struct csv_parser *p, unsigned char c) -{ - /* Set the delimiter */ - if (p) p->delim_char = c; -} - -void -csv_set_quote(struct csv_parser *p, unsigned char c) -{ - /* Set the quote character */ - if (p) p->quote_char = c; -} - -unsigned char -csv_get_delim(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the delimiter */ - return p->delim_char; -} - -unsigned char -csv_get_quote(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the quote character */ - return p->quote_char; -} - -void -csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the space function */ - if (p) p->is_space = f; -} - -void -csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the term function */ - if (p) p->is_term = f; -} - -void -csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) -{ - /* Set 
the realloc function used to increase buffer size */ - if (p && f) p->realloc_func = f; -} - -void -csv_set_free_func(struct csv_parser *p, void (*f)(void *)) -{ - /* Set the free function used to free the buffer */ - if (p && f) p->free_func = f; -} - -void -csv_set_blk_size(struct csv_parser *p, size_t size) -{ - /* Set the block size used to increment buffer size */ - if (p) p->blk_size = size; -} - -size_t -csv_get_buffer_size(const struct csv_parser *p) -{ - /* Get the size of the entry buffer */ - if (p) - return p->entry_size; - return 0; -} - -static int -csv_increase_buffer(struct csv_parser *p) -{ - size_t to_add; - void *vp; - - if (p == NULL) return 0; - if (p->realloc_func == NULL) return 0; - - /* Increase the size of the entry buffer. Attempt to increase size by - * p->blk_size, if this is larger than SIZE_MAX try to increase current - * buffer size to SIZE_MAX. If allocation fails, try to allocate halve - * the size and try again until successful or increment size is zero. 
- */ - - to_add = p->blk_size; - - if ( p->entry_size >= SIZE_MAX - to_add ) - to_add = SIZE_MAX - p->entry_size; - - if (!to_add) { - p->status = CSV_ETOOBIG; - return -1; - } - - while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) { - to_add /= 2; - if (!to_add) { - p->status = CSV_ENOMEM; - return -1; - } - } - - /* Update entry buffer pointer and entry_size if successful */ - p->entry_buf = vp; - p->entry_size += to_add; - return 0; -} - -size_t -csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - unsigned const char *us = s; /* Access input data as array of unsigned char */ - unsigned char c; /* The character we are currently processing */ - size_t pos = 0; /* The number of characters we have processed in this call */ - - /* Store key fields into local variables for performance */ - unsigned char delim = p->delim_char; - unsigned char quote = p->quote_char; - int (*is_space)(unsigned char) = p->is_space; - int (*is_term)(unsigned char) = p->is_term; - int quoted = p->quoted; - int pstate = p->pstate; - size_t spaces = p->spaces; - size_t entry_pos = p->entry_pos; - - - if (!p->entry_buf && pos < len) { - /* Buffer hasn't been allocated yet and len > 0 */ - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - while (pos < len) { - /* Check memory usage, increase buffer if necessary */ - if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - c = us[pos++]; - - switch (pstate) { - case ROW_NOT_BEGUN: - case FIELD_NOT_BEGUN: - if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ - continue; - } else if (is_term ? 
is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (pstate == FIELD_NOT_BEGUN) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { /* ROW_NOT_BEGUN */ - /* Don't submit empty rows by default */ - if (p->options & CSV_REPALL_NL) { - SUBMIT_ROW(p, c); - } - } - continue; - } else if (c == delim) { /* Comma */ - SUBMIT_FIELD(p); - break; - } else if (c == quote) { /* Quote */ - pstate = FIELD_BEGUN; - quoted = 1; - } else { /* Anything else */ - pstate = FIELD_BEGUN; - quoted = 0; - SUBMIT_CHAR(p, c); - } - break; - case FIELD_BEGUN: - if (c == quote) { /* Quote */ - if (quoted) { - SUBMIT_CHAR(p, c); - pstate = FIELD_MIGHT_HAVE_ENDED; - } else { - /* STRICT ERROR - double quote inside non-quoted field */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - SUBMIT_CHAR(p, c); - spaces = 0; - } - } else if (c == delim) { /* Comma */ - if (quoted) { - SUBMIT_CHAR(p, c); - } else { - SUBMIT_FIELD(p); - } - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (!quoted) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { - SUBMIT_CHAR(p, c); - } - } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ - SUBMIT_CHAR(p, c); - spaces++; - } else { /* Anything else */ - SUBMIT_CHAR(p, c); - spaces = 0; - } - break; - case FIELD_MIGHT_HAVE_ENDED: - /* This only happens when a quote character is encountered in a quoted field */ - if (c == delim) { /* Comma */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else if (is_space ? 
is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ - SUBMIT_CHAR(p, c); - spaces++; - } else if (c == quote) { /* Quote */ - if (spaces) { - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - spaces = 0; - SUBMIT_CHAR(p, c); - } else { - /* Two quotes in a row */ - pstate = FIELD_BEGUN; - } - } else { /* Anything else */ - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - pstate = FIELD_BEGUN; - spaces = 0; - SUBMIT_CHAR(p, c); - } - break; - default: - break; - } - } - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; -} - -size_t -csv_write (void *dest, size_t dest_size, const void *src, size_t src_size) -{ - return csv_write2(dest, dest_size, src, src_size, CSV_QUOTE); -} - -int -csv_fwrite (FILE *fp, const void *src, size_t src_size) -{ - return csv_fwrite2(fp, src, src_size, CSV_QUOTE); -} - -size_t -csv_write2 (void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) -{ - unsigned char *cdest = dest; - const unsigned char *csrc = src; - size_t chars = 0; - - if (src == NULL) - return 0; - - if (dest == NULL) - dest_size = 0; - - if (dest_size > 0) - *cdest++ = quote; - chars++; - - while (src_size) { - if (*csrc == quote) { - if (dest_size > chars) - *cdest++ = quote; - if (chars < SIZE_MAX) chars++; - } - if (dest_size > chars) - *cdest++ = *csrc; - if (chars < SIZE_MAX) chars++; - src_size--; - csrc++; - } - - if (dest_size > chars) - *cdest = quote; - if (chars < SIZE_MAX) chars++; - - return chars; -} - -int -csv_fwrite2 (FILE *fp, const void *src, size_t src_size, unsigned char quote) -{ - const unsigned char *csrc = src; - - if (fp == NULL || 
src == NULL) - return 0; - - if (fputc(quote, fp) == EOF) - return EOF; - - while (src_size) { - if (*csrc == quote) { - if (fputc(quote, fp) == EOF) - return EOF; - } - if (fputc(*csrc, fp) == EOF) - return EOF; - src_size--; - csrc++; - } - - if (fputc(quote, fp) == EOF) { - return EOF; - } - - return 0; -} diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index df663b1dd..4db00d93a 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,42 +17,28 @@ * under the License. */ -#include "access/heapam.h" -#include "utils/load/age_load.h" - #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - char *start_vertex; - char *end_vertex; - bool load_as_agtype; - batch_insert_state *batch_state; -} csv_edge_reader; - - -void edge_field_cb(void *field, size_t field_len, void *data); -void edge_row_cb(int delim __attribute__((unused)), void *data); +#include "utils/load/age_load.h" +/* + * Load edges from a CSV file using pg's COPY infrastructure. + * + * CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the edge label + * label_id - ID of the label + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *label_name, int label_id, - bool load_as_agtype); - -#endif /*AG_LOAD_EDGES_H */ + char *label_name, int label_id, + bool load_as_agtype); +#endif /* AG_LOAD_EDGES_H */ diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index b8ed1572e..c3d517f30 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -17,46 +17,26 @@ * under the License. */ - #ifndef AG_LOAD_LABELS_H #define AG_LOAD_LABELS_H -#include "access/heapam.h" #include "utils/load/age_load.h" -struct counts { - long unsigned fields; - long unsigned allvalues; - long unsigned rows; -}; - -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - bool id_field_exists; - bool load_as_agtype; - int curr_seq_num; - batch_insert_state *batch_state; -} csv_vertex_reader; - - -void vertex_field_cb(void *field, size_t field_len, void *data); -void vertex_row_cb(int delim __attribute__((unused)), void *data); - +/* + * Load vertex labels from a CSV file using pg's COPY infrastructure. + * CSV format: [id,] [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the vertex label + * label_id - ID of the label + * id_field_exists - If true, first CSV column contains the vertex ID + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. 
+ */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 72f11493d..6573c79f3 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -17,6 +17,10 @@ * under the License. */ +#ifndef AG_LOAD_H +#define AG_LOAD_H + +#include "access/heapam.h" #include "commands/sequence.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -27,10 +31,8 @@ #include "commands/graph_commands.h" #include "utils/ag_cache.h" -#ifndef AGE_ENTITY_CREATOR_H -#define AGE_ENTITY_CREATOR_H - #define BATCH_SIZE 1000 +#define MAX_BUFFERED_BYTES 65535 /* 64KB, same as pg COPY */ typedef struct batch_insert_state { @@ -38,26 +40,29 @@ typedef struct batch_insert_state ResultRelInfo *resultRelInfo; TupleTableSlot **slots; int num_tuples; - int max_tuples; + size_t buffered_bytes; + BulkInsertState bistate; } batch_insert_state; -agtype* create_empty_agtype(void); - -agtype* create_agtype_from_list(char **header, char **fields, +agtype *create_empty_agtype(void); +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id, bool load_as_agtype); -agtype* create_agtype_from_list_i(char **header, char **fields, +agtype *create_agtype_from_list_i(char **header, char **fields, size_t fields_len, size_t start_index, bool load_as_agtype); + void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype *vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, - agtype* end_properties); -void insert_batch(batch_insert_state *batch_state); + agtype *edge_properties); void init_batch_insert(batch_insert_state **batch_state, char *label_name, Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); void 
finish_batch_insert(batch_insert_state **batch_state); -#endif /* AGE_ENTITY_CREATOR_H */ +char *trim_whitespace(const char *str); + +#endif /* AG_LOAD_H */ diff --git a/src/include/utils/load/csv.h b/src/include/utils/load/csv.h deleted file mode 100644 index 062536977..000000000 --- a/src/include/utils/load/csv.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Created by Shoaib on 12/5/2021. -*/ - -/* -libcsv - parse and write csv data -Copyright (C) 2008-2021 Robert Gamble -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifndef LIBCSV_H__ -#define LIBCSV_H__ -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define CSV_MAJOR 3 -#define CSV_MINOR 0 -#define CSV_RELEASE 3 - -/* Error Codes */ -#define CSV_SUCCESS 0 -#define CSV_EPARSE 1 /* Parse error in strict mode */ -#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ -#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ -#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ - - -/* parser options */ -#define CSV_STRICT 1 /* enable strict mode */ -#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ -#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last - field is quoted and doesn't contain ending - quote */ -#define CSV_APPEND_NULL 8 /* Ensure 
that all fields are null-terminated */ -#define CSV_EMPTY_IS_NULL 16 /* Pass null pointer to cb1 function when - empty, unquoted fields are encountered */ - - -/* Character values */ -#define CSV_TAB 0x09 -#define CSV_SPACE 0x20 -#define CSV_CR 0x0d -#define CSV_LF 0x0a -#define CSV_COMMA 0x2c -#define CSV_QUOTE 0x22 - -struct csv_parser { - int pstate; /* Parser state */ - int quoted; /* Is the current field a quoted field? */ - size_t spaces; /* Number of continuous spaces after quote or in a non-quoted field */ - unsigned char * entry_buf; /* Entry buffer */ - size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ - size_t entry_size; /* Size of entry buffer */ - int status; /* Operation status */ - unsigned char options; - unsigned char quote_char; - unsigned char delim_char; - int (*is_space)(unsigned char); - int (*is_term)(unsigned char); - size_t blk_size; - void *(*malloc_func)(size_t); /* not used */ - void *(*realloc_func)(void *, size_t); /* function used to allocate buffer memory */ - void (*free_func)(void *); /* function used to free buffer memory */ -}; - -/* Function Prototypes */ -int csv_init(struct csv_parser *p, unsigned char options); -int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -void csv_free(struct csv_parser *p); -int csv_error(const struct csv_parser *p); -const char * csv_strerror(int error); -size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size); -int csv_fwrite(FILE *fp, const void *src, size_t src_size); -size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote); -int csv_fwrite2(FILE *fp, const void *src, size_t src_size, unsigned char quote); -int csv_get_opts(const struct csv_parser *p); -int csv_set_opts(struct csv_parser *p, 
unsigned char options); -void csv_set_delim(struct csv_parser *p, unsigned char c); -void csv_set_quote(struct csv_parser *p, unsigned char c); -unsigned char csv_get_delim(const struct csv_parser *p); -unsigned char csv_get_quote(const struct csv_parser *p); -void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_realloc_func(struct csv_parser *p, void *(*)(void *, size_t)); -void csv_set_free_func(struct csv_parser *p, void (*)(void *)); -void csv_set_blk_size(struct csv_parser *p, size_t); -size_t csv_get_buffer_size(const struct csv_parser *p); - -#ifdef __cplusplus -} -#endif - -#endif From 9e04372a6dfcfda3293567f6100e46009335a006 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Tue, 20 Jan 2026 23:44:19 +0500 Subject: [PATCH 25/25] Add RLS support and fix permission checks (#2309) - Previously, AGE only set ACL_SELECT and ACL_INSERT in RTEPermissionInfo, bypassing pg's privilege checking for DELETE and UPDATE operations. - Additionally, RLS policies were not enforced because AGE uses CMD_SELECT for all Cypher queries, causing the rewriter to skip RLS policy application. Permission fixes: - Add ACL_DELETE permission flag for DELETE clause operations - Add ACL_UPDATE permission flag for SET/REMOVE clause operations - Recursively search RTEs including subqueries for permission info RLS support: - Implemented at executor level because AGE transforms all cypher queries to CMD_SELECT, so pg's rewriter never adds RLS policies for INSERT/UPDATE/DELETE operations. There isn't an appropriate rewriter hook to modify this behavior, so we do it in executor instead. 
- Add setup_wcos() to apply WITH CHECK policies at execution time for CREATE, SET, and MERGE operations - Add setup_security_quals() and check_security_quals() to apply USING policies for UPDATE and DELETE operations - USING policies silently filter rows (matching pg behavior) - WITH CHECK policies raise errors on violation - DETACH DELETE raises error if edge RLS blocks deletion to prevent dangling edges - Add permission checks and rls in startnode/endnode functions - Add regression tests Assisted-by AI Resolved Conflicts: src/backend/executor/cypher_create.c src/backend/executor/cypher_delete.c src/backend/executor/cypher_merge.c src/backend/executor/cypher_set.c src/backend/executor/cypher_utils.c --- Makefile | 3 +- regress/expected/security.out | 1657 ++++++++++++++++++++++++++ regress/sql/security.sql | 1451 ++++++++++++++++++++++ src/backend/executor/cypher_create.c | 7 + src/backend/executor/cypher_delete.c | 97 ++ src/backend/executor/cypher_merge.c | 9 +- src/backend/executor/cypher_set.c | 88 +- src/backend/executor/cypher_utils.c | 779 ++++++++++++ src/backend/parser/cypher_clause.c | 103 ++ src/backend/utils/adt/agtype.c | 27 +- src/include/executor/cypher_utils.h | 22 + 11 files changed, 4238 insertions(+), 5 deletions(-) create mode 100644 regress/expected/security.out create mode 100644 regress/sql/security.sql diff --git a/Makefile b/Makefile index a8faa2bb8..2d2912571 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,8 @@ REGRESS = scan \ jsonb_operators \ list_comprehension \ map_projection \ - direct_field_access + direct_field_access \ + security ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/regress/expected/security.out b/regress/expected/security.out new file mode 100644 index 000000000..59e58cb05 --- /dev/null +++ b/regress/expected/security.out @@ -0,0 +1,1657 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- Test Privileges +-- +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); +NOTICE: graph "security_test" has been created + create_graph +-------------- + +(1 row) + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- +-- Create test roles with different permission levels +-- +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN 
SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE 
ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ +SET ROLE security_test_noread; +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); +ERROR: permission denied for table Person +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); +ERROR: permission denied for table _ag_label_vertex +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); +ERROR: permission denied for table _ag_label_vertex +RESET ROLE; +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. 
+CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; +SET ROLE security_test_base_only; +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + n +------------------------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Person", "properties": {"age": 30, "name": "Alice"}}::vertex + {"id": 844424930131970, "label": "Person", "properties": {"age": 25, "name": "Bob"}}::vertex + {"id": 1125899906842625, "label": "Document", "properties": {"title": "Secret", "content": "classified"}}::vertex +(3 rows) + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + e +----------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1407374883553281, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"since": 2020}}::edge + {"id": 1688849860263937, "label": "OWNS", "end_id": 1125899906842625, "start_id": 844424930131969, "properties": {}}::edge +(2 rows) + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- 
============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+---------- + "Alice" | "Secret" +(1 row) + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); +ERROR: permission denied for table Person +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); +ERROR: permission denied for schema security_test +LINE 1: SELECT * FROM cypher('security_test', $$ + ^ +RESET ROLE; +SET ROLE security_test_insert; +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test: CREATE edge should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS 
(a agtype); + a +--- +(0 rows) + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-----------+----- + "Charlie" | 35 +(1 row) + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +SET ROLE security_test_update; +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Alice" | 31 +(1 row) + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+----------- + "Bob" | "reading" +(1 row) + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ +RESET ROLE; +SET 
ROLE security_test_readonly; +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + name | hobby +-------+------- + "Bob" | +(1 row) + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_update; +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_delete; +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 7: DETACH DELETE 
Tests +-- ============================================================================ +RESET ROLE; +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); +ERROR: permission denied for table KNOWS +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + p +--- +(0 rows) + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_readonly; +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); +ERROR: permission denied for table Person +RESET ROLE; +SET ROLE security_test_insert; +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +-------+----- + "Eve" | 28 +(1 row) + +-- Test: 
MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + name +------- + "Eve" +(1 row) + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ +RESET ROLE; +SET ROLE security_test_full; +-- Full permission role should be able to do everything +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + name | age +---------+----- + "Frank" | 51 +(1 row) + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ +RESET ROLE; +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN 
SCHEMA security_test TO security_test_person_only; +SET ROLE security_test_person_only; +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + name +--------- + "Alice" +(1 row) + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); +ERROR: permission denied for table Document +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); +ERROR: permission denied for table Document +-- ============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ +RESET ROLE; +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; +SET ROLE security_test_noexec; +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); +ERROR: permission denied for function drop_graph +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); +ERROR: 
permission denied for function create_vlabel +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); +ERROR: permission denied for function create_elabel +RESET ROLE; +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; +SET ROLE security_test_noexec; +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); +ERROR: permission denied for schema security_test +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); +ERROR: permission denied for function create_graph +RESET ROLE; +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" +SET ROLE security_test_edge_only; +-- Test: endNode fails without 
SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: permission denied for table Person +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: permission denied for table Person +RESET ROLE; +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; +SET ROLE security_test_edge_only; +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+---------- + "Alice" | "Bob" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- Cleanup +-- ============================================================================ +RESET ROLE; +-- Drop all owned objects and privileges for each role, then drop the role +DROP OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE 
security_test_edge_only; +-- Drop test graph +SELECT drop_graph('security_test', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table security_test._ag_label_vertex +drop cascades to table security_test._ag_label_edge +drop cascades to table security_test."Person" +drop cascades to table security_test."Document" +drop cascades to table security_test."KNOWS" +drop cascades to table security_test."OWNS" +NOTICE: graph "security_test" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Row-Level Security (RLS) Tests +-- +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); +NOTICE: graph "rls_graph" has been created + create_graph +-------------- + +(1 row) + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create a second vertex label for multi-label tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 
'rls_user2'}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should only see Alice and Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + 
"Alice" + "Charlie" +(2 rows) + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Bob" + "Diana" +(2 rows) + +RESET ROLE; +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +---------------- + "User1Created" +(1 row) + +-- 2.2: Default deny for INSERT - 
no INSERT policy blocks all inserts +DROP POLICY person_insert_own ON rls_graph."Person"; +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + name +------ +(0 rows) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +---------+--------- + "Alice" | true +(1 row) + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + name | updated +------+--------- +(0 rows) + +RESET ROLE; +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated 
agtype); + name | updated +---------+--------- + "Alice" | true + "Bob" | +(2 rows) + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = current_user); -- But new value must keep owner +SET ROLE rls_user1; +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + name | verified +---------+---------- + "Alice" | true +(1 row) + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + owner +------------- + "rls_user1" +(1 row) + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+---------- + "Alice" | "active" +(1 row) + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +------+-------- +(0 rows) + +RESET 
ROLE; +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ +-- Keep existing update policy, test REMOVE operation +SET ROLE rls_user1; +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + name | status +---------+-------- + "Alice" | +(1 row) + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + name | dept +------+------ +(0 rows) + +RESET ROLE; +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + dept +--------------- + "Engineering" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) 
AS (a agtype); + a +--- +(0 rows) + +DROP POLICY person_insert_all ON rls_graph."Person"; +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); +SET ROLE rls_user1; +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest2" + "DeleteTest3" +(2 rows) + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; +SET ROLE rls_user1; +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DeleteTest3" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH 
CHECK (properties->>'"owner"' = current_user); +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); +ERROR: new row violates row-level security policy for table "Person" +RESET ROLE; +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +---------+------------- + "Alice" | "rls_user1" +(1 row) + +RESET ROLE; +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------------- + "MergeNew1" +(1 row) + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; +-- Policy: Only see edges from 2021 or later +CREATE POLICY 
knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); +SET ROLE rls_user1; +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2021 + 2022 +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); +ERROR: new row violates row-level security policy for table "KNOWS" +RESET ROLE; +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + strength +---------- + "strong" +(1 row) + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ +-- Policy: Can only update edges with strength = 
'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2021 | "updated" +(1 row) + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + since | notes +-------+------- +(0 rows) + +RESET ROLE; +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + since | notes +-------+----------- + 2020 | + 2021 | "updated" +(2 rows) + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON 
rls_graph."KNOWS"; +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); +SET ROLE rls_user1; +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + since +------- + 2019 +(1 row) + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; +-- Policy: Cannot delete 
edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); +SET ROLE rls_user1; +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); +ERROR: cannot delete edge due to row-level security policy on "KNOWS" +HINT: DETACH DELETE requires permission to delete all connected edges. +RESET ROLE; +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + name +--------------- + "DetachTest1" +(1 row) + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + a +--- +(0 rows) + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); +SET ROLE rls_user1; +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Charlie" +(2 
rows) + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + title +-------------- + "Public Doc" +(1 row) + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + person | doc +---------+-------------- + "Alice" | "Public Doc" +(1 row) + +RESET ROLE; +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); +SET ROLE rls_user1; +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" +(3 rows) + +RESET ROLE; +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level 
agtype); + name | level +-----------+------- + "Alice" | 1 + "Bob" | 2 + "Charlie" | 1 +(3 rows) + +RESET ROLE; +-- 13.3: Multiple restrictive policies (all must pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); +SET ROLE rls_user1; +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +--------- + "Alice" + "Bob" +(2 rows) + +RESET ROLE; +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +------ +(0 rows) + +RESET ROLE; +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +RESET ROLE; +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Diana" +(4 rows) + +-- 
============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ +DROP POLICY person_very_restrictive ON rls_graph."Person"; +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + a +--- +(0 rows) + +RESET ROLE; +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + name | owner +----------------+------------- + "NewFromMatch" | "rls_user1" +(1 row) + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true +(1 row) + +RESET ROLE; +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + name | test +---------+------ + "Alice" | true + "Bob" | +(2 rows) + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON 
rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; +-- Policy: users can only see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); +-- 16.1: startNode blocked by RLS - should error +SET ROLE rls_user1; +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); +ERROR: access to vertex 844424930131970 denied by row-level security policy on "Person" +-- 16.2: endNode blocked by RLS - should error +-- rls_user1 cannot see Bob, so startNode on an edge starting from Bob should error +SET ROLE rls_user2; +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); +ERROR: access to vertex 844424930131969 denied by row-level security policy on "Person" +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 
'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + start_name | end_name +------------+----------- + "Alice" | "Charlie" +(1 row) + +RESET ROLE; +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ +RESET ROLE; +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; +-- Drop roles +DROP OWNED BY rls_user1 CASCADE; +DROP ROLE rls_user1; +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; +-- Drop test graph +SELECT drop_graph('rls_graph', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table rls_graph._ag_label_vertex +drop cascades to table rls_graph._ag_label_edge +drop cascades to table rls_graph."Person" +drop cascades to table rls_graph."Document" +drop cascades to table rls_graph."KNOWS" +drop cascades to table rls_graph."AUTHORED" +NOTICE: graph "rls_graph" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/security.sql b/regress/sql/security.sql new file mode 100644 index 000000000..344dd23d4 --- /dev/null +++ b/regress/sql/security.sql @@ -0,0 +1,1451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- Test Privileges +-- + +-- +-- Setup: Create test graph and data as superuser +-- +SELECT create_graph('security_test'); + +-- Create test vertices +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Alice', age: 30}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Bob', age: 25}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'Secret', content: 'classified'}) +$$) AS (a agtype); + +-- Create test edges +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document) + CREATE (a)-[:OWNS]->(d) +$$) AS (a agtype); + +-- +-- Create test roles with different permission levels +-- + +-- Role with only SELECT (read-only) +CREATE ROLE security_test_readonly LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_readonly; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_readonly; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_readonly; + +-- Role with SELECT and INSERT +CREATE ROLE security_test_insert LOGIN; +GRANT USAGE ON SCHEMA 
security_test TO security_test_insert; +GRANT SELECT, INSERT ON ALL TABLES IN SCHEMA security_test TO security_test_insert; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_insert; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_insert; +-- Grant sequence usage for ID generation +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_insert; + +-- Role with SELECT and UPDATE +CREATE ROLE security_test_update LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_update; +GRANT SELECT, UPDATE ON ALL TABLES IN SCHEMA security_test TO security_test_update; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_update; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_update; + +-- Role with SELECT and DELETE +CREATE ROLE security_test_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_delete; +GRANT SELECT, DELETE ON ALL TABLES IN SCHEMA security_test TO security_test_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_delete; + +CREATE ROLE security_test_detach_delete LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA security_test TO security_test_detach_delete; +GRANT DELETE ON security_test."Person" TO security_test_detach_delete; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_detach_delete; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_detach_delete; + +-- Role with all permissions +CREATE ROLE security_test_full LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_full; +GRANT ALL ON ALL TABLES IN SCHEMA security_test TO security_test_full; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_full; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_full; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_full; + +-- Role with NO SELECT on graph tables (to test read failures) +CREATE ROLE 
security_test_noread LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noread; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noread; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_noread; +-- No SELECT on security_test tables + +-- ============================================================================ +-- PART 1: SELECT Permission Tests - Failure Cases (No Read Permission) +-- ============================================================================ + +SET ROLE security_test_noread; + +-- Test: MATCH on vertices should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name +$$) AS (name agtype); + +-- Test: MATCH on edges should fail without SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() RETURN k +$$) AS (k agtype); + +-- Test: MATCH with path should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a)-[e]->(b) RETURN a, e, b +$$) AS (a agtype, e agtype, b agtype); + +RESET ROLE; + +-- Create role with SELECT only on base label tables, not child labels +-- NOTE: PostgreSQL inheritance allows access to child table rows when querying +-- through a parent table. This is expected behavior - SELECT on _ag_label_vertex +-- allows reading all vertices (including Person, Document) via inheritance. +CREATE ROLE security_test_base_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_base_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_base_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_base_only; +-- Only grant SELECT on base tables, NOT on Person, Document, KNOWS, OWNS +GRANT SELECT ON security_test._ag_label_vertex TO security_test_base_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_base_only; + +SET ROLE security_test_base_only; + +-- Test: MATCH (n) succeeds because PostgreSQL inheritance allows access to child rows +-- when querying through parent table. 
Permission on _ag_label_vertex grants read +-- access to all vertices via inheritance hierarchy. +SELECT * FROM cypher('security_test', $$ + MATCH (n) RETURN n +$$) AS (n agtype); + +-- Test: MATCH ()-[e]->() succeeds via inheritance (same reason as above) +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e]->() RETURN e +$$) AS (e agtype); + +-- ============================================================================ +-- PART 2: SELECT Permission Tests - Success Cases (Read-Only Role) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MATCH should succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test: MATCH with edges should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person)-[k:KNOWS]->(b:Person) + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- Test: MATCH across multiple labels should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person)-[:OWNS]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +-- ============================================================================ +-- PART 3: INSERT Permission Tests (CREATE clause) +-- ============================================================================ + +-- Test: CREATE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie'}) +$$) AS (a agtype); + +-- Test: CREATE edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:FRIENDS]->(b) +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: CREATE vertex should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Charlie', age: 35}) +$$) AS (a agtype); + +-- Test: CREATE edge should 
succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023}]->(b) +$$) AS (a agtype); + +-- Verify the inserts worked +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- ============================================================================ +-- PART 4: UPDATE Permission Tests (SET clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: SET should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p +$$) AS (p agtype); + +-- Test: SET on edge should fail +SELECT * FROM cypher('security_test', $$ + MATCH ()-[k:KNOWS]->() + SET k.since = 2021 + RETURN k +$$) AS (k agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: SET should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) + SET p.age = 31 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: SET on edge should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (a:Person {name: 'Alice'})-[k:KNOWS]->(b:Person {name: 'Bob'}) + SET k.since = 2019 + RETURN k.since +$$) AS (since agtype); + +-- Test: SET with map update should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + SET p += {hobby: 'reading'} + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 5: UPDATE Permission Tests (REMOVE clause) +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: REMOVE should fail with only SELECT permission +SELECT * FROM 
cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: REMOVE should succeed with UPDATE permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Bob'}) + REMOVE p.hobby + RETURN p.name, p.hobby +$$) AS (name agtype, hobby agtype); + +-- ============================================================================ +-- PART 6: DELETE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: DELETE should fail with only SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_update; + +-- Test: DELETE should fail with only UPDATE permission (need DELETE) +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +RESET ROLE; +SET ROLE security_test_delete; + +-- Test: DELETE vertex should succeed with DELETE permission +-- First delete the edge connected to Charlie +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'})-[k:KNOWS]->() + DELETE k +$$) AS (a agtype); + +-- Now delete the vertex +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) + DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Charlie'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 7: DETACH DELETE Tests +-- ============================================================================ + +RESET ROLE; + +-- Create a new vertex with edge for DETACH DELETE test +SELECT * FROM cypher('security_test', $$ + CREATE (:Person {name: 'Dave', age: 40}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH 
(a:Person {name: 'Alice'}), (d:Person {name: 'Dave'}) + CREATE (a)-[:KNOWS {since: 2022}]->(d) +$$) AS (a agtype); + +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should fail without DELETE on edge table +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; +GRANT DELETE ON security_test."KNOWS" TO security_test_detach_delete; +SET ROLE security_test_detach_delete; + +-- Test: DETACH DELETE should succeed now when user has DELETE on both vertex and edge tables +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) + DETACH DELETE p +$$) AS (a agtype); + +-- Verify deletion +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Dave'}) RETURN p +$$) AS (p agtype); + +-- ============================================================================ +-- PART 8: MERGE Permission Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_readonly; + +-- Test: MERGE that would create should fail without INSERT +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p +$$) AS (p agtype); + +RESET ROLE; +SET ROLE security_test_insert; + +-- Test: MERGE that creates should succeed with INSERT permission +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve', age: 28}) + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +-- Test: MERGE that matches existing should succeed (only needs SELECT) +SELECT * FROM cypher('security_test', $$ + MERGE (p:Person {name: 'Eve'}) + RETURN p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 9: Full Permission Role Tests +-- ============================================================================ + +RESET ROLE; +SET ROLE security_test_full; + +-- Full permission role should be able to do everything +SELECT * FROM 
cypher('security_test', $$ + CREATE (:Person {name: 'Frank', age: 50}) +$$) AS (a agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + SET p.age = 51 + RETURN p.name, p.age +$$) AS (name agtype, age agtype); + +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Frank'}) + DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 10: Permission on Specific Labels +-- ============================================================================ + +RESET ROLE; + +-- Create a role with permission only on Person label, not Document +CREATE ROLE security_test_person_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_person_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_person_only; +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA ag_catalog TO security_test_person_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_person_only; +-- Only grant permissions on Person table +GRANT SELECT, INSERT, UPDATE, DELETE ON security_test."Person" TO security_test_person_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_person_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_person_only; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA security_test TO security_test_person_only; + +SET ROLE security_test_person_only; + +-- Test: Operations on Person should succeed +SELECT * FROM cypher('security_test', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.name +$$) AS (name agtype); + +-- Test: SELECT on Document should fail (no permission) +SELECT * FROM cypher('security_test', $$ + MATCH (d:Document) RETURN d.title +$$) AS (title agtype); + +-- Test: CREATE Document should fail (no permission on Document table) +SELECT * FROM cypher('security_test', $$ + CREATE (:Document {title: 'New Doc'}) +$$) AS (a agtype); + +-- 
============================================================================ +-- PART 11: Function EXECUTE Permission Tests +-- ============================================================================ + +RESET ROLE; + +-- Create role with no function execute permissions +CREATE ROLE security_test_noexec LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_noexec; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_noexec; + +-- Revoke execute from PUBLIC on functions we want to test +REVOKE EXECUTE ON FUNCTION ag_catalog.create_graph(name) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) FROM PUBLIC; + +SET ROLE security_test_noexec; + +-- Test: create_graph should fail without EXECUTE permission +SELECT create_graph('unauthorized_graph'); + +-- Test: drop_graph should fail without EXECUTE permission +SELECT drop_graph('security_test', true); + +-- Test: create_vlabel should fail without EXECUTE permission +SELECT create_vlabel('security_test', 'NewLabel'); + +-- Test: create_elabel should fail without EXECUTE permission +SELECT create_elabel('security_test', 'NewEdge'); + +RESET ROLE; + +-- Grant execute on specific function and test +GRANT EXECUTE ON FUNCTION ag_catalog.create_vlabel(cstring, cstring) TO security_test_noexec; + +SET ROLE security_test_noexec; + +-- Test: create_vlabel should now get past execute check (will fail on schema permission instead) +SELECT create_vlabel('security_test', 'TestLabel'); + +-- Test: create_graph should still fail with execute permission denied +SELECT create_graph('unauthorized_graph'); + +RESET ROLE; + +-- Restore execute permissions to PUBLIC +GRANT EXECUTE ON FUNCTION ag_catalog.create_graph(name) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.drop_graph(name, boolean) TO PUBLIC; +GRANT EXECUTE ON FUNCTION 
ag_catalog.create_vlabel(cstring, cstring) TO PUBLIC; +GRANT EXECUTE ON FUNCTION ag_catalog.create_elabel(cstring, cstring) TO PUBLIC; + +-- ============================================================================ +-- PART 12: startNode/endNode Permission Tests +-- ============================================================================ + +-- Create role with SELECT on base tables but NOT on Person label +CREATE ROLE security_test_edge_only LOGIN; +GRANT USAGE ON SCHEMA security_test TO security_test_edge_only; +GRANT USAGE ON SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON ALL TABLES IN SCHEMA ag_catalog TO security_test_edge_only; +GRANT SELECT ON security_test."KNOWS" TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_edge TO security_test_edge_only; +GRANT SELECT ON security_test._ag_label_vertex TO security_test_edge_only; +-- Note: NOT granting SELECT on security_test."Person" + +SET ROLE security_test_edge_only; + +-- Test: endNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- Test: startNode fails without SELECT on Person table +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e) +$$) AS (start_vertex agtype); + +RESET ROLE; + +-- Grant SELECT on Person and verify success +GRANT SELECT ON security_test."Person" TO security_test_edge_only; + +SET ROLE security_test_edge_only; + +-- Test: Should now succeed with SELECT permission +SELECT * FROM cypher('security_test', $$ + MATCH ()-[e:KNOWS]->() + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- ============================================================================ +-- Cleanup +-- ============================================================================ + +RESET ROLE; + +-- Drop all owned objects and privileges for each role, then drop the role +DROP 
OWNED BY security_test_noread CASCADE; +DROP ROLE security_test_noread; + +DROP OWNED BY security_test_base_only CASCADE; +DROP ROLE security_test_base_only; + +DROP OWNED BY security_test_readonly CASCADE; +DROP ROLE security_test_readonly; + +DROP OWNED BY security_test_insert CASCADE; +DROP ROLE security_test_insert; + +DROP OWNED BY security_test_update CASCADE; +DROP ROLE security_test_update; + +DROP OWNED BY security_test_delete CASCADE; +DROP ROLE security_test_delete; + +DROP OWNED BY security_test_detach_delete CASCADE; +DROP ROLE security_test_detach_delete; + +DROP OWNED BY security_test_full CASCADE; +DROP ROLE security_test_full; + +DROP OWNED BY security_test_person_only CASCADE; +DROP ROLE security_test_person_only; + +DROP OWNED BY security_test_noexec CASCADE; +DROP ROLE security_test_noexec; + +DROP OWNED BY security_test_edge_only CASCADE; +DROP ROLE security_test_edge_only; + +-- Drop test graph +SELECT drop_graph('security_test', true); + +-- +-- Row-Level Security (RLS) Tests +-- + +-- +-- Setup: Create test graph, data and roles for RLS tests +-- +SELECT create_graph('rls_graph'); + +-- Create test roles +CREATE ROLE rls_user1 LOGIN; +CREATE ROLE rls_user2 LOGIN; +CREATE ROLE rls_admin LOGIN BYPASSRLS; -- Role that bypasses RLS + +-- Create base test data FIRST (as superuser) - this creates the label tables +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Alice', owner: 'rls_user1', department: 'Engineering', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Bob', owner: 'rls_user2', department: 'Engineering', level: 2}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Charlie', owner: 'rls_user1', department: 'Sales', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'Diana', owner: 'rls_user2', department: 'Sales', level: 3}) +$$) AS (a agtype); + +-- Create a second vertex label for multi-label 
tests +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Public Doc', classification: 'public', owner: 'rls_user1'}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Document {title: 'Secret Doc', classification: 'secret', owner: 'rls_user2'}) +$$) AS (a agtype); + +-- Create edges +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'}) + CREATE (a)-[:KNOWS {since: 2020, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Charlie'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2021, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2022, strength: 'strong'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}), (d:Document {title: 'Public Doc'}) + CREATE (a)-[:AUTHORED]->(d) +$$) AS (a agtype); + +-- Grant permissions AFTER creating tables (so Person, Document, KNOWS, AUTHORED exist) +GRANT USAGE ON SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT ALL ON ALL TABLES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON SCHEMA ag_catalog TO rls_user1, rls_user2, rls_admin; +GRANT USAGE ON ALL SEQUENCES IN SCHEMA rls_graph TO rls_user1, rls_user2, rls_admin; + +-- ============================================================================ +-- PART 1: Vertex SELECT Policies (USING clause) +-- ============================================================================ + +-- Enable RLS on Person label +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- 1.1: Basic ownership filtering +CREATE POLICY person_select_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should only see Alice and 
Charlie (owned by rls_user1) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- Test as rls_user2 - should only see Bob and Diana (owned by rls_user2) +SET ROLE rls_user2; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 1.2: Default deny - no permissive policies means no access +DROP POLICY person_select_own ON rls_graph."Person"; + +-- With no policies, RLS blocks all access +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 2: Vertex INSERT Policies (WITH CHECK) - CREATE +-- ============================================================================ + +-- Allow SELECT for all (so we can verify results) +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 2.1: Basic WITH CHECK - users can only insert rows they own +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- Test as rls_user1 - should succeed (owner matches current_user) +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Created', owner: 'rls_user1', department: 'Test', level: 1}) +$$) AS (a agtype); + +-- Test as rls_user1 - should FAIL (owner doesn't match current_user) +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'User1Fake', owner: 'rls_user2', department: 'Test', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only User1Created was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- 2.2: Default deny for INSERT - no INSERT policy blocks all inserts +DROP POLICY person_insert_own 
ON rls_graph."Person"; + +SET ROLE rls_user1; +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'ShouldFail', owner: 'rls_user1', department: 'Blocked', level: 1}) +$$) AS (a agtype); +RESET ROLE; + +-- Verify nothing was created in Blocked department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Blocked' RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Test' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 3: Vertex UPDATE Policies - SET +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- 3.1: USING clause only - filter which rows can be updated +CREATE POLICY person_update_using ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - rls_user1 owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +-- Should silently skip - rls_user1 doesn't own Bob (USING filters it out) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.updated = true RETURN p.name, p.updated +$$) AS (name agtype, updated agtype); + +RESET ROLE; + +-- Verify Alice was updated, Bob was not +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] RETURN p.name, p.updated ORDER BY p.name +$$) AS (name agtype, updated agtype); + +-- 3.2: WITH CHECK clause - validate new values +DROP POLICY person_update_using ON rls_graph."Person"; + +CREATE POLICY person_update_check ON rls_graph."Person" + FOR UPDATE + USING (true) -- Can update any row + WITH CHECK (properties->>'"owner"' = 
current_user); -- But new value must keep owner + +SET ROLE rls_user1; + +-- Should succeed - modifying property but keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.verified = true RETURN p.name, p.verified +$$) AS (name agtype, verified agtype); + +-- Should FAIL - trying to change owner to someone else +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.owner = 'rls_user2' RETURN p.owner +$$) AS (owner agtype); + +RESET ROLE; + +-- Verify owner wasn't changed +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) RETURN p.owner +$$) AS (owner agtype); + +-- 3.3: Both USING and WITH CHECK together +DROP POLICY person_update_check ON rls_graph."Person"; + +CREATE POLICY person_update_both ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns Alice, keeping owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob (USING filters) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) SET p.status = 'active' RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 4: Vertex UPDATE Policies - REMOVE +-- ============================================================================ + +-- Keep existing update policy, test REMOVE operation + +SET ROLE rls_user1; + +-- Should succeed - owns Alice +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Alice'}) REMOVE p.status RETURN p.name, p.status +$$) AS (name agtype, status agtype); + +-- Should silently skip - doesn't own Bob +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) REMOVE p.department 
RETURN p.name, p.department +$$) AS (name agtype, dept agtype); + +RESET ROLE; + +-- Verify Bob still has department +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'Bob'}) RETURN p.department +$$) AS (dept agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_update_both ON rls_graph."Person"; + +-- ============================================================================ +-- PART 5: Vertex DELETE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +-- Create test data for delete tests +CREATE POLICY person_insert_all ON rls_graph."Person" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest1', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest2', owner: 'rls_user2', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DeleteTest3', owner: 'rls_user1', department: 'DeleteTest', level: 1}) +$$) AS (a agtype); + +DROP POLICY person_insert_all ON rls_graph."Person"; + +-- 5.1: Basic USING filtering for DELETE +CREATE POLICY person_delete_own ON rls_graph."Person" + FOR DELETE + USING (properties->>'"owner"' = current_user); + +SET ROLE rls_user1; + +-- Should succeed - owns DeleteTest1 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest1'}) DELETE p +$$) AS (a agtype); + +-- Should silently skip - doesn't own DeleteTest2 +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest2'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest1 deleted, DeleteTest2 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' RETURN p.name ORDER BY p.name +$$) AS (name 
agtype); + +-- 5.2: Default deny for DELETE - no policy blocks all deletes +DROP POLICY person_delete_own ON rls_graph."Person"; + +SET ROLE rls_user1; + +-- Should silently skip - no DELETE policy means default deny +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify DeleteTest3 still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DeleteTest3'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'DeleteTest' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 6: MERGE Policies +-- ============================================================================ + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +-- 6.1: MERGE creating new vertex - INSERT policy applies +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew1', owner: 'rls_user1', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +-- Should FAIL - creating with wrong owner +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'MergeNew2', owner: 'rls_user2', department: 'Merge', level: 1}) + RETURN p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 6.2: MERGE matching existing - only SELECT needed +SET ROLE rls_user1; + +-- Should succeed - Alice exists and SELECT allowed +SELECT * FROM cypher('rls_graph', $$ + MERGE (p:Person {name: 'Alice'}) + RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +RESET ROLE; + +-- Verify only MergeNew1 was created +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE 
p.department = 'Merge' RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_select_all ON rls_graph."Person"; +DROP POLICY person_insert_own ON rls_graph."Person"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Merge' DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 7: Edge SELECT Policies +-- ============================================================================ + +-- Disable vertex RLS, enable edge RLS +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" FORCE ROW LEVEL SECURITY; + +-- Policy: Only see edges from 2021 or later +CREATE POLICY knows_select_recent ON rls_graph."KNOWS" + FOR SELECT + USING ((properties->>'"since"')::int >= 2021); + +SET ROLE rls_user1; + +-- Should only see 2021 and 2022 edges (not 2020) +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 8: Edge INSERT Policies (CREATE edge) +-- ============================================================================ + +DROP POLICY knows_select_recent ON rls_graph."KNOWS"; + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Policy: Can only create edges with strength = 'strong' +CREATE POLICY knows_insert_strong ON rls_graph."KNOWS" + FOR INSERT + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - strength is 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Diana'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'strong'}]->(b) +$$) AS (a agtype); + +-- Should FAIL - strength is 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), 
(b:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS {since: 2023, strength: 'weak'}]->(b) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify only strong edge was created +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since = 2023 RETURN k.strength ORDER BY k.strength +$$) AS (strength agtype); + +-- cleanup +DROP POLICY knows_insert_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 9: Edge UPDATE Policies (SET on edge) +-- ============================================================================ + +-- Policy: Can only update edges with strength = 'strong' +CREATE POLICY knows_update_strong ON rls_graph."KNOWS" + FOR UPDATE + USING (properties->>'"strength"' = 'strong') + WITH CHECK (properties->>'"strength"' = 'strong'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2021}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +-- Should silently skip - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2020}]->() SET k.notes = 'updated' RETURN k.since, k.notes +$$) AS (since agtype, notes agtype); + +RESET ROLE; + +-- Verify only 2021 edge was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2020, 2021] RETURN k.since, k.notes ORDER BY k.since +$$) AS (since agtype, notes agtype); + +-- cleanup +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_update_strong ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 10: Edge DELETE Policies +-- ============================================================================ + +CREATE POLICY knows_select_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- Create test edges for delete +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR 
INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Bob'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2018, strength: 'weak'}]->(b) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Diana'}), (b:Person {name: 'Charlie'}) + CREATE (a)-[:KNOWS {since: 2019, strength: 'strong'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Can only delete edges with strength = 'weak' +CREATE POLICY knows_delete_weak ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' = 'weak'); + +SET ROLE rls_user1; + +-- Should succeed - edge has strength 'weak' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2018}]->() DELETE k +$$) AS (a agtype); + +-- Should silently skip - edge has strength 'strong' +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS {since: 2019}]->() DELETE k +$$) AS (a agtype); + +RESET ROLE; + +-- Verify 2018 edge deleted, 2019 edge still exists +SELECT * FROM cypher('rls_graph', $$ + MATCH ()-[k:KNOWS]->() WHERE k.since IN [2018, 2019] RETURN k.since ORDER BY k.since +$$) AS (since agtype); + +-- cleanup +DROP POLICY knows_delete_weak ON rls_graph."KNOWS"; + +-- ============================================================================ +-- PART 11: DETACH DELETE +-- ============================================================================ + +-- Re-enable Person RLS +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +CREATE POLICY person_all ON rls_graph."Person" + FOR ALL USING (true) WITH CHECK (true); + +-- Create test data with a protected edge +CREATE POLICY knows_insert_all ON rls_graph."KNOWS" + FOR INSERT WITH CHECK (true); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest1', owner: 'test', department: 'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + CREATE (:Person {name: 'DetachTest2', owner: 'test', department: 
'Detach', level: 1}) +$$) AS (a agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'DetachTest1'}), (b:Person {name: 'DetachTest2'}) + CREATE (a)-[:KNOWS {since: 2010, strength: 'protected'}]->(b) +$$) AS (a agtype); + +DROP POLICY knows_insert_all ON rls_graph."KNOWS"; + +-- Policy: Cannot delete edges with strength = 'protected' +CREATE POLICY knows_delete_not_protected ON rls_graph."KNOWS" + FOR DELETE + USING (properties->>'"strength"' != 'protected'); + +SET ROLE rls_user1; + +-- Should ERROR - DETACH DELETE cannot silently skip (would leave dangling edge) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) DETACH DELETE p +$$) AS (a agtype); + +RESET ROLE; + +-- Verify vertex still exists (delete was blocked) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'DetachTest1'}) RETURN p.name +$$) AS (name agtype); + +-- cleanup +DROP POLICY person_all ON rls_graph."Person"; +DROP POLICY knows_select_all ON rls_graph."KNOWS"; +DROP POLICY knows_delete_not_protected ON rls_graph."KNOWS"; +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department = 'Detach' DETACH DELETE p +$$) AS (a agtype); + +-- ============================================================================ +-- PART 12: Multiple Labels in Single Query +-- ============================================================================ + +-- Enable RLS on Document too +ALTER TABLE rls_graph."Document" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" FORCE ROW LEVEL SECURITY; + +-- Policy: Users see their own Person records +CREATE POLICY person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Policy: Users see only public documents +CREATE POLICY doc_public ON rls_graph."Document" + FOR SELECT + USING (properties->>'"classification"' = 'public'); + +SET ROLE rls_user1; + +-- Should only see Alice and Charlie (Person) with Public Doc (Document) +SELECT * FROM 
cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +SELECT * FROM cypher('rls_graph', $$ + MATCH (d:Document) RETURN d.title ORDER BY d.title +$$) AS (title agtype); + +-- Combined query - should respect both policies +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person)-[:AUTHORED]->(d:Document) + RETURN p.name, d.title +$$) AS (person agtype, doc agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 13: Permissive vs Restrictive Policies +-- ============================================================================ + +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY doc_public ON rls_graph."Document"; + +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- 13.1: Multiple permissive policies (OR logic) +CREATE POLICY person_permissive_own ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"owner"' = current_user); + +CREATE POLICY person_permissive_eng ON rls_graph."Person" + AS PERMISSIVE FOR SELECT + USING (properties->>'"department"' = 'Engineering'); + +SET ROLE rls_user1; + +-- Should see: Alice (own), Charlie (own), Bob (Engineering) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.department IN ['Engineering', 'Sales'] + RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 13.2: Add restrictive policy (AND with permissive) +CREATE POLICY person_restrictive_level ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING ((properties->>'"level"')::int <= 2); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1), Bob (Engineering, level 2), Charlie (own, level 1) +-- Diana (level 3) blocked by restrictive +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name, p.level ORDER BY p.name +$$) AS (name agtype, level agtype); + +RESET ROLE; + +-- 13.3: Multiple restrictive policies (all must 
pass) +CREATE POLICY person_restrictive_sales ON rls_graph."Person" + AS RESTRICTIVE FOR SELECT + USING (properties->>'"department"' != 'Sales'); + +SET ROLE rls_user1; + +-- Should see: Alice (own, level 1, not Sales), Bob (Engineering, level 2, not Sales) +-- Charlie blocked by Sales restriction +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- ============================================================================ +-- PART 14: BYPASSRLS Role and Superuser Behavior +-- ============================================================================ + +DROP POLICY person_permissive_own ON rls_graph."Person"; +DROP POLICY person_permissive_eng ON rls_graph."Person"; +DROP POLICY person_restrictive_level ON rls_graph."Person"; +DROP POLICY person_restrictive_sales ON rls_graph."Person"; + +-- Restrictive policy that blocks most access +CREATE POLICY person_very_restrictive ON rls_graph."Person" + FOR SELECT + USING (properties->>'"name"' = 'Nobody'); + +-- 14.1: Regular user sees nothing +SET ROLE rls_user1; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.2: BYPASSRLS role sees everything +SET ROLE rls_admin; + +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +RESET ROLE; + +-- 14.3: Superuser sees everything (implicit bypass) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) RETURN p.name ORDER BY p.name +$$) AS (name agtype); + +-- ============================================================================ +-- PART 15: Complex Multi-Operation Queries +-- ============================================================================ + +DROP POLICY person_very_restrictive ON rls_graph."Person"; + +CREATE POLICY person_select_all ON rls_graph."Person" + FOR SELECT USING (true); + +CREATE POLICY person_insert_own ON 
rls_graph."Person" + FOR INSERT + WITH CHECK (properties->>'"owner"' = current_user); + +CREATE POLICY person_update_own ON rls_graph."Person" + FOR UPDATE + USING (properties->>'"owner"' = current_user) + WITH CHECK (properties->>'"owner"' = current_user); + +-- 15.1: MATCH + CREATE in one query +SET ROLE rls_user1; + +-- Should succeed - creating with correct owner +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'}) + CREATE (a)-[:KNOWS]->(:Person {name: 'NewFromMatch', owner: 'rls_user1', department: 'Complex', level: 1}) +$$) AS (a agtype); + +RESET ROLE; + +-- Verify creation +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person {name: 'NewFromMatch'}) RETURN p.name, p.owner +$$) AS (name agtype, owner agtype); + +-- 15.2: MATCH + SET in one query +SET ROLE rls_user1; + +-- Should succeed on Alice (own), skip Bob (not own) +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + SET p.complexTest = true + RETURN p.name, p.complexTest +$$) AS (name agtype, test agtype); + +RESET ROLE; + +-- Verify only Alice was updated +SELECT * FROM cypher('rls_graph', $$ + MATCH (p:Person) WHERE p.name IN ['Alice', 'Bob'] + RETURN p.name, p.complexTest ORDER BY p.name +$$) AS (name agtype, test agtype); + +-- cleanup +DROP POLICY IF EXISTS person_select_all ON rls_graph."Person"; +DROP POLICY IF EXISTS person_insert_own ON rls_graph."Person"; +DROP POLICY IF EXISTS person_update_own ON rls_graph."Person"; + +-- ============================================================================ +-- PART 16: startNode/endNode RLS Enforcement +-- ============================================================================ + +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; + +-- Enable RLS on Person with restrictive policy +ALTER TABLE rls_graph."Person" ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Person" FORCE ROW LEVEL SECURITY; + +-- Policy: users can only see their own Person records +CREATE POLICY 
person_own ON rls_graph."Person" + FOR SELECT + USING (properties->>'"owner"' = current_user); + +-- Enable edge access for testing +ALTER TABLE rls_graph."KNOWS" ENABLE ROW LEVEL SECURITY; +CREATE POLICY knows_all ON rls_graph."KNOWS" + FOR SELECT USING (true); + +-- 16.1: endNode blocked by RLS - should error +SET ROLE rls_user1; + +-- rls_user1 can see the edge (Alice->Bob) but cannot see Bob (owned by rls_user2) +-- endNode should error because Bob is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(b) + RETURN endNode(e) +$$) AS (end_vertex agtype); + +-- 16.2: startNode blocked by RLS - should error +-- rls_user2 cannot see Alice, so startNode on an edge starting from Alice should error +SET ROLE rls_user2; + +-- rls_user2 can see Bob but not Alice (owned by rls_user1) +-- startNode should error because Alice is blocked by RLS +SELECT * FROM cypher('rls_graph', $$ + MATCH (a)-[e:KNOWS]->(b:Person {name: 'Bob'}) + RETURN startNode(e) +$$) AS (start_vertex agtype); + +-- 16.3: startNode/endNode succeed when RLS allows access +SET ROLE rls_user1; + +-- Alice->Charlie edge: rls_user1 owns both, should succeed +SELECT * FROM cypher('rls_graph', $$ + MATCH (a:Person {name: 'Alice'})-[e:KNOWS]->(c:Person {name: 'Charlie'}) + RETURN startNode(e).name, endNode(e).name +$$) AS (start_name agtype, end_name agtype); + +RESET ROLE; + +-- cleanup +DROP POLICY person_own ON rls_graph."Person"; +DROP POLICY knows_all ON rls_graph."KNOWS"; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- ============================================================================ +-- RLS CLEANUP +-- ============================================================================ + +RESET ROLE; + +-- Disable RLS on all tables +ALTER TABLE rls_graph."Person" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."Document" DISABLE ROW LEVEL SECURITY; +ALTER TABLE rls_graph."KNOWS" DISABLE ROW LEVEL SECURITY; + +-- Drop roles +DROP OWNED BY 
rls_user1 CASCADE; +DROP ROLE rls_user1; + +DROP OWNED BY rls_user2 CASCADE; +DROP ROLE rls_user2; + +DROP OWNED BY rls_admin CASCADE; +DROP ROLE rls_admin; + +-- Drop test graph +SELECT drop_graph('rls_graph', true); diff --git a/src/backend/executor/cypher_create.c b/src/backend/executor/cypher_create.c index 2031fe8d8..495eb3a08 100644 --- a/src/backend/executor/cypher_create.c +++ b/src/backend/executor/cypher_create.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -122,6 +123,12 @@ static void begin_cypher_create(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } } diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index f86c6126b..0b486ad5e 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -22,6 +22,9 @@ #include "executor/executor.h" #include "storage/bufmgr.h" #include "common/hashfn.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -371,6 +374,16 @@ static void process_delete_list(CustomScanState *node) ExprContext *econtext = css->css.ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; EState *estate = node->ss.ps.state; + HTAB *qual_cache = NULL; + HASHCTL hashctl; + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("delete_qual_cache", 8, &hashctl, + HASH_ELEM 
| HASH_BLOBS | HASH_CONTEXT); foreach(lc, css->delete_data->delete_items) { @@ -383,6 +396,7 @@ static void process_delete_list(CustomScanState *node) char *label_name; Integer *pos; int entity_position; + Oid relid; item = lfirst(lc); @@ -401,6 +415,7 @@ static void process_delete_list(CustomScanState *node) label_name = pnstrdup(label->val.string.val, label->val.string.len); resultRelInfo = create_entity_result_rel_info(estate, css->delete_data->graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); /* * Setup the scan key to require the id field on-disc to match the @@ -448,6 +463,36 @@ static void process_delete_list(CustomScanState *node) continue; } + /* Check RLS security quals (USING policy) before delete */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + bool found; + + /* Get cached security quals and slot for this label */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_DELETE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + entry->withCheckOptions = NIL; + entry->withCheckOptionExprs = NIL; + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + + /* Silently skip if USING policy filters out this row */ + if (!check_security_quals(entry->qualExprs, entry->slot, econtext)) + { + table_endscan(scan_desc); + destroy_entity_result_rel_info(resultRelInfo); + continue; + } + } + /* * For vertices, we insert the vertex ID in the hashtable * vertex_id_htab. 
This hashtable is used later to process @@ -467,6 +512,9 @@ static void process_delete_list(CustomScanState *node) table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } + + /* Clean up the cache */ + hash_destroy(qual_cache); } /* @@ -490,9 +538,14 @@ static void check_for_connected_edges(CustomScanState *node) TableScanDesc scan_desc; HeapTuple tuple; TupleTableSlot *slot; + Oid relid; + bool rls_enabled = false; + List *qualExprs = NIL; + ExprContext *econtext = NULL; resultRelInfo = create_entity_result_rel_info(estate, graph_name, label_name); + relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); estate->es_snapshot->curcid = GetCurrentCommandId(false); estate->es_output_cid = GetCurrentCommandId(false); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, @@ -501,6 +554,22 @@ static void check_for_connected_edges(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* + * For DETACH DELETE with RLS enabled, compile the security qual + * expressions once per label for efficient evaluation. + */ + if (css->delete_data->detach) + { + /* Setup RLS security quals for this label */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + rls_enabled = true; + econtext = css->css.ss.ps.ps_ExprContext; + qualExprs = setup_security_quals(resultRelInfo, estate, node, + CMD_DELETE); + } + } + /* for each row */ while (true) { @@ -538,6 +607,34 @@ static void check_for_connected_edges(CustomScanState *node) { if (css->delete_data->detach) { + AclResult aclresult; + + /* Check that the user has DELETE permission on the edge table */ + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_DELETE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, label_name); + } + + /* Check RLS security quals (USING policy) before delete */ + if (rls_enabled) + { + /* + * For DETACH DELETE, error out if edge RLS check fails. 
+ * Unlike normal DELETE which silently skips, we cannot + * silently skip edges here as it would leave dangling + * edges pointing to deleted vertices. + */ + if (!check_security_quals(qualExprs, slot, econtext)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("cannot delete edge due to row-level security policy on \"%s\"", + label_name), + errhint("DETACH DELETE requires permission to delete all connected edges."))); + } + } + delete_entity(estate, resultRelInfo, tuple); } else diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 6cfa70d48..a1bb4686c 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -20,11 +20,12 @@ #include "postgres.h" #include "executor/executor.h" +#include "utils/datum.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" -#include "utils/datum.h" /* * The following structure is used to hold a single vertex or edge component @@ -182,6 +183,12 @@ static void begin_cypher_merge(CustomScanState *node, EState *estate, cypher_node->prop_expr_state = ExecInitExpr(cypher_node->prop_expr, (PlanState *)node); } + + /* Setup RLS WITH CHECK policies if RLS is enabled */ + if (check_enable_rls(rel->rd_id, InvalidOid, true) == RLS_ENABLED) + { + setup_wcos(cypher_node->resultRelInfo, estate, node, CMD_INSERT); + } } /* diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index 9fd599eed..a1063af32 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -19,8 +19,10 @@ #include "postgres.h" +#include "common/hashfn.h" #include "executor/executor.h" #include "storage/bufmgr.h" +#include "utils/rls.h" #include "executor/cypher_executor.h" #include "executor/cypher_utils.h" @@ -136,6 +138,13 @@ static HeapTuple update_entity_tuple(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, 
estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + result = table_tuple_update(resultRelInfo->ri_RelationDesc, &tuple->t_self, elemTupleSlot, cid, estate->es_snapshot, @@ -372,9 +381,20 @@ static void process_update_list(CustomScanState *node) EState *estate = css->css.ss.ps.state; int *luindex = NULL; int lidx = 0; + HTAB *qual_cache = NULL; + HASHCTL hashctl; /* allocate an array to hold the last update index of each 'entity' */ luindex = palloc0(sizeof(int) * scanTupleSlot->tts_nvalid); + + /* Hash table for caching compiled security quals per label */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(Oid); + hashctl.entrysize = sizeof(RLSCacheEntry); + hashctl.hcxt = CurrentMemoryContext; + qual_cache = hash_create("update_qual_cache", 8, &hashctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + /* * Iterate through the SET items list and store the loop index of each * 'entity' update. 
As there is only one entry for each entity, this will @@ -522,6 +542,38 @@ static void process_update_list(CustomScanState *node) estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); + /* Setup RLS policies if RLS is enabled */ + if (check_enable_rls(resultRelInfo->ri_RelationDesc->rd_id, + InvalidOid, true) == RLS_ENABLED) + { + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + RLSCacheEntry *entry; + bool found; + + /* Get cached RLS state for this label, or set it up */ + entry = hash_search(qual_cache, &relid, HASH_ENTER, &found); + if (!found) + { + /* Setup WITH CHECK policies */ + setup_wcos(resultRelInfo, estate, node, CMD_UPDATE); + entry->withCheckOptions = resultRelInfo->ri_WithCheckOptions; + entry->withCheckOptionExprs = resultRelInfo->ri_WithCheckOptionExprs; + + /* Setup security quals */ + entry->qualExprs = setup_security_quals(resultRelInfo, estate, + node, CMD_UPDATE); + entry->slot = ExecInitExtraTupleSlot( + estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), + &TTSOpsHeapTuple); + } + else + { + /* Use cached WCOs */ + resultRelInfo->ri_WithCheckOptions = entry->withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = entry->withCheckOptionExprs; + } + } + /* * Now that we have the updated properties, create a either a vertex or * edge Datum for the in-memory update, and setup the tupleTableSlot @@ -597,8 +649,36 @@ static void process_update_list(CustomScanState *node) */ if (HeapTupleIsValid(heap_tuple)) { - heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, - heap_tuple); + bool should_update = true; + Oid relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + + /* Check RLS security quals (USING policy) before update */ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + RLSCacheEntry *entry; + + /* Entry was already created earlier when setting up WCOs */ + entry = hash_search(qual_cache, &relid, HASH_FIND, NULL); + if (!entry) + { + ereport(ERROR, + 
(errcode(ERRCODE_INTERNAL_ERROR), + errmsg("missing RLS cache entry for relation %u", + relid))); + } + + ExecStoreHeapTuple(heap_tuple, entry->slot, false); + should_update = check_security_quals(entry->qualExprs, + entry->slot, + econtext); + } + + /* Silently skip if USING policy filters out this row */ + if (should_update) + { + heap_tuple = update_entity_tuple(resultRelInfo, slot, estate, + heap_tuple); + } } /* close the ScanDescription */ table_endscan(scan_desc); @@ -612,6 +692,10 @@ static void process_update_list(CustomScanState *node) /* increment loop index */ lidx++; } + + /* Clean up the cache */ + hash_destroy(qual_cache); + /* free our lookup array */ pfree_if_not_null(luindex); } diff --git a/src/backend/executor/cypher_utils.c b/src/backend/executor/cypher_utils.c index d7a55f709..eff829925 100644 --- a/src/backend/executor/cypher_utils.c +++ b/src/backend/executor/cypher_utils.c @@ -25,14 +25,35 @@ #include "postgres.h" #include "executor/executor.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_relation.h" +#include "rewrite/rewriteManip.h" +#include "rewrite/rowsecurity.h" +#include "utils/acl.h" +#include "utils/rls.h" #include "catalog/ag_label.h" #include "commands/label_commands.h" #include "executor/cypher_utils.h" #include "utils/ag_cache.h" +/* RLS helper function declarations */ +static void get_policies_for_relation(Relation relation, CmdType cmd, + Oid user_id, List **permissive_policies, + List **restrictive_policies); +static void add_with_check_options(Relation rel, int rt_index, WCOKind kind, + List *permissive_policies, + List *restrictive_policies, + List **withCheckOptions, bool *hasSubLinks, + bool force_using); +static void add_security_quals(int rt_index, List *permissive_policies, + List *restrictive_policies, + List **securityQuals, bool *hasSubLinks); +static void sort_policies_by_name(List *policies); +static int row_security_policy_cmp(const ListCell *a, const ListCell *b); +static bool 
check_role_for_policy(ArrayType *policy_roles, Oid user_id); + /* * Given the graph name and the label name, create a ResultRelInfo for the table * those two variables represent. Open the Indices too. @@ -256,6 +277,13 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, ExecConstraints(resultRelInfo, elemTupleSlot, estate); } + /* Check RLS WITH CHECK policies if configured */ + if (resultRelInfo->ri_WithCheckOptions != NIL) + { + ExecWithCheckOptions(WCO_RLS_INSERT_CHECK, resultRelInfo, + elemTupleSlot, estate); + } + /* Insert the tuple normally */ table_tuple_insert(resultRelInfo->ri_RelationDesc, elemTupleSlot, cid, 0, NULL); @@ -269,3 +297,754 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo, return tuple; } + +/* + * setup_wcos + * + * WithCheckOptions are added during the rewrite phase, but since AGE uses + * CMD_SELECT for all queries, WCOs don't get added for CREATE/SET/MERGE + * operations. This function compensates by adding WCOs at execution time. + * + * Based on PostgreSQL's row security implementation in rowsecurity.c + */ +void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate, + CustomScanState *node, CmdType cmd) +{ + List *permissive_policies; + List *restrictive_policies; + List *withCheckOptions = NIL; + List *wcoExprs = NIL; + ListCell *lc; + Relation rel; + Oid user_id; + int rt_index; + WCOKind wco_kind; + bool hasSubLinks = false; + + /* Determine the WCO kind based on command type */ + if (cmd == CMD_INSERT) + { + wco_kind = WCO_RLS_INSERT_CHECK; + } + else if (cmd == CMD_UPDATE) + { + wco_kind = WCO_RLS_UPDATE_CHECK; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("unexpected command type for setup_wcos"))); + } + + rel = resultRelInfo->ri_RelationDesc; + + /* + * Use rt_index=1 since we're evaluating policies against a single relation. 
+ * Policy quals are stored with varno=1, and we set ecxt_scantuple to the + * tuple we want to check, so keeping varno=1 is correct. + */ + rt_index = 1; + user_id = GetUserId(); + + /* Get the policies for the specified command type */ + get_policies_for_relation(rel, cmd, user_id, + &permissive_policies, + &restrictive_policies); + + /* Build WithCheckOptions from the policies */ + add_with_check_options(rel, rt_index, wco_kind, + permissive_policies, + restrictive_policies, + &withCheckOptions, + &hasSubLinks, + false); + + /* Compile the WCO expressions */ + foreach(lc, withCheckOptions) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, lc); + ExprState *wcoExpr; + + /* Ensure qual is a List for ExecInitQual */ + if (!IsA(wco->qual, List)) + { + wco->qual = (Node *) list_make1(wco->qual); + } + + wcoExpr = ExecInitQual((List *) wco->qual, (PlanState *) node); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + /* Set up the ResultRelInfo with WCOs */ + resultRelInfo->ri_WithCheckOptions = withCheckOptions; + resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; +} + +/* + * get_policies_for_relation + * + * Returns lists of permissive and restrictive policies to be applied to the + * specified relation, based on the command type and role. + * + * This includes any policies added by extensions. + * + * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c + */ +static void +get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id, + List **permissive_policies, + List **restrictive_policies) +{ + ListCell *item; + + *permissive_policies = NIL; + *restrictive_policies = NIL; + + /* No policies if RLS descriptor is not present */ + if (relation->rd_rsdesc == NULL) + { + return; + } + + /* First find all internal policies for the relation. */ + foreach(item, relation->rd_rsdesc->policies) + { + bool cmd_matches = false; + RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item); + + /* Always add ALL policies, if they exist. 
*/ + if (policy->polcmd == '*') + { + cmd_matches = true; + } + else + { + /* Check whether the policy applies to the specified command type */ + switch (cmd) + { + case CMD_SELECT: + if (policy->polcmd == ACL_SELECT_CHR) + { + cmd_matches = true; + } + break; + case CMD_INSERT: + if (policy->polcmd == ACL_INSERT_CHR) + { + cmd_matches = true; + } + break; + case CMD_UPDATE: + if (policy->polcmd == ACL_UPDATE_CHR) + { + cmd_matches = true; + } + break; + case CMD_DELETE: + if (policy->polcmd == ACL_DELETE_CHR) + { + cmd_matches = true; + } + break; + case CMD_MERGE: + /* + * We do not support a separate policy for MERGE command. + * Instead it derives from the policies defined for other + * commands. + */ + break; + default: + elog(ERROR, "unrecognized policy command type %d", + (int) cmd); + break; + } + } + + /* + * Add this policy to the relevant list of policies if it applies to + * the specified role. + */ + if (cmd_matches && check_role_for_policy(policy->roles, user_id)) + { + if (policy->permissive) + { + *permissive_policies = lappend(*permissive_policies, policy); + } + else + { + *restrictive_policies = lappend(*restrictive_policies, policy); + } + } + } + + /* + * We sort restrictive policies by name so that any WCOs they generate are + * checked in a well-defined order. + */ + sort_policies_by_name(*restrictive_policies); + + /* + * Then add any permissive or restrictive policies defined by extensions. + * These are simply appended to the lists of internal policies, if they + * apply to the specified role. + */ + if (row_security_policy_hook_restrictive) + { + List *hook_policies = + (*row_security_policy_hook_restrictive) (cmd, relation); + + /* + * As with built-in restrictive policies, we sort any hook-provided + * restrictive policies by name also. Note that we also intentionally + * always check all built-in restrictive policies, in name order, + * before checking restrictive policies added by hooks, in name order. 
+     */
+        sort_policies_by_name(hook_policies);
+
+        foreach(item, hook_policies)
+        {
+            RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+
+            /* keep only the policies that apply to the current role */
+            if (check_role_for_policy(policy->roles, user_id))
+            {
+                *restrictive_policies = lappend(*restrictive_policies, policy);
+            }
+        }
+    }
+
+    if (row_security_policy_hook_permissive)
+    {
+        List *hook_policies =
+            (*row_security_policy_hook_permissive) (cmd, relation);
+
+        foreach(item, hook_policies)
+        {
+            RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+
+            /* keep only the policies that apply to the current role */
+            if (check_role_for_policy(policy->roles, user_id))
+            {
+                *permissive_policies = lappend(*permissive_policies, policy);
+            }
+        }
+    }
+}
+
+/*
+ * add_with_check_options
+ *
+ * Add WithCheckOptions of the specified kind to check that new records
+ * added by an INSERT or UPDATE are consistent with the specified RLS
+ * policies. Normally new data must satisfy the WITH CHECK clauses from the
+ * policies. If a policy has no explicit WITH CHECK clause, its USING clause
+ * is used instead. In the special case of an UPDATE arising from an
+ * INSERT ... ON CONFLICT DO UPDATE, existing records are first checked using
+ * a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING
+ * clauses from RLS policies.
+ *
+ * New WCOs are added to withCheckOptions, and hasSubLinks is set to true if
+ * any of the check clauses added contain sublink subqueries.
+ *
+ * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c
+ */
+static void
+add_with_check_options(Relation rel,
+                       int rt_index,
+                       WCOKind kind,
+                       List *permissive_policies,
+                       List *restrictive_policies,
+                       List **withCheckOptions,
+                       bool *hasSubLinks,
+                       bool force_using)
+{
+    ListCell *item;
+    List *permissive_quals = NIL;
+
+/*
+ * Select the qual to enforce for a policy: the WITH CHECK clause when one
+ * exists (and force_using is not set), otherwise fall back to the USING
+ * clause.
+ */
+#define QUAL_FOR_WCO(policy) \
+    ( !force_using && \
+      (policy)->with_check_qual != NULL ? \
+      (policy)->with_check_qual : (policy)->qual )
+
+    /*
+     * First collect up the permissive policy clauses, similar to
+     * add_security_quals.
+     */
+    foreach(item, permissive_policies)
+    {
+        RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+        Expr *qual = QUAL_FOR_WCO(policy);
+
+        if (qual != NULL)
+        {
+            permissive_quals = lappend(permissive_quals, copyObject(qual));
+            *hasSubLinks |= policy->hassublinks;
+        }
+    }
+
+    /*
+     * There must be at least one permissive qual found or no rows are allowed
+     * to be added. This is the same as in add_security_quals.
+     *
+     * If there are no permissive_quals then we fall through and return a
+     * single 'false' WCO, preventing all new rows.
+     */
+    if (permissive_quals != NIL)
+    {
+        /*
+         * Add a single WithCheckOption for all the permissive policy clauses,
+         * combining them together using OR. This check has no policy name,
+         * since if the check fails it means that no policy granted permission
+         * to perform the update, rather than any particular policy being
+         * violated.
+         */
+        WithCheckOption *wco;
+
+        wco = makeNode(WithCheckOption);
+        wco->kind = kind;
+        wco->relname = pstrdup(RelationGetRelationName(rel));
+        wco->polname = NULL;
+        wco->cascaded = false;
+
+        if (list_length(permissive_quals) == 1)
+        {
+            wco->qual = (Node *) linitial(permissive_quals);
+        }
+        else
+        {
+            wco->qual = (Node *) makeBoolExpr(OR_EXPR, permissive_quals, -1);
+        }
+
+        /* re-point the quals' Vars at the caller's range-table entry */
+        ChangeVarNodes(wco->qual, 1, rt_index, 0);
+
+        *withCheckOptions = list_append_unique(*withCheckOptions, wco);
+
+        /*
+         * Now add WithCheckOptions for each of the restrictive policy clauses
+         * (which will be combined together using AND). We use a separate
+         * WithCheckOption for each restrictive policy to allow the policy
+         * name to be included in error reports if the policy is violated.
+         */
+        foreach(item, restrictive_policies)
+        {
+            RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+            Expr *qual = QUAL_FOR_WCO(policy);
+
+            if (qual != NULL)
+            {
+                qual = copyObject(qual);
+                ChangeVarNodes((Node *) qual, 1, rt_index, 0);
+
+                wco = makeNode(WithCheckOption);
+                wco->kind = kind;
+                wco->relname = pstrdup(RelationGetRelationName(rel));
+                wco->polname = pstrdup(policy->policy_name);
+                wco->qual = (Node *) qual;
+                wco->cascaded = false;
+
+                *withCheckOptions = list_append_unique(*withCheckOptions, wco);
+                *hasSubLinks |= policy->hassublinks;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * If there were no policy clauses to check new data, add a single
+         * always-false WCO (a default-deny policy).
+         */
+        WithCheckOption *wco;
+
+        wco = makeNode(WithCheckOption);
+        wco->kind = kind;
+        wco->relname = pstrdup(RelationGetRelationName(rel));
+        wco->polname = NULL;
+        wco->qual = (Node *) makeConst(BOOLOID, -1, InvalidOid,
+                                       sizeof(bool), BoolGetDatum(false),
+                                       false, true);
+        wco->cascaded = false;
+
+        *withCheckOptions = lappend(*withCheckOptions, wco);
+    }
+}
+
+/*
+ * sort_policies_by_name
+ *
+ * This is only used for restrictive policies, ensuring that any
+ * WithCheckOptions they generate are applied in a well-defined order.
+ * This is not necessary for permissive policies, since they are all combined
+ * together using OR into a single WithCheckOption check.
+ *
+ * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c
+ */
+static void
+sort_policies_by_name(List *policies)
+{
+    list_sort(policies, row_security_policy_cmp);
+}
+
+/*
+ * list_sort comparator to sort RowSecurityPolicy entries by name
+ *
+ * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c
+ */
+static int
+row_security_policy_cmp(const ListCell *a, const ListCell *b)
+{
+    const RowSecurityPolicy *pa = (const RowSecurityPolicy *) lfirst(a);
+    const RowSecurityPolicy *pb = (const RowSecurityPolicy *) lfirst(b);
+
+    /* Guard against NULL policy names from extensions */
+    if (pa->policy_name == NULL)
+    {
+        /* nameless policies sort after named ones; two NULLs compare equal */
+        return pb->policy_name == NULL ? 0 : 1;
+    }
+    if (pb->policy_name == NULL)
+    {
+        return -1;
+    }
+
+    return strcmp(pa->policy_name, pb->policy_name);
+}
+
+/*
+ * check_role_for_policy -
+ *   determines if the policy should be applied for the current role
+ *
+ * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c
+ */
+static bool
+check_role_for_policy(ArrayType *policy_roles, Oid user_id)
+{
+    int i;
+    Oid *roles = (Oid *) ARR_DATA_PTR(policy_roles);
+
+    /* Quick fall-thru for policies applied to all roles */
+    if (roles[0] == ACL_ID_PUBLIC)
+    {
+        return true;
+    }
+
+    /* otherwise user_id must have the privileges of one of the roles */
+    for (i = 0; i < ARR_DIMS(policy_roles)[0]; i++)
+    {
+        if (has_privs_of_role(user_id, roles[i]))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*
+ * add_security_quals
+ *
+ * Add security quals to enforce the specified RLS policies, restricting
+ * access to existing data in a table. If there are no policies controlling
+ * access to the table, then all access is prohibited --- i.e., an implicit
+ * default-deny policy is used.
+ *
+ * New security quals are added to securityQuals, and hasSubLinks is set to
+ * true if any of the quals added contain sublink subqueries.
+ *
+ * Copied from PostgreSQL's src/backend/rewrite/rowsecurity.c
+ */
+static void
+add_security_quals(int rt_index,
+                   List *permissive_policies,
+                   List *restrictive_policies,
+                   List **securityQuals,
+                   bool *hasSubLinks)
+{
+    ListCell *item;
+    List *permissive_quals = NIL;
+    Expr *rowsec_expr;
+
+    /*
+     * First collect up the permissive quals. If we do not find any
+     * permissive policies then no rows are visible (this is handled below).
+     */
+    foreach(item, permissive_policies)
+    {
+        RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+
+        if (policy->qual != NULL)
+        {
+            permissive_quals = lappend(permissive_quals,
+                                       copyObject(policy->qual));
+            *hasSubLinks |= policy->hassublinks;
+        }
+    }
+
+    /*
+     * We must have permissive quals, always, or no rows are visible.
+     *
+     * If we do not, then we simply return a single 'false' qual which results
+     * in no rows being visible.
+     */
+    if (permissive_quals != NIL)
+    {
+        /*
+         * We now know that permissive policies exist, so we can now add
+         * security quals based on the USING clauses from the restrictive
+         * policies. Since these need to be combined together using AND, we
+         * can just add them one at a time.
+         */
+        foreach(item, restrictive_policies)
+        {
+            RowSecurityPolicy *policy = (RowSecurityPolicy *) lfirst(item);
+            Expr *qual;
+
+            if (policy->qual != NULL)
+            {
+                qual = copyObject(policy->qual);
+                ChangeVarNodes((Node *) qual, 1, rt_index, 0);
+
+                *securityQuals = list_append_unique(*securityQuals, qual);
+                *hasSubLinks |= policy->hassublinks;
+            }
+        }
+
+        /*
+         * Then add a single security qual combining together the USING
+         * clauses from all the permissive policies using OR.
+         */
+        if (list_length(permissive_quals) == 1)
+        {
+            rowsec_expr = (Expr *) linitial(permissive_quals);
+        }
+        else
+        {
+            rowsec_expr = makeBoolExpr(OR_EXPR, permissive_quals, -1);
+        }
+
+        ChangeVarNodes((Node *) rowsec_expr, 1, rt_index, 0);
+        *securityQuals = list_append_unique(*securityQuals, rowsec_expr);
+    }
+    else
+    {
+        /*
+         * A permissive policy must exist for rows to be visible at all.
+         * Therefore, if there were no permissive policies found, return a
+         * single always-false clause.
+         */
+        *securityQuals = lappend(*securityQuals,
+                                 makeConst(BOOLOID, -1, InvalidOid,
+                                           sizeof(bool), BoolGetDatum(false),
+                                           false, true));
+    }
+}
+
+/*
+ * setup_security_quals
+ *
+ * Security quals (USING policies) are added during the rewrite phase, but
+ * since AGE uses CMD_SELECT for all queries, they don't get added for
+ * UPDATE/DELETE operations. This function sets up security quals at
+ * execution time to be evaluated against each tuple before modification.
+ *
+ * Returns a list of compiled ExprState for the security quals.
+ *
+ * NOTE(review): the estate parameter is not referenced in this body; it
+ * appears to be kept for interface symmetry with setup_wcos — confirm.
+ */
+List *
+setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate,
+                     CustomScanState *node, CmdType cmd)
+{
+    List *permissive_policies;
+    List *restrictive_policies;
+    List *securityQuals = NIL;
+    List *qualExprs = NIL;
+    ListCell *lc;
+    Relation rel;
+    Oid user_id;
+    int rt_index;
+    bool hasSubLinks = false;
+
+    /* Only UPDATE and DELETE have security quals */
+    if (cmd != CMD_UPDATE && cmd != CMD_DELETE)
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_INTERNAL_ERROR),
+                 errmsg_internal("unexpected command type for setup_security_quals")));
+    }
+
+    rel = resultRelInfo->ri_RelationDesc;
+
+    /* If no RLS policies exist, return empty list */
+    if (rel->rd_rsdesc == NULL)
+    {
+        return NIL;
+    }
+
+    /*
+     * Use rt_index=1 since we're evaluating policies against a single relation.
+     * Policy quals are stored with varno=1, and we set ecxt_scantuple to the
+     * tuple we want to check, so keeping varno=1 is correct.
+     */
+    rt_index = 1;
+    user_id = GetUserId();
+
+    /* Get the policies for the specified command type */
+    get_policies_for_relation(rel, cmd, user_id,
+                              &permissive_policies,
+                              &restrictive_policies);
+
+    /* Build security quals from the policies */
+    add_security_quals(rt_index, permissive_policies, restrictive_policies,
+                       &securityQuals, &hasSubLinks);
+
+    /* Compile the security qual expressions */
+    foreach(lc, securityQuals)
+    {
+        Expr *qual = (Expr *) lfirst(lc);
+        ExprState *qualExpr;
+
+        /* Ensure qual is a List for ExecInitQual */
+        if (!IsA(qual, List))
+        {
+            qual = (Expr *) list_make1(qual);
+        }
+
+        qualExpr = ExecInitQual((List *) qual, (PlanState *) node);
+        qualExprs = lappend(qualExprs, qualExpr);
+    }
+
+    return qualExprs;
+}
+
+/*
+ * check_security_quals
+ *
+ * Evaluate security quals against a tuple. Returns true if all quals pass
+ * (row can be modified), false if any qual fails (row should be silently
+ * skipped).
+ *
+ * This matches PostgreSQL's behavior where USING expressions for UPDATE/DELETE
+ * silently filter rows rather than raising errors.
+ */
+bool
+check_security_quals(List *qualExprs, TupleTableSlot *slot,
+                     ExprContext *econtext)
+{
+    ListCell *lc;
+    TupleTableSlot *saved_scantuple;
+    bool result = true;
+
+    /* no quals compiled means nothing to filter on */
+    if (qualExprs == NIL)
+    {
+        return true;
+    }
+
+    /* Save and set up the scan tuple for expression evaluation */
+    saved_scantuple = econtext->ecxt_scantuple;
+    econtext->ecxt_scantuple = slot;
+
+    foreach(lc, qualExprs)
+    {
+        ExprState *qualExpr = (ExprState *) lfirst(lc);
+
+        /* a single failing qual filters the row; stop early */
+        if (!ExecQual(qualExpr, econtext))
+        {
+            result = false;
+            break;
+        }
+    }
+
+    /* restore the caller's scan tuple before returning */
+    econtext->ecxt_scantuple = saved_scantuple;
+    return result;
+}
+
+/*
+ * check_rls_for_tuple
+ *
+ * Check RLS policies for a tuple without needing full executor context.
+ * Used by standalone functions like startNode()/endNode() that access
+ * tables directly.
+ *
+ * Returns true if the tuple passes RLS checks (or if RLS is not enabled),
+ * false if the tuple should be filtered out.
+ */
+bool
+check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd)
+{
+    List *permissive_policies;
+    List *restrictive_policies;
+    List *securityQuals = NIL;
+    ListCell *lc;
+    Oid user_id;
+    bool hasSubLinks = false;
+    bool result = true;
+    EState *estate;
+    ExprContext *econtext;
+    TupleTableSlot *slot;
+
+    /* If RLS is not enabled, tuple passes */
+    if (check_enable_rls(RelationGetRelid(rel), InvalidOid, true) != RLS_ENABLED)
+    {
+        return true;
+    }
+
+    /* If no RLS policies exist on the relation, tuple passes */
+    if (rel->rd_rsdesc == NULL)
+    {
+        return true;
+    }
+
+    /* Get the policies for the specified command type */
+    user_id = GetUserId();
+    get_policies_for_relation(rel, cmd, user_id,
+                              &permissive_policies,
+                              &restrictive_policies);
+
+    /* Build security quals from the policies (use rt_index=1) */
+    add_security_quals(1, permissive_policies, restrictive_policies,
+                       &securityQuals, &hasSubLinks);
+
+    /* If no quals, tuple passes */
+    if (securityQuals == NIL)
+    {
+        return true;
+    }
+
+    /* Create minimal execution environment */
+    estate = CreateExecutorState();
+    econtext = CreateExprContext(estate);
+
+    /* Create tuple slot and store the tuple */
+    slot = MakeSingleTupleTableSlot(RelationGetDescr(rel), &TTSOpsHeapTuple);
+    ExecStoreHeapTuple(tuple, slot, false);
+    econtext->ecxt_scantuple = slot;
+
+    /* Compile and evaluate each qual */
+    foreach(lc, securityQuals)
+    {
+        Expr *qual = (Expr *) lfirst(lc);
+        ExprState *qualExpr;
+        List *qualList;
+
+        /* ExecPrepareQual expects a List */
+        if (!IsA(qual, List))
+        {
+            qualList = list_make1(qual);
+        }
+        else
+        {
+            qualList = (List *) qual;
+        }
+
+        /* Use ExecPrepareQual for standalone expression evaluation */
+        qualExpr = ExecPrepareQual(qualList, estate);
+
+        /* any failing qual filters the tuple; stop early */
+        if (!ExecQual(qualExpr, econtext))
+        {
+            result = false;
+            break;
+        }
+    }
+
+    /* Clean up */
+    ExecDropSingleTupleTableSlot(slot);
+    FreeExprContext(econtext, true);
+    FreeExecutorState(estate);
+
+    return result;
+}
diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c
index 9960acd7b..991e3f785 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -346,6 +346,100 @@ static bool isa_special_VLE_case(cypher_path *path);
 static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate,
                                      char *varname);
 static bool has_list_comp_or_subquery(Node *expr, void *context);
+
+/*
+ * Add required permissions to the RTEPermissionInfo for a relation.
+ * Recursively searches through RTEs including subqueries.
+ *
+ * Returns true as soon as a matching RTEPermissionInfo has been updated,
+ * false if relid was not found at this level or below.
+ */
+static bool
+add_rte_permissions_recurse(List *rtable, List *rteperminfos,
+                            Oid relid, AclMode permissions)
+{
+    ListCell *lc;
+
+    /* First check the perminfos at this level */
+    foreach(lc, rteperminfos)
+    {
+        RTEPermissionInfo *perminfo = lfirst(lc);
+
+        if (perminfo->relid == relid)
+        {
+            perminfo->requiredPerms |= permissions;
+            return true;
+        }
+    }
+
+    /* Then recurse into subqueries */
+    foreach(lc, rtable)
+    {
+        RangeTblEntry *rte = lfirst(lc);
+
+        if (rte->rtekind == RTE_SUBQUERY && rte->subquery != NULL)
+        {
+            if (add_rte_permissions_recurse(rte->subquery->rtable,
+                                            rte->subquery->rteperminfos,
+                                            relid, permissions))
+            {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+/*
+ * Add required permissions to the RTEPermissionInfo for a relation.
+ * Searches through p_rteperminfos and subqueries for a matching relOid
+ * and adds the specified permissions to requiredPerms.
+ */
+static void
+add_rte_permissions(ParseState *pstate, Oid relid, AclMode permissions)
+{
+    add_rte_permissions_recurse(pstate->p_rtable, pstate->p_rteperminfos,
+                                relid, permissions);
+}
+
+/*
+ * Add required permissions to the label table for a given entity variable.
+ * Looks up the entity by variable name, extracts its label, and adds
+ * the specified permissions to the corresponding RTEPermissionInfo.
+ */ +static void +add_entity_permissions(cypher_parsestate *cpstate, char *var_name, + AclMode permissions) +{ + ParseState *pstate = (ParseState *)cpstate; + transform_entity *entity; + char *label = NULL; + Oid relid; + + entity = find_variable(cpstate, var_name); + if (entity == NULL) + { + return; + } + + if (entity->type == ENT_VERTEX) + { + label = entity->entity.node->label; + } + else if (entity->type == ENT_EDGE) + { + label = entity->entity.rel->label; + } + + if (label == NULL) + { + return; + } + + relid = get_label_relation(label, cpstate->graph_oid); + if (OidIsValid(relid)) + { + add_rte_permissions(pstate, relid, permissions); + } +} + /* * transform a cypher_clause */ @@ -1561,6 +1655,9 @@ static List *transform_cypher_delete_item_list(cypher_parsestate *cpstate, parser_errposition(pstate, col->location))); } + /* Add ACL_DELETE permission to the entity's label table */ + add_entity_permissions(cpstate, val->sval, ACL_DELETE); + add_volatile_wrapper_to_target_entry(query->targetList, resno); pos = makeInteger(resno); @@ -1726,6 +1823,9 @@ cypher_update_information *transform_cypher_remove_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); @@ -1903,6 +2003,9 @@ cypher_update_information *transform_cypher_set_item_list( parser_errposition(pstate, set_item->location))); } + /* Add ACL_UPDATE permission to the entity's label table */ + add_entity_permissions(cpstate, variable_name, ACL_UPDATE); + add_volatile_wrapper_to_target_entry(query->targetList, item->entity_position); diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index f2458a30b..c552727d8 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -44,7 +44,10 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include 
"parser/parse_coerce.h" +#include "nodes/nodes.h" +#include "utils/acl.h" #include "utils/builtins.h" +#include "executor/cypher_utils.h" #include "utils/float.h" #include "utils/lsyscache.h" #include "utils/snapmgr.h" @@ -5625,15 +5628,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, HeapTuple tuple; TupleDesc tupdesc; Datum id, properties, result; + AclResult aclresult; /* get the specific graph namespace (schema) */ Oid graph_namespace_oid = get_namespace_oid(graph, false); /* get the specific vertex label table (schema.vertex_label) */ Oid vertex_label_table_oid = get_relname_relid(vertex_label, - graph_namespace_oid); + graph_namespace_oid); /* get the active snapshot */ Snapshot snapshot = GetActiveSnapshot(); + /* check for SELECT permission on the table */ + aclresult = pg_class_aclcheck(vertex_label_table_oid, GetUserId(), + ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, vertex_label); + } + /* initialize the scan key */ ScanKeyInit(&scan_keys[0], 1, BTEqualStrategyNumber, F_OIDEQ, Int64GetDatum(graphid)); @@ -5646,11 +5658,24 @@ static Datum get_vertex(const char *graph, const char *vertex_label, /* bail if the tuple isn't valid */ if (!HeapTupleIsValid(tuple)) { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("graphid %lu does not exist", graphid))); } + /* Check RLS policies - error if filtered out */ + if (!check_rls_for_tuple(graph_vertex_label, tuple, CMD_SELECT)) + { + table_endscan(scan_desc); + table_close(graph_vertex_label, ShareLock); + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("access to vertex %lu denied by row-level security policy on \"%s\"", + graphid, vertex_label))); + } + /* get the tupdesc - we don't need to release this one */ tupdesc = RelationGetDescr(graph_vertex_label); /* bail if the number of columns differs */ diff --git 
a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h
index 0798f153c..fc4067455 100644
--- a/src/include/executor/cypher_utils.h
+++ b/src/include/executor/cypher_utils.h
@@ -21,6 +21,7 @@
 #define AG_CYPHER_UTILS_H
 
 #include "access/heapam.h"
+#include "nodes/execnodes.h"
 #include "nodes/cypher_nodes.h"
 #include "utils/agtype.h"
 
@@ -127,4 +128,25 @@ HeapTuple insert_entity_tuple_cid(ResultRelInfo *resultRelInfo,
                                   TupleTableSlot *elemTupleSlot,
                                   EState *estate, CommandId cid);
 
+/* RLS support */
+/* build WithCheckOptions for INSERT/UPDATE RLS enforcement */
+void setup_wcos(ResultRelInfo *resultRelInfo, EState *estate,
+                CustomScanState *node, CmdType cmd);
+/* compile USING-policy quals for UPDATE/DELETE; returns list of ExprState */
+List *setup_security_quals(ResultRelInfo *resultRelInfo, EState *estate,
+                           CustomScanState *node, CmdType cmd);
+/* evaluate compiled quals against a tuple; false means silently skip row */
+bool check_security_quals(List *qualExprs, TupleTableSlot *slot,
+                          ExprContext *econtext);
+/* standalone per-tuple RLS check (no executor context required) */
+bool check_rls_for_tuple(Relation rel, HeapTuple tuple, CmdType cmd);
+
+/* Hash table entry for caching RLS state per label */
+typedef struct RLSCacheEntry
+{
+    Oid relid;                  /* hash key */
+    /* Security quals (USING policies) for UPDATE/DELETE */
+    List *qualExprs;
+    TupleTableSlot *slot;       /* slot for old tuple (RLS check) */
+    /* WCOs - used only in SET */
+    List *withCheckOptions;
+    List *withCheckOptionExprs;
+} RLSCacheEntry;
+
 #endif