From 7b2234355b9434f9aca9339a04c8f295845339be Mon Sep 17 00:00:00 2001 From: Umar Hayat Date: Sat, 22 Feb 2025 03:20:38 +0900 Subject: [PATCH 1/9] Add support for PG17 (#2130) - A new node type is introduced for JSON support, that is JsonConstructorExpr - wrapper over FuncExpr/Aggref/WindowFunc for SQL/JSON constructors. - Added additional checks for JsonConstructorExpr expression node for which the walker would crash. - Removed palloc0fast function call (which is not available in PG17) --- src/backend/nodes/ag_nodes.c | 2 +- src/backend/parser/cypher_analyze.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/backend/nodes/ag_nodes.c b/src/backend/nodes/ag_nodes.c index e20670b2b..54bd27314 100644 --- a/src/backend/nodes/ag_nodes.c +++ b/src/backend/nodes/ag_nodes.c @@ -156,7 +156,7 @@ ExtensibleNode *_new_ag_node(Size size, ag_node_tag tag) { ExtensibleNode *n; - n = (ExtensibleNode *)palloc0fast(size); + n = (ExtensibleNode *)palloc0(size); n->type = T_ExtensibleNode; n->extnodename = node_names[tag]; diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 128acd0fb..d293df8b0 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -174,6 +174,8 @@ static bool convert_cypher_walker(Node *node, ParseState *pstate) * OpExpr - expression node for an operator invocation * Const - constant value or expression node * BoolExpr - expression node for the basic Boolean operators AND, OR, NOT + * JsonConstructorExpr - wrapper over FuncExpr/Aggref/WindowFunc for + * SQL/JSON constructors * * These are a special case that needs to be ignored. 
* @@ -181,7 +183,8 @@ static bool convert_cypher_walker(Node *node, ParseState *pstate) if (IsA(funcexpr, SQLValueFunction) || IsA(funcexpr, CoerceViaIO) || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) - || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr)) + || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) + || IsA(funcexpr, JsonConstructorExpr)) { return false; } @@ -346,6 +349,8 @@ static bool is_func_cypher(FuncExpr *funcexpr) * OpExpr - expression node for an operator invocation * Const - constant value or expression node * BoolExpr - expression node for the basic Boolean operators AND, OR, NOT + * JsonConstructorExpr - wrapper over FuncExpr/Aggref/WindowFunc for + * SQL/JSON constructors * * These are a special case that needs to be ignored. * @@ -353,7 +358,8 @@ static bool is_func_cypher(FuncExpr *funcexpr) if (IsA(funcexpr, SQLValueFunction) || IsA(funcexpr, CoerceViaIO) || IsA(funcexpr, Var) || IsA(funcexpr, OpExpr) - || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr)) + || IsA(funcexpr, Const) || IsA(funcexpr, BoolExpr) + || IsA(funcexpr, JsonConstructorExpr)) { return false; } From 43dcfa57f19a689ef696fad7d3d9b88401f7b2eb Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Tue, 4 Mar 2025 20:25:25 +0500 Subject: [PATCH 2/9] Update CI, README and repo settings for PG17 (#2156) - Currently, all workflows are targeting the `PG17_prepare` branch, which will be changed to `PG17` once the branch is renamed. 
- Updated all the github workflows - Updated the README - Updated repo settings - Updated the Dockerfiles --- .asf.yaml | 4 +++ .github/labeler.yml | 3 ++ .github/workflows/go-driver.yml | 4 +-- .github/workflows/installcheck.yaml | 41 ++++++++++++++++------------ .github/workflows/jdbc-driver.yaml | 4 +-- .github/workflows/nodejs-driver.yaml | 4 +-- .github/workflows/python-driver.yaml | 4 +-- README.md | 10 +++---- docker/Dockerfile | 12 ++++---- docker/Dockerfile.dev | 4 +-- drivers/docker-compose.yml | 2 +- 11 files changed, 52 insertions(+), 40 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index 049387b12..75419f24f 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -49,6 +49,10 @@ github: required_pull_request_reviews: required_approving_review_count: 2 + PG17: + required_pull_request_reviews: + required_approving_review_count: 2 + PG16: required_pull_request_reviews: required_approving_review_count: 2 diff --git a/.github/labeler.yml b/.github/labeler.yml index 92ab6db8e..6dfd5f530 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -13,5 +13,8 @@ PG14: PG15: - base-branch: 'PG15' +PG17: +- base-branch: 'PG17' + master: - base-branch: 'master' \ No newline at end of file diff --git a/.github/workflows/go-driver.yml b/.github/workflows/go-driver.yml index 5b6d15030..ea0092b6e 100644 --- a/.github/workflows/go-driver.yml +++ b/.github/workflows/go-driver.yml @@ -2,10 +2,10 @@ name: Go Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG17_prepare" ] pull_request: - branches: [ "master" ] + branches: [ "PG17_prepare" ] jobs: build: diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml index dddefa48a..276b2709e 100644 --- a/.github/workflows/installcheck.yaml +++ b/.github/workflows/installcheck.yaml @@ -2,57 +2,62 @@ name: Build / Regression on: push: - branches: [ "master" ] + branches: [ "PG17_prepare" ] pull_request: - branches: [ "master" ] + branches: [ "PG17_prepare" ] jobs: build: runs-on: ubuntu-latest 
steps: - - name: Get latest commit id of PostgreSQL 16 + - name: Get latest commit id of PostgreSQL 17 run: | - echo "PG_COMMIT_HASH=$(git ls-remote git://git.postgresql.org/git/postgresql.git refs/heads/REL_16_STABLE | awk '{print $1}')" >> $GITHUB_ENV + echo "PG_COMMIT_HASH=$(git ls-remote git://git.postgresql.org/git/postgresql.git refs/heads/REL_17_STABLE | awk '{print $1}')" >> $GITHUB_ENV - - name: Cache PostgreSQL 16 + - name: Cache PostgreSQL 17 uses: actions/cache@v3 - id: pg16cache + id: pg17cache with: - path: ~/pg16 - key: ${{ runner.os }}-v1-pg16-${{ env.PG_COMMIT_HASH }} + path: ~/pg17 + key: ${{ runner.os }}-v1-pg17-${{ env.PG_COMMIT_HASH }} - - name: Install PostgreSQL 16 and some extensions - if: steps.pg16cache.outputs.cache-hit != 'true' + - name: Install dependencies run: | - git clone --depth 1 --branch REL_16_STABLE git://git.postgresql.org/git/postgresql.git ~/pg16source - cd ~/pg16source - ./configure --prefix=$HOME/pg16 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert + sudo apt-get update + sudo apt-get install -y build-essential libreadline-dev zlib1g-dev flex bison + + - name: Install PostgreSQL 17 and some extensions + if: steps.pg17cache.outputs.cache-hit != 'true' + run: | + git clone --depth 1 --branch REL_17_STABLE git://git.postgresql.org/git/postgresql.git ~/pg17source + cd ~/pg17source + ./configure --prefix=$HOME/pg17 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert make install -j$(nproc) > /dev/null cd contrib cd fuzzystrmatch - make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null cd ../pg_trgm - make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null - uses: actions/checkout@v3 - name: Build AGE id: build run: | - make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) + make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) - name: Pull and build 
pgvector id: pgvector run: | git clone https://github.com/pgvector/pgvector.git cd pgvector - make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null + make PG_CONFIG=$HOME/pg17/bin/pg_config install -j$(nproc) > /dev/null - name: Regression tests id: regression_tests run: | - make PG_CONFIG=$HOME/pg16/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm" + make PG_CONFIG=$HOME/pg17/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm" continue-on-error: true - name: Dump regression test errors diff --git a/.github/workflows/jdbc-driver.yaml b/.github/workflows/jdbc-driver.yaml index 29b368438..2b074b855 100644 --- a/.github/workflows/jdbc-driver.yaml +++ b/.github/workflows/jdbc-driver.yaml @@ -2,10 +2,10 @@ name: JDBC Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG17_prepare" ] pull_request: - branches: [ "master" ] + branches: [ "PG17_prepare" ] jobs: build: diff --git a/.github/workflows/nodejs-driver.yaml b/.github/workflows/nodejs-driver.yaml index 3d9e07023..8e8d2af67 100644 --- a/.github/workflows/nodejs-driver.yaml +++ b/.github/workflows/nodejs-driver.yaml @@ -2,10 +2,10 @@ name: Nodejs Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG17_prepare" ] pull_request: - branches: [ "master" ] + branches: [ "PG17_prepare" ] jobs: build: diff --git a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index 099b5c871..03f1ca84a 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -2,10 +2,10 @@ name: Python Driver Tests on: push: - branches: [ "master" ] + branches: [ "PG17_prepare" ] pull_request: - branches: [ "master" ] + branches: [ "PG17_prepare" ] jobs: build: diff --git a/README.md b/README.md index a89c6f65a..613a6643c 100644 --- a/README.md +++ b/README.md @@ -33,8 +33,8 @@   - - + +   @@ -125,7 +125,7 @@ Apache AGE is intended to be simple to install and run. 
It can be installed with  Install PostgreSQL -You will need to install an AGE compatible version of Postgres, for now AGE supports Postgres 11, 12, 13, 14, 15 & 16. Supporting the latest versions is on AGE roadmap. +You will need to install an AGE compatible version of Postgres, for now AGE supports Postgres 11, 12, 13, 14, 15, 16 & 17. Supporting the latest versions is on AGE roadmap.

 Installation via Package Manager @@ -143,7 +143,7 @@ sudo apt install postgresql  Installation From Source Code

-You can
download the Postgres source code and install your own instance of Postgres. You can read instructions on how to install from source code for different versions on the official Postgres Website. +You can download the Postgres source code and install your own instance of Postgres. You can read instructions on how to install from source code for different versions on the official Postgres Website. @@ -152,7 +152,7 @@ You can download the Postgres Clone the github repository or download the download an official release. -Run the pg_config utility and check the version of PostgreSQL. Currently, only PostgreSQL versions 11, 12, 13, 14, 15 & 16 are supported. If you have any other version of Postgres, you will need to install PostgreSQL version 11, 12, 13, 14, 15, or 16. +Run the pg_config utility and check the version of PostgreSQL. Currently, only PostgreSQL versions 11, 12, 13, 14, 15, 16 & 17 are supported. If you have any other version of Postgres, you will need to install PostgreSQL version 11, 12, 13, 14, 15, 16, or 17.
```bash diff --git a/docker/Dockerfile b/docker/Dockerfile index 336070589..91c626d63 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,14 +17,14 @@ # # Build stage: Install necessary development tools for compilation and installation -FROM postgres:16 AS build +FROM postgres:17 AS build RUN apt-get update \ && apt-get install -y --no-install-recommends --no-install-suggests \ bison \ build-essential \ flex \ - postgresql-server-dev-16 + postgresql-server-dev-17 COPY . /age @@ -34,7 +34,7 @@ RUN make && make install # Final stage: Create a final image by copying the files created in the build stage -FROM postgres:16 +FROM postgres:17 RUN apt-get update \ && apt-get install -y --no-install-recommends --no-install-suggests \ @@ -48,9 +48,9 @@ ENV LANG=en_US.UTF-8 ENV LC_COLLATE=en_US.UTF-8 ENV LC_CTYPE=en_US.UTF-8 -COPY --from=build /usr/lib/postgresql/16/lib/age.so /usr/lib/postgresql/16/lib/ -COPY --from=build /usr/share/postgresql/16/extension/age--1.5.0.sql /usr/share/postgresql/16/extension/ -COPY --from=build /usr/share/postgresql/16/extension/age.control /usr/share/postgresql/16/extension/ +COPY --from=build /usr/lib/postgresql/17/lib/age.so /usr/lib/postgresql/17/lib/ +COPY --from=build /usr/share/postgresql/17/extension/age--1.5.0.sql /usr/share/postgresql/17/extension/ +COPY --from=build /usr/share/postgresql/17/extension/age.control /usr/share/postgresql/17/extension/ COPY docker/docker-entrypoint-initdb.d/00-create-extension-age.sql /docker-entrypoint-initdb.d/00-create-extension-age.sql CMD ["postgres", "-c", "shared_preload_libraries=age"] diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index bdf0c40d0..48b2db3ed 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -17,14 +17,14 @@ # -FROM postgres:16 +FROM postgres:17 RUN apt-get update RUN apt-get install --assume-yes --no-install-recommends --no-install-suggests \ bison \ build-essential \ flex \ - postgresql-server-dev-16 \ + postgresql-server-dev-17 \ locales ENV 
LANG=en_US.UTF-8 diff --git a/drivers/docker-compose.yml b/drivers/docker-compose.yml index 9ec072db5..3789fe4a9 100644 --- a/drivers/docker-compose.yml +++ b/drivers/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.3" services: db: - image: apache/age:dev_snapshot_master + image: apache/age:dev_snapshot_PG17_prepare environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=agens From ad425f4857623490bc8114f372595ad97795de8a Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Mon, 12 May 2025 19:40:33 +0500 Subject: [PATCH 3/9] Remove stale bot and update .asf.yaml settings (#2171) - Removed stale bot. (https://lists.apache.org/thread/qh4h2z6hsjy2v7wg8mwfnl6cbjp28y08) - Decrease required PR approvals by one. (https://lists.apache.org/thread/kmz155t6k0h3b26fjpz36924zthqjlpm) - Fixed a warning reported by apache infra i.e. "An error occurred while processing the github feature in .asf.yaml: GitHub discussions can only be enabled if a mailing list target exists for it." --- .asf.yaml | 15 ++++++++------- .github/workflows/stale.yaml | 26 -------------------------- 2 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 .github/workflows/stale.yaml diff --git a/.asf.yaml b/.asf.yaml index 75419f24f..da422d3bd 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -19,6 +19,7 @@ notifications: commits: commits@age.apache.org pullrequests: commits@age.apache.org + discussions: dev@age.apache.org github: description: "Graph database optimized for fast analysis and real-time data processing. 
@@ -47,7 +48,7 @@ github: protected_branches: master: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG17: required_pull_request_reviews: @@ -55,24 +56,24 @@ github: PG16: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG15: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG14: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG13: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG12: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 PG11: required_pull_request_reviews: - required_approving_review_count: 2 + required_approving_review_count: 1 diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml deleted file mode 100644 index cd3bcd16e..000000000 --- a/.github/workflows/stale.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: 'Close stale issues and PRs' -on: - schedule: - - cron: '0 0 * * *' - -jobs: - stale: - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v9 - with: - stale-issue-message: 'This issue is stale because it has been open 60 days with no activity. Remove "Abondoned" label or comment or this will be closed in 14 days.' - close-issue-message: 'This issue was closed because it has been stalled for further 14 days with no activity.' - stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove "Abondoned" label or comment or this will be closed in 14 days.' 
- close-pr-message: 'This PR was closed because it has been stalled for further 14 days with no activity' - stale-issue-label: Stale - exempt-issue-labels: 'override-stale' - stale-pr-label: Stale - exempt-pr-labels: 'override-stale' - days-before-issue-stale: 60 - days-before-issue-close: 14 - days-before-pr-stale: 60 - days-before-pr-close: 14 - # only stale issue/PR created after the 1st Jan 2023: - start-date: '2023-01-01T00:00:00Z' - operations-per-run: 500 \ No newline at end of file From 1d957808c331e68b188e464dc41873fa2a605a70 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 6 Jun 2025 20:32:15 -0700 Subject: [PATCH 4/9] Update labeler.yml Adjust workflow/labeler.yml to add permissions. --- .github/workflows/labeler.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 4234e3582..5c302618e 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -5,8 +5,9 @@ on: jobs: triage: permissions: - contents: read + contents: write pull-requests: write + issues: write runs-on: ubuntu-latest steps: - name: Apply branch labels From 1d87379741a37e33990bda705d436a3db6c054de Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 8 Jun 2025 00:42:56 -0700 Subject: [PATCH 5/9] Adjust CI for PG17 after rename from PG17_prepare (#2182) Adjusted the following CI files (workflows) for PG17, they originally pointed to PG17_prepare - modified: .github/workflows/go-driver.yml modified: .github/workflows/installcheck.yaml modified: .github/workflows/jdbc-driver.yaml modified: .github/workflows/nodejs-driver.yaml modified: .github/workflows/python-driver.yaml modified: drivers/docker-compose.yml modified: .github/labeler.yml --- .github/labeler.yml | 5 ++++- .github/workflows/go-driver.yml | 4 ++-- .github/workflows/installcheck.yaml | 8 ++++---- .github/workflows/jdbc-driver.yaml | 4 ++-- .github/workflows/labeler.yml | 3 +++ .github/workflows/nodejs-driver.yaml 
| 4 ++-- .github/workflows/python-driver.yaml | 4 ++-- drivers/docker-compose.yml | 2 +- 8 files changed, 20 insertions(+), 14 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 6dfd5f530..6baa297c5 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -13,8 +13,11 @@ PG14: PG15: - base-branch: 'PG15' +PG16: +- base-branch: 'PG16' + PG17: - base-branch: 'PG17' master: -- base-branch: 'master' \ No newline at end of file +- base-branch: 'master' diff --git a/.github/workflows/go-driver.yml b/.github/workflows/go-driver.yml index ea0092b6e..64044f91e 100644 --- a/.github/workflows/go-driver.yml +++ b/.github/workflows/go-driver.yml @@ -2,10 +2,10 @@ name: Go Driver Tests on: push: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] pull_request: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] jobs: build: diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml index 276b2709e..f2b69a02f 100644 --- a/.github/workflows/installcheck.yaml +++ b/.github/workflows/installcheck.yaml @@ -2,9 +2,9 @@ name: Build / Regression on: push: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] pull_request: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] jobs: build: @@ -13,7 +13,7 @@ jobs: steps: - name: Get latest commit id of PostgreSQL 17 run: | - echo "PG_COMMIT_HASH=$(git ls-remote git://git.postgresql.org/git/postgresql.git refs/heads/REL_17_STABLE | awk '{print $1}')" >> $GITHUB_ENV + echo "PG_COMMIT_HASH=$(git ls-remote https://git.postgresql.org/git/postgresql.git refs/heads/REL_17_STABLE | awk '{print $1}')" >> $GITHUB_ENV - name: Cache PostgreSQL 17 uses: actions/cache@v3 @@ -30,7 +30,7 @@ jobs: - name: Install PostgreSQL 17 and some extensions if: steps.pg17cache.outputs.cache-hit != 'true' run: | - git clone --depth 1 --branch REL_17_STABLE git://git.postgresql.org/git/postgresql.git ~/pg17source + git clone --depth 1 --branch REL_17_STABLE https://git.postgresql.org/git/postgresql.git 
~/pg17source cd ~/pg17source ./configure --prefix=$HOME/pg17 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert make install -j$(nproc) > /dev/null diff --git a/.github/workflows/jdbc-driver.yaml b/.github/workflows/jdbc-driver.yaml index 2b074b855..54ca612f1 100644 --- a/.github/workflows/jdbc-driver.yaml +++ b/.github/workflows/jdbc-driver.yaml @@ -2,10 +2,10 @@ name: JDBC Driver Tests on: push: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] pull_request: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] jobs: build: diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 5c302618e..d5fc8c835 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -10,6 +10,9 @@ jobs: issues: write runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Apply branch labels uses: actions/labeler@v5.0.0 diff --git a/.github/workflows/nodejs-driver.yaml b/.github/workflows/nodejs-driver.yaml index 8e8d2af67..156cb8518 100644 --- a/.github/workflows/nodejs-driver.yaml +++ b/.github/workflows/nodejs-driver.yaml @@ -2,10 +2,10 @@ name: Nodejs Driver Tests on: push: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] pull_request: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] jobs: build: diff --git a/.github/workflows/python-driver.yaml b/.github/workflows/python-driver.yaml index 03f1ca84a..70488e738 100644 --- a/.github/workflows/python-driver.yaml +++ b/.github/workflows/python-driver.yaml @@ -2,10 +2,10 @@ name: Python Driver Tests on: push: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] pull_request: - branches: [ "PG17_prepare" ] + branches: [ "PG17" ] jobs: build: diff --git a/drivers/docker-compose.yml b/drivers/docker-compose.yml index 3789fe4a9..c83d26f47 100644 --- a/drivers/docker-compose.yml +++ b/drivers/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.3" services: db: - image: apache/age:dev_snapshot_PG17_prepare + image: apache/age:dev_snapshot_PG17 environment: - 
POSTGRES_USER=postgres - POSTGRES_PASSWORD=agens From 99f2c23ca8bcb1320dcae751674a5bc0562f0528 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Tue, 17 Jun 2025 10:36:32 +0500 Subject: [PATCH 6/9] Reimplement list comprehension (#2169) (#2188) * Revert "Fix issue 1955 - List comprehension in WHERE clause (#2094)" This reverts commit 0f0d9be9ba02fb90272d2053986f2b5aa4a0c25c. * Revert "Fix error using list comprehension with WITH * (#1838)" This reverts commit 5e08a2f58693adca55085da8d56eb1831d963d20. * Revert "Fix shift/reduce conflict in grammar (#1719)" This reverts commit fab3119a109280fd63237ce17c6d4dd60b7dfc03. * Revert "Implement list comprehension (#1610)" This reverts commit 3b2b394eb669c4f80fc893ad224cf5ea4e10c5a9. * Reimplement list comprehension - Reimplement list comprehension to use ARRAY sublinks. - Some test results were not correct. All the test results that are modified are correct and are verified with neo4j. - Also resolves the issue of using list comprehension in MERGE clause (1611) and issue 1850 * Add expression tree walkers for cypher nodes - Added cypher_raw_expr_tree_walker and cypher_expr_tree_walker, based on Postgres's raw_expression_tree_walker and expression_tree_walker. These follow the same walker API as Postgres and add support for Cypher-specific nodes. - Also added the agtype[] to agtype func and typecast to 1.5.0-y.y.y.sql - Simplifies logic for cypher_subquery handling in where clause. - Fixes a crash when list comprehension in the WHERE clause references a variable from the preceding MATCH clause. 
--- age--1.5.0--y.y.y.sql | 13 +- regress/expected/list_comprehension.out | 102 ++++-- regress/sql/list_comprehension.sql | 16 +- sql/agtype_coercions.sql | 11 + sql/agtype_typecast.sql | 3 +- src/backend/nodes/ag_nodes.c | 2 + src/backend/nodes/cypher_outfuncs.c | 14 +- src/backend/optimizer/cypher_pathnode.c | 82 ++++- src/backend/parser/cypher_analyze.c | 416 +++++++----------------- src/backend/parser/cypher_clause.c | 320 +++++++++--------- src/backend/parser/cypher_expr.c | 67 +--- src/backend/parser/cypher_gram.y | 148 +++------ src/backend/parser/cypher_item.c | 242 +------------- src/backend/utils/adt/agtype.c | 55 +--- src/include/nodes/ag_nodes.h | 2 +- src/include/nodes/cypher_nodes.h | 14 +- src/include/nodes/cypher_outfuncs.h | 1 + src/include/parser/cypher_analyze.h | 18 +- src/include/parser/cypher_clause.h | 9 - src/include/parser/cypher_item.h | 2 - src/include/parser/cypher_parse_node.h | 1 - 21 files changed, 578 insertions(+), 960 deletions(-) diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 85c2db7a4..6b7560aa2 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -137,4 +137,15 @@ PARALLEL SAFE AS 'MODULE_PATHNAME'; CREATE CAST (agtype AS json) - WITH FUNCTION ag_catalog.agtype_to_json(agtype); \ No newline at end of file + WITH FUNCTION ag_catalog.agtype_to_json(agtype); + +CREATE FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]) + RETURNS agtype + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + +CREATE CAST (agtype[] AS agtype) + WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); \ No newline at end of file diff --git a/regress/expected/list_comprehension.out b/regress/expected/list_comprehension.out index bf5731d2a..07f777707 100644 --- a/regress/expected/list_comprehension.out +++ b/regress/expected/list_comprehension.out @@ -166,9 +166,9 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH p=(u) RETURN [i IN range(0, SELECT * FROM 
cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list] $$) AS (result agtype); result ------------------------- + [0, 2, 4, 6, 8, 10, 12] [1, 3, 5, 7, 9, 11, 13] [] - [0, 2, 4, 6, 8, 10, 12] (3 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHERE i % 3 = 0] $$) AS (result agtype); @@ -176,35 +176,40 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHER ------------ [0, 6, 12] [3, 9] -(2 rows) + [] +(3 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHERE i % 3 = 0 | i/3] $$) AS (result agtype); result ----------- [0, 2, 4] [1, 3] -(2 rows) + [] +(3 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHERE i % 3 = 0 | i/3][1] $$) AS (result agtype); result -------- 2 3 -(2 rows) + +(3 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHERE i % 3 = 0 | i/3][0..2] $$) AS (result agtype); result -------- [0, 2] [1, 3] -(2 rows) + [] +(3 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) RETURN [i IN u.list WHERE i % 3 = 0 | i/3][0..2][1] $$) AS (result agtype); result -------- 2 3 -(2 rows) + +(3 rows) -- Nested cases SELECT * FROM cypher('list_comprehension', $$ RETURN [i IN [i IN [1,2,3]]] $$) AS (result agtype); @@ -299,9 +304,9 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list@>[i IN rang SELECT * FROM cypher('list_comprehension', $$ MATCH (u) MATCH (v) WHERE v.list=[i IN u.list] RETURN v $$) AS (result agtype); result ----------------------------------------------------------------------------------------------- - {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex (3 rows) SELECT * FROM 
cypher('list_comprehension', $$ MATCH (u {list:[i IN range(0,12,2)]}) RETURN u $$) AS (result agtype); @@ -330,11 +335,11 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list:[i IN range(1,13,2) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) MATCH (v {list:[i IN u.list]}) RETURN v $$) AS (result agtype); result ----------------------------------------------------------------------------------------------- - {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex - {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex + {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex (5 rows) SELECT * FROM cypher('list_comprehension', $$ CREATE (u {list:[i IN range(12,24,2)]}) RETURN u $$) AS (result agtype); @@ -371,26 +376,26 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH(u) WITH u WHERE u.list = [u SELECT * FROM cypher('list_comprehension', $$ MATCH(u) WITH u WHERE u.list = [u IN [0, 2, 4, 6, 8, 10, 12]] OR size(u.list) = 0 RETURN u $$) AS (u agtype); u ----------------------------------------------------------------------------------------------- - {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex (2 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH(u) WITH u WHERE u.list = [u IN [0, 2, 4, 6, 8, 10, 12]] OR size(u.list)::bool RETURN u $$) AS (u agtype); u 
-------------------------------------------------------------------------------------------------------- + {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex - {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex - {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex (6 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH(u) WITH u WHERE u.list = [u IN [0, 2, 4, 6, 8, 10, 12]] OR NOT size(u.list)::bool RETURN u $$) AS (u agtype); u ----------------------------------------------------------------------------------------------- - {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710657, "label": "", "properties": {"list": [0, 2, 4, 6, 8, 10, 12]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex (2 rows) SELECT * FROM cypher('list_comprehension', $$ CREATE(u:csm_match {list: ['abc', 'def', 'ghi']}) $$) AS (u agtype); @@ -473,9 +478,6 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH(u {list: [0, 2, 4, 6, 8, 10, ERROR: Invalid use of aggregation in this context LINE 1: ..., $$ MATCH(u {list: [0, 2, 4, 6, 8, 10, 12]}) SET u.c = coll... 
^ --- Known issue -SELECT * FROM cypher('list_comprehension', $$ MERGE (u {list:[i IN [1,2,3]]}) RETURN u $$) AS (result agtype); -ERROR: Aggref found in non-Agg plan node -- List comprehension variable scoping SELECT * FROM cypher('list_comprehension', $$ WITH 1 AS m, [m IN [1, 2, 3]] AS n RETURN [m IN [1, 2, 3]] $$) AS (result agtype); result @@ -622,12 +624,12 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.lis result --------------------------------------------------------------------------------------------------------------------------------------------------------------- {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex - {"id": 281474976710657, "label": "", "properties": {"a": [], "b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex - {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex - {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex + {"id": 281474976710657, "label": "", "properties": {"a": [], "b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex {"id": 844424930131969, "label": "csm_match", "properties": {"list": ["abc", "def", "ghi"]}}::vertex (8 rows) @@ -635,11 +637,12 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE u.list=[i IN u.lis result -------------------------------------------------------------------------------------------------------- {"id": 281474976710658, "label": "", 
"properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex - {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex -(5 rows) +(6 rows) SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE size([e in u.list where e starts with "a"])>0 RETURN u $$) AS (result agtype); result @@ -657,11 +660,64 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list WHERE result -------------------------------------------------------------------------------------------------------- {"id": 281474976710658, "label": "", "properties": {"list": [1, 3, 5, 7, 9, 11, 13]}}::vertex + {"id": 281474976710659, "label": "", "properties": {"list": []}}::vertex {"id": 281474976710660, "label": "", "properties": {"list": [12, 14, 16, 18, 20, 22, 24]}}::vertex + {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex {"id": 281474976710662, "label": "", "properties": {"list": [25.0, 49.0, 81.0, 121.0, 169.0]}}::vertex {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex - {"id": 281474976710661, "label": "", "properties": {"list": [6, 8, 10, 12]}}::vertex -(5 rows) +(6 rows) + +-- List comprehension in MERGE +SELECT * FROM cypher('list_comprehension', $$ MERGE (u {list:[i IN [1,2,3]]}) RETURN u $$) AS (result agtype); + result +--------------------------------------------------------------------------------- + {"id": 281474976710663, "label": "", "properties": {"list": [1, 2, 3]}}::vertex +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ MERGE 
(u ={list:[i IN [1,2,3] WHERE i>1]}) RETURN u $$) AS (result agtype); + result +------------------------------------------------------------------------------ + {"id": 281474976710666, "label": "", "properties": {"list": [2, 3]}}::vertex +(1 row) + +SELECT * FROM cypher('list_comprehension', $$ MERGE (u ={list:[i IN [1,2,3] WHERE i>1 | i^2]}) RETURN u $$) AS (result agtype); + result +---------------------------------------------------------------------------------- + {"id": 281474976710667, "label": "", "properties": {"list": [4.0, 9.0]}}::vertex +(1 row) + +-- Issue 1850 +SELECT * FROM cypher('list_comprehension', $$ CREATE (u {list: [0, 2, 4, 6, 8, 10, 12]}) $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('list_comprehension', $$ WITH [1, 2, 3] AS u UNWIND collect(u) AS v RETURN v $$) AS (result agtype); +ERROR: Invalid use of aggregation in this context +LINE 1: ...ist_comprehension', $$ WITH [1, 2, 3] AS u UNWIND collect(u)... + ^ +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WITH u, collect(u.list) AS v SET u += {b: [u IN range(0, 5)]} SET u.c = [u IN v[0]] RETURN u $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 281474976710657, "label": "", "properties": {"a": [], "b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex + {"id": 281474976710668, "label": "", "properties": {"b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex +(2 rows) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) SET u.c = collect(u.list) RETURN u $$) AS (u agtype); +ERROR: Invalid use of aggregation in this context +LINE 1: ... $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) SET u.c = coll... 
+ ^ +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list = [u IN [1, u]] RETURN u $$) AS (u agtype); + u +--- +(0 rows) + +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list IN [u IN [1, u.list]] RETURN u $$) AS (u agtype); + u +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 281474976710657, "label": "", "properties": {"a": [], "b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex + {"id": 281474976710668, "label": "", "properties": {"b": [0, 1, 2, 3, 4, 5], "c": [0, 2, 4, 6, 8, 10, 12], "list": [0, 2, 4, 6, 8, 10, 12]}}::vertex +(2 rows) -- Clean up SELECT * FROM drop_graph('list_comprehension', true); diff --git a/regress/sql/list_comprehension.sql b/regress/sql/list_comprehension.sql index cb941a61b..572b2e6bb 100644 --- a/regress/sql/list_comprehension.sql +++ b/regress/sql/list_comprehension.sql @@ -117,9 +117,6 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH(u {list: [0, 2, 4, 6, 8, 10, -- invalid use of aggregation in SET SELECT * FROM cypher('list_comprehension', $$ MATCH(u {list: [0, 2, 4, 6, 8, 10, 12]}) SET u.c = collect(u.list) RETURN u $$) AS (u agtype); --- Known issue -SELECT * FROM cypher('list_comprehension', $$ MERGE (u {list:[i IN [1,2,3]]}) RETURN u $$) AS (result agtype); - -- List comprehension variable scoping SELECT * FROM cypher('list_comprehension', $$ WITH 1 AS m, [m IN [1, 2, 3]] AS n RETURN [m IN [1, 2, 3]] $$) AS (result agtype); SELECT * FROM cypher('list_comprehension', $$ WITH 1 AS m RETURN [m IN [1, 2, 3]], m $$) AS (result agtype, result2 agtype); @@ -164,5 +161,18 @@ SELECT * FROM cypher('list_comprehension', $$ MATCH (u) WHERE size([e in u.list SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list | i+1]}) RETURN u $$) AS (result agtype); 
SELECT * FROM cypher('list_comprehension', $$ MATCH (u ={list:[i IN u.list WHERE i>0]}) RETURN u$$) AS (result agtype); +-- List comprehension in MERGE +SELECT * FROM cypher('list_comprehension', $$ MERGE (u {list:[i IN [1,2,3]]}) RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MERGE (u ={list:[i IN [1,2,3] WHERE i>1]}) RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MERGE (u ={list:[i IN [1,2,3] WHERE i>1 | i^2]}) RETURN u $$) AS (result agtype); + +-- Issue 1850 +SELECT * FROM cypher('list_comprehension', $$ CREATE (u {list: [0, 2, 4, 6, 8, 10, 12]}) $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ WITH [1, 2, 3] AS u UNWIND collect(u) AS v RETURN v $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WITH u, collect(u.list) AS v SET u += {b: [u IN range(0, 5)]} SET u.c = [u IN v[0]] RETURN u $$) AS (result agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) SET u.c = collect(u.list) RETURN u $$) AS (u agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list = [u IN [1, u]] RETURN u $$) AS (u agtype); +SELECT * FROM cypher('list_comprehension', $$ MATCH (u {list: [0, 2, 4, 6, 8, 10, 12]}) WHERE u.list IN [u IN [1, u.list]] RETURN u $$) AS (u agtype); + -- Clean up SELECT * FROM drop_graph('list_comprehension', true); \ No newline at end of file diff --git a/sql/agtype_coercions.sql b/sql/agtype_coercions.sql index bdc33af80..c7895fabc 100644 --- a/sql/agtype_coercions.sql +++ b/sql/agtype_coercions.sql @@ -173,3 +173,14 @@ AS 'MODULE_PATHNAME'; CREATE CAST (agtype AS json) WITH FUNCTION ag_catalog.agtype_to_json(agtype); + +CREATE FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]) + RETURNS agtype + LANGUAGE c + IMMUTABLE +RETURNS NULL ON NULL INPUT +PARALLEL SAFE +AS 'MODULE_PATHNAME'; + +CREATE CAST (agtype[] AS 
agtype) + WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); diff --git a/sql/agtype_typecast.sql b/sql/agtype_typecast.sql index aa551407a..c29c0a657 100644 --- a/sql/agtype_typecast.sql +++ b/sql/agtype_typecast.sql @@ -181,8 +181,7 @@ CREATE FUNCTION ag_catalog.age_range(variadic "any") PARALLEL SAFE AS 'MODULE_PATHNAME'; -CREATE FUNCTION ag_catalog.age_unnest(agtype, - list_comprehension boolean = false) +CREATE FUNCTION ag_catalog.age_unnest(agtype) RETURNS SETOF agtype LANGUAGE c IMMUTABLE diff --git a/src/backend/nodes/ag_nodes.c b/src/backend/nodes/ag_nodes.c index 54bd27314..7aaaecaa5 100644 --- a/src/backend/nodes/ag_nodes.c +++ b/src/backend/nodes/ag_nodes.c @@ -48,6 +48,7 @@ const char *node_names[] = { "cypher_map_projection", "cypher_map_projection_element", "cypher_list", + "cypher_list_comprehension", "cypher_comparison_aexpr", "cypher_comparison_boolexpr", "cypher_string_match", @@ -115,6 +116,7 @@ const ExtensibleNodeMethods node_methods[] = { DEFINE_NODE_METHODS(cypher_map), DEFINE_NODE_METHODS(cypher_map_projection), DEFINE_NODE_METHODS(cypher_list), + DEFINE_NODE_METHODS(cypher_list_comprehension), DEFINE_NODE_METHODS(cypher_comparison_aexpr), DEFINE_NODE_METHODS(cypher_comparison_boolexpr), DEFINE_NODE_METHODS(cypher_string_match), diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index e245ba639..5bc824f12 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -117,7 +117,6 @@ void out_cypher_with(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_with); WRITE_BOOL_FIELD(distinct); - WRITE_BOOL_FIELD(subquery_intermediate); WRITE_NODE_FIELD(items); WRITE_NODE_FIELD(order_by); WRITE_NODE_FIELD(skip); @@ -176,9 +175,20 @@ void out_cypher_unwind(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_unwind); WRITE_NODE_FIELD(target); - WRITE_NODE_FIELD(collect); } +/* serialization function for the cypher_list_comprehension 
ExtensibleNode. */ +void out_cypher_list_comprehension(StringInfo str, const ExtensibleNode *node) +{ + DEFINE_AG_NODE(cypher_list_comprehension); + + WRITE_STRING_FIELD(varname); + WRITE_NODE_FIELD(expr); + WRITE_NODE_FIELD(where); + WRITE_NODE_FIELD(mapping_expr); +} + + /* serialization function for the cypher_delete ExtensibleNode. */ void out_cypher_merge(StringInfo str, const ExtensibleNode *node) { diff --git a/src/backend/optimizer/cypher_pathnode.c b/src/backend/optimizer/cypher_pathnode.c index a08cd30e4..5e4344254 100644 --- a/src/backend/optimizer/cypher_pathnode.c +++ b/src/backend/optimizer/cypher_pathnode.c @@ -23,6 +23,14 @@ #include "optimizer/cypher_createplan.h" #include "optimizer/cypher_pathnode.h" +#include "parser/cypher_analyze.h" +#include "executor/cypher_utils.h" +#include "optimizer/subselect.h" +#include "nodes/makefuncs.h" + +static Const *convert_sublink_to_subplan(PlannerInfo *root, + List *custom_private); +static bool expr_has_sublink(Node *node, void *context); const CustomPathMethods cypher_create_path_methods = { CREATE_PATH_NAME, plan_cypher_create_path, NULL}; @@ -183,10 +191,80 @@ CustomPath *create_cypher_merge_path(PlannerInfo *root, RelOptInfo *rel, /* Make the original paths the children of the new path */ cp->custom_paths = rel->pathlist; - /* Store the metadata Delete will need in the execution phase. */ - cp->custom_private = custom_private; + + /* + * Store the metadata Merge will need in the execution phase. + * We may have a sublink here in case the user used a list + * comprehension in merge. 
+ */ + if (rel->subroot->parse->hasSubLinks) + { + cp->custom_private = list_make1(convert_sublink_to_subplan(root, custom_private)); + } + else + { + cp->custom_private = custom_private; + } + /* Tells Postgres how to turn this path to the correct CustomScan */ cp->methods = &cypher_merge_path_methods; return cp; } + +/* + * Deserializes the merge information and checks if any property + * expression (prop_expr) contains a SubLink. + * If found, converts the SubLink to a SubPlan, updates the + * structure accordingly, and serializes it back. + */ +static Const *convert_sublink_to_subplan(PlannerInfo *root, List *custom_private) +{ + cypher_merge_information *merge_information; + char *serialized_data = NULL; + Const *c = NULL; + ListCell *lc = NULL; + StringInfo str = makeStringInfo(); + + c = linitial(custom_private); + serialized_data = (char *)c->constvalue; + merge_information = stringToNode(serialized_data); + + Assert(is_ag_node(merge_information, cypher_merge_information)); + + /* Only part where we can expect a sublink is in prop_expr. */ + foreach (lc, merge_information->path->target_nodes) + { + cypher_target_node *node = (cypher_target_node *)lfirst(lc); + Node *prop_expr = (Node *) node->prop_expr; + + if (expr_has_sublink(prop_expr, NULL)) + { + node->prop_expr = (Expr *) SS_process_sublinks(root, prop_expr, false); + } + } + + /* Serialize the information again and return it. */ + outNode(str, (Node *)merge_information); + + return makeConst(INTERNALOID, -1, InvalidOid, str->len, + PointerGetDatum(str->data), false, false); +} + +/* + * Helper function to check if the node has a sublink. 
+ */ +static bool expr_has_sublink(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, SubLink)) + { + return true; + } + + return cypher_expr_tree_walker(node, expr_has_sublink, context); +} diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index d293df8b0..78788649e 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -67,12 +67,6 @@ static Query *analyze_cypher_and_coerce(List *stmt, RangeTblFunction *rtfunc, const char *query_str, int query_loc, char *graph_name, uint32 graph_oid, Param *params); -cypher_clause *build_subquery_node(cypher_clause *next); - -/* expr tree walker */ -bool expr_contains_node(cypher_expression_condition is_expr, Node *expr); -bool expr_has_subquery(Node * expr); - void post_parse_analyze_init(void) { @@ -687,148 +681,40 @@ static int get_query_location(const int location, const char *source_str) return strchr(p + 1, '$') - source_str + 1; } + /* - * This is a specialized expression tree walker for finding exprs of a specified - * type. Create a function that checks for the type you want, and this function - * will iterate through the tree. + * This is an extension of postgres's raw_expression_tree_walker() function. + * It can walk cypher-specific nodes found in the expression tree during + * parse analysis. + * + * More nodes can be added to this function as needed. 
*/ - -bool expr_contains_node(cypher_expression_condition is_expr, Node *expr) +bool cypher_raw_expr_tree_walker_impl(Node *node, + bool (*walker)(Node *node, void *context), + void *context) { - if (!expr) - { - return false; - } - - switch (nodeTag(expr)) - { - case T_A_Const: - case T_ColumnRef: - case T_A_Indirection: - { - break; - } - case T_A_Expr: - { - A_Expr *a = (A_Expr *)expr; - - switch (a->kind) - { - case AEXPR_OP: - case AEXPR_IN: - { - if (expr_contains_node(is_expr, a->lexpr) || - expr_contains_node(is_expr, a->rexpr)) - { - return true; - } - break; - } - default: - ereport(ERROR, (errmsg_internal("unrecognized A_Expr kind: %d", - a->kind))); - } - break; - } - case T_BoolExpr: - { - BoolExpr *b = (BoolExpr *)expr; - ListCell *la; - - foreach(la, b->args) - { - Node *arg = lfirst(la); - - if (expr_contains_node(is_expr, arg)) - { - return true; - } - } - break; - } - case T_NullTest: - { - NullTest *n = (NullTest *)expr; - - if (expr_contains_node(is_expr, (Node *)n->arg)) - { - return true; - } - break; - } - case T_CaseExpr: - { - CaseExpr *cexpr = (CaseExpr *)expr; - ListCell *l; - - if (cexpr->arg && expr_contains_node(is_expr, (Node *) cexpr->arg)) - { - return true; - } - - foreach(l, cexpr->args) - { - CaseWhen *w = lfirst_node(CaseWhen, l); - Node *warg; - - warg = (Node *) w->expr; - - if (expr_contains_node(is_expr, warg)) - { - return true; - } - warg = (Node *)w->result; - - if (expr_contains_node(is_expr, warg)) - { - return true; - } - } - - if (expr_contains_node(is_expr , (Node *)cexpr->defresult)) - { - return true; - } + ListCell *temp; - break; - } - case T_CaseTestExpr: - { - break; - } - case T_CoalesceExpr: - { - CoalesceExpr *cexpr = (CoalesceExpr *) expr; - ListCell *args; + if (node == NULL) + return false; - foreach(args, cexpr->args) - { - Node *e = (Node *)lfirst(args); +#define WALK(n) walker((Node *) (n), context) +#define LIST_WALK(l) cypher_raw_expr_tree_walker_impl((Node *) (l), walker, context) - if 
(expr_contains_node(is_expr, e)) - { - return true; - } - } - break; - } - case T_ExtensibleNode: + if (IsA(node, ExtensibleNode)) { - if (is_ag_node(expr, cypher_bool_const)) + if (is_ag_node(node, cypher_bool_const) || + is_ag_node(node, cypher_integer_const) || + is_ag_node(node, cypher_param) || + is_ag_node(node, cypher_sub_pattern) || + is_ag_node(node, cypher_sub_query)) + /* Add more non-recursible node types here as needed */ { - return is_expr(expr); - } - if (is_ag_node(expr, cypher_integer_const)) - { - return is_expr(expr); - } - if (is_ag_node(expr, cypher_param)) - { - return is_expr(expr); + return false; } - if (is_ag_node(expr, cypher_map)) + else if (is_ag_node(node, cypher_map)) { - cypher_map *cm = (cypher_map *)expr; + cypher_map *cm = (cypher_map *)node; ListCell *le; Assert(list_length(cm->keyvals) % 2 == 0); @@ -843,7 +729,7 @@ bool expr_contains_node(cypher_expression_condition is_expr, Node *expr) val = lfirst(le); - if (expr_contains_node(is_expr, val)) + if (WALK(val)) { return true; } @@ -851,216 +737,163 @@ bool expr_contains_node(cypher_expression_condition is_expr, Node *expr) le = lnext(cm->keyvals, le); } - break; } - if (is_ag_node(expr, cypher_map_projection)) + else if (is_ag_node(node, cypher_map_projection)) { - cypher_map_projection *cmp = (cypher_map_projection *)expr; - ListCell *lc; + cypher_map_projection *cmp = (cypher_map_projection *)node; - foreach(lc, cmp->map_elements) + if (LIST_WALK(cmp->map_elements)) { - cypher_map_projection_element *elem; - - elem = lfirst(lc); - - if (expr_contains_node(is_expr, elem->value)) - { - return true; - } + return true; } - - break; } - if (is_ag_node(expr, cypher_list)) + else if (is_ag_node(node, cypher_list)) { - cypher_list *cl = (cypher_list *)expr; - ListCell *le = NULL; - - foreach(le, cl->elems) + cypher_list *cl = (cypher_list *)node; + + if (LIST_WALK(cl->elems)) { - Node *texpr = lfirst(le); - - if (expr_contains_node(is_expr, texpr)) - { - return true; - } + return 
true; } - break; } - if (is_ag_node(expr, cypher_string_match)) + else if (is_ag_node(node, cypher_string_match)) { - cypher_string_match *csm = (cypher_string_match *)expr; + cypher_string_match *csm = (cypher_string_match *)node; - if (expr_contains_node(is_expr, csm->lhs) || - expr_contains_node(is_expr, csm->rhs)) + if (WALK(csm->lhs)) { return true; } - break; - } - if (is_ag_node(expr, cypher_typecast)) - { - cypher_typecast *t = (cypher_typecast *) expr; - if (expr_contains_node(is_expr, t->expr)) + if (WALK(csm->rhs)) { return true; } - break; } - if (is_ag_node(expr, cypher_comparison_aexpr)) + else if (is_ag_node(node, cypher_typecast)) { - cypher_comparison_aexpr *a = (cypher_comparison_aexpr *) expr; + cypher_typecast *t = (cypher_typecast *)node; - if (expr_contains_node(is_expr, a->lexpr) || - expr_contains_node(is_expr, a->rexpr)) + if (WALK(t->expr)) { return true; } - break; } - if (is_ag_node(expr, cypher_comparison_boolexpr)) + else if (is_ag_node(node, cypher_comparison_aexpr)) { - cypher_comparison_boolexpr *b = (cypher_comparison_boolexpr *) expr; - ListCell *la; + cypher_comparison_aexpr *a = (cypher_comparison_aexpr *)node; - foreach(la, b->args) + if (WALK(a->lexpr)) { - Node *arg = lfirst(la); + return true; + } - if (expr_contains_node(is_expr, arg)) - { - return true; - } + if (WALK(a->rexpr)) + { + return true; } - break; } - if (is_ag_node(expr, cypher_unwind)) + else if (is_ag_node(node, cypher_comparison_boolexpr)) { - cypher_unwind* lc = (cypher_unwind *)expr; - - if (expr_contains_node(is_expr, lc->where) || - expr_contains_node(is_expr, lc->collect)) + cypher_comparison_boolexpr *b = (cypher_comparison_boolexpr *)node; + + if (LIST_WALK(b->args)) { return true; } - break; } - - if (is_ag_node(expr, cypher_sub_pattern)) + else if (is_ag_node(node, cypher_unwind)) { - break; - } + cypher_unwind *unw = (cypher_unwind *)node; - if (is_ag_node(expr, cypher_sub_query)) - { - break; + if (WALK(unw->target)) + { + return true; + } } - 
ereport(ERROR, - (errmsg_internal("unrecognized ExtensibleNode: %s", - ((ExtensibleNode *)expr)->extnodename))); - - break; - } - case T_FuncCall: - { - FuncCall *fn = (FuncCall *)expr; - ListCell *arg; - - foreach(arg, fn->args) + else if (is_ag_node(node, cypher_list_comprehension)) { - Node *farg = NULL; + cypher_list_comprehension *lc = (cypher_list_comprehension *)node; - farg = (Node *)lfirst(arg); + if (WALK(lc->expr)) + { + return true; + } + + if (WALK(lc->where)) + { + return true; + } - if (expr_contains_node(is_expr, farg)) + if (WALK(lc->mapping_expr)) { return true; } } - break; + /* Add more node types here as needed */ + else + { + ereport(ERROR, + (errmsg_internal("unrecognized ExtensibleNode: %s", + ((ExtensibleNode *)node)->extnodename))); + } } - case T_SubLink: + /* + * postgres's raw expression tree walker does not handle List + */ + else if (IsA(node, List)) { - SubLink *s = (SubLink *)expr; - - if (expr_contains_node(is_expr, s->subselect)) + foreach(temp, (List *) node) { - return true; + if (WALK((Node *) lfirst(temp))) + return true; } - break; - } - default: - ereport(ERROR, (errmsg_internal("unrecognized node type: %d", - nodeTag(expr)))); } - return (is_expr(expr)); +#undef LIST_WALK + else + { + return raw_expression_tree_walker(node, walker, context); + } + + return false; } /* - * Function that checks if an expr is a cypher_sub_query. Used in tandem with - * expr_contains_node. Can write more similar to this to find similar nodes. + * This is an extension of postgres's expression_tree_walker() function. + * It is meant to walk cypher-specific nodes found in the expression tree + * post parse analysis. + * + * More nodes can be added to this function as needed. 
*/ - -bool expr_has_subquery(Node * expr) +bool cypher_expr_tree_walker_impl(Node *node, + bool (*walker)(Node *node, void *context), + void *context) { - if (expr == NULL) + if (node == NULL) { return false; } - if (IsA(expr, ExtensibleNode)) - { - if (is_ag_node(expr, cypher_sub_query)) - { - return true; - } - } - return false; -} +#define LIST_WALK(l) cypher_expr_tree_walker_impl((Node *) (l), walker, context) -/* - * This function constructs an intermediate WITH node for processing subqueries - */ -cypher_clause *build_subquery_node(cypher_clause *next) -{ - cypher_match *match = (cypher_match *)next->self; - cypher_clause *where_container_clause; - cypher_with *with_clause = make_ag_node(cypher_with); - ColumnRef *cr; - ResTarget *rt; - - /* construct the column ref star */ - cr = makeNode(ColumnRef); - cr->fields = list_make1(makeNode(A_Star)); - cr->location = exprLocation((Node *)next); - - /*construct the restarget */ - rt = makeNode(ResTarget); - rt->name = NULL; - rt->indirection = NIL; - rt->val = (Node *)cr; - rt->location = exprLocation((Node *)next); - - - /* construct the with_clause */ - with_clause->items = list_make1(rt); - with_clause->where = match->where; - with_clause->subquery_intermediate = true; + if (IsA(node, ExtensibleNode)) + { + /* Add our nodes that can appear post parsing stage */ - /* - * create the where container, and set the match (next) as the - * prev of the where container - */ - where_container_clause = palloc(sizeof(*where_container_clause)); - where_container_clause->self = (Node *)with_clause; - where_container_clause->next = NULL; - where_container_clause->prev = next; + ereport(ERROR, + (errmsg_internal("unrecognized ExtensibleNode: %s", + ((ExtensibleNode *)node)->extnodename))); + } +#undef WALK +#undef LIST_WALK + else + { + return expression_tree_walker(node, walker, context); + } - return where_container_clause; + return false; } static Query *analyze_cypher(List *stmt, ParseState *parent_pstate, @@ -1089,23 
+922,6 @@ static Query *analyze_cypher(List *stmt, ParseState *parent_pstate, next->self = lfirst(lc); next->prev = clause; - /* check for subqueries in match */ - if (is_ag_node(next->self, cypher_match)) - { - cypher_match *match = (cypher_match *)next->self; - - if (match->where != NULL && expr_contains_node(expr_has_subquery, match->where)) - { - /* advance the clause iterator to the intermediate clause position */ - clause = build_subquery_node(next); - - /* set the next of the match to the where_container_clause */ - match->where = NULL; - next->next = clause; - continue; - } - } - if (clause != NULL) { clause->next = next; diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index e301daa0f..172e6305a 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -248,6 +248,11 @@ static void get_res_cols(ParseState *pstate, ParseNamespaceItem *l_pnsi, /* unwind */ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, cypher_clause *clause); + +/* list comprehension */ +static Query *transform_cypher_list_comprehension(cypher_parsestate *cpstate, + cypher_clause *clause); + /* merge */ static Query *transform_cypher_merge(cypher_parsestate *cpstate, cypher_clause *clause); @@ -290,7 +295,7 @@ static Query *transform_cypher_call_subquery(cypher_parsestate *cpstate, #define transform_prev_cypher_clause(cpstate, prev_clause, add_rte_to_query) \ transform_cypher_clause_as_subquery(cpstate, transform_cypher_clause, \ prev_clause, NULL, add_rte_to_query) -ParseNamespaceItem +static ParseNamespaceItem *transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, transform_method transform, cypher_clause *clause, @@ -328,9 +333,9 @@ static void markRelsAsNulledBy(ParseState *pstate, Node *n, int jindex); /* for VLE support */ static ParseNamespaceItem *transform_RangeFunction(cypher_parsestate *cpstate, RangeFunction *r); -static Node *transform_VLE_Function(cypher_parsestate *cpstate, Node 
*n, - RangeTblEntry **top_rte, int *top_rti, - List **namespace); +static Node *transform_from_clause_item(cypher_parsestate *cpstate, Node *n, + RangeTblEntry **top_rte, int *top_rti, + List **namespace); static ParseNamespaceItem *append_VLE_Func_to_FromClause(cypher_parsestate *cpstate, Node *n); static void setNamespaceLateralState(List *namespace, bool lateral_only, @@ -338,6 +343,7 @@ static void setNamespaceLateralState(List *namespace, bool lateral_only, static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); +static bool has_list_comp_or_subquery(Node *expr, void *context); /* * transform a cypher_clause @@ -400,19 +406,16 @@ Query *transform_cypher_clause(cypher_parsestate *cpstate, } else if (is_ag_node(self, cypher_unwind)) { - cypher_unwind *n = (cypher_unwind *) self; - if (n->collect != NULL) - { - cpstate->p_list_comp = true; - } - result = transform_cypher_clause_with_where(cpstate, - transform_cypher_unwind, - clause, n->where); + result = transform_cypher_unwind(cpstate, clause); } else if (is_ag_node(self, cypher_call)) { result = transform_cypher_call_stmt(cpstate, clause); } + else if (is_ag_node(self, cypher_list_comprehension)) + { + result = transform_cypher_list_comprehension(cpstate, clause); + } else { ereport(ERROR, (errmsg_internal("unexpected Node for cypher_clause"))); @@ -448,23 +451,6 @@ static cypher_clause *make_cypher_clause(List *stmt) next->self = lfirst(lc); next->prev = clause; - /* check for subqueries in match */ - if (is_ag_node(next->self, cypher_match)) - { - cypher_match *match = (cypher_match *)next->self; - - if (match->where != NULL && expr_contains_node(expr_has_subquery, match->where)) - { - /* advance the clause iterator to the intermediate clause position */ - clause = build_subquery_node(next); - - /* set the next of the match to the where_container_clause */ - match->where = NULL; - next->next = clause; - continue; - } - } - if 
(clause != NULL) { clause->next = next; @@ -663,6 +649,7 @@ static Query *transform_cypher_union(cypher_parsestate *cpstate, qry->rteperminfos = pstate->p_rteperminfos; qry->jointree = makeFromExpr(pstate->p_joinlist, NULL); qry->hasAggs = pstate->p_hasAggs; + qry->hasSubLinks = pstate->p_hasSubLinks; assign_query_collations(pstate, qry); @@ -1268,6 +1255,7 @@ static Query *transform_cypher_call_subquery(cypher_parsestate *cpstate, query->rteperminfos = cpstate->pstate.p_rteperminfos; query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, (Node *)where_qual); query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; assign_query_collations(pstate, query); @@ -1340,6 +1328,8 @@ static Query *transform_cypher_delete(cypher_parsestate *cpstate, query->rtable = pstate->p_rtable; query->rteperminfos = pstate->p_rteperminfos; query->jointree = makeFromExpr(pstate->p_joinlist, NULL); + query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; return query; } @@ -1363,9 +1353,6 @@ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, Node *funcexpr; TargetEntry *te; ParseNamespaceItem *pnsi; - bool is_list_comp = self->collect != NULL; - bool has_agg = - is_list_comp || has_a_cypher_list_comprehension_node(self->target->val); query = makeNode(Query); query->commandType = CMD_SELECT; @@ -1394,13 +1381,14 @@ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, ereport(ERROR, (errcode(ERRCODE_DUPLICATE_ALIAS), errmsg("duplicate variable \"%s\"", self->target->name), - parser_errposition(pstate, target_syntax_loc))); + parser_errposition((ParseState *) cpstate, + target_syntax_loc))); } expr = transform_cypher_expr(cpstate, self->target->val, EXPR_KIND_SELECT_TARGET); - if (!has_agg && nodeTag(expr) == T_Aggref) + if (nodeTag(expr) == T_Aggref) { ereport(ERROR, errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Invalid use of aggregation in this context"), @@ -1413,12 +1401,11 @@ static Query 
*transform_cypher_unwind(cypher_parsestate *cpstate, old_expr_kind = pstate->p_expr_kind; pstate->p_expr_kind = EXPR_KIND_SELECT_TARGET; funcexpr = ParseFuncOrColumn(pstate, unwind->funcname, - list_make2(expr, makeBoolConst(is_list_comp, false)), + list_make1(expr), pstate->p_last_srf, unwind, false, target_syntax_loc); pstate->p_expr_kind = old_expr_kind; - pstate->p_hasAggs = has_agg; te = makeTargetEntry((Expr *) funcexpr, (AttrNumber) pstate->p_next_resno++, @@ -1430,12 +1417,95 @@ static Query *transform_cypher_unwind(cypher_parsestate *cpstate, query->jointree = makeFromExpr(pstate->p_joinlist, NULL); query->hasTargetSRFs = pstate->p_hasTargetSRFs; query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; assign_query_collations(pstate, query); return query; } +/* + * [i IN u WHERE i<2 | i^2] + * + * | | | | | + * \|/ \|/ \|/ \|/ \|/ + * + * SELECT i^2 FROM age_unnest(u) AS i WHERE i<2; + */ +static Query *transform_cypher_list_comprehension(cypher_parsestate *cpstate, + cypher_clause *clause) +{ + Query *query; + RangeFunction *rf; + cypher_list_comprehension *list_comp = (cypher_list_comprehension *) clause->self; + FuncCall *func_call; + Node *return_expr, *qual, *n; + RangeTblEntry *rte = NULL; + int rtindex; + List *namespace = NULL; + TargetEntry *te; + cypher_parsestate *child_cpstate = make_cypher_parsestate(cpstate); + ParseState *child_pstate = (ParseState *) child_cpstate; + + query = makeNode(Query); + query->commandType = CMD_SELECT; + + func_call = makeFuncCall(list_make1(makeString("unnest")), + list_make1(list_comp->expr), + COERCE_SQL_SYNTAX, -1); + + rf = makeNode(RangeFunction); + rf->lateral = false; + rf->ordinality = false; + rf->is_rowsfrom = false; + rf->functions = list_make1(list_make2((Node *) func_call, NIL)); + rf->alias = makeAlias(list_comp->varname, NIL); + rf->coldeflist = NIL; + + n = transform_from_clause_item(child_cpstate, (Node *) rf, + &rte, &rtindex, &namespace); + 
checkNameSpaceConflicts(child_pstate, child_pstate->p_namespace, namespace); + child_pstate->p_joinlist = lappend(child_pstate->p_joinlist, n); + child_pstate->p_namespace = list_concat(child_pstate->p_namespace, namespace); + + /* make all namespace items unconditionally visible */ + setNamespaceLateralState(child_pstate->p_namespace, false, true); + + return_expr = transform_cypher_expr(child_cpstate, list_comp->mapping_expr, + EXPR_KIND_SELECT_TARGET); + + te = makeTargetEntry((Expr *) return_expr, + (AttrNumber) child_pstate->p_next_resno++, + list_comp->varname, false); + + qual = transform_cypher_expr(child_cpstate, list_comp->where, + EXPR_KIND_WHERE); + if (qual) + { + qual = coerce_to_boolean(child_pstate, qual, "WHERE"); + } + + query->targetList = lappend(query->targetList, te); + query->jointree = makeFromExpr(child_pstate->p_joinlist, (Node *) qual); + query->rtable = child_pstate->p_rtable; + query->rteperminfos = child_pstate->p_rteperminfos; + query->hasAggs = child_pstate->p_hasAggs; + query->hasSubLinks = child_pstate->p_hasSubLinks; + query->hasTargetSRFs = child_pstate->p_hasTargetSRFs; + + assign_query_collations(child_pstate, query); + + if (child_pstate->p_hasAggs || + query->groupClause || query->groupingSets || query->havingQual) + { + parse_check_aggregates(child_pstate, query); + } + + free_cypher_parsestate(child_cpstate); + + return query; +} + /* * Iterate through the list of items to delete and extract the variable name. * Then find the resno that the variable name belongs to. 
@@ -1572,6 +1642,8 @@ static Query *transform_cypher_set(cypher_parsestate *cpstate, query->rtable = pstate->p_rtable; query->rteperminfos = pstate->p_rteperminfos; query->jointree = makeFromExpr(pstate->p_joinlist, NULL); + query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; return query; } @@ -1845,12 +1917,7 @@ cypher_update_information *transform_cypher_set_item_list( EXPR_KIND_SELECT_TARGET, NULL, false); - if (has_a_cypher_list_comprehension_node(set_item->expr)) - { - query->hasAggs = true; - } - - if (!query->hasAggs && nodeTag(target_item->expr) == T_Aggref) + if (nodeTag(target_item->expr) == T_Aggref) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Invalid use of aggregation in this context"), @@ -2182,6 +2249,7 @@ static Query *transform_cypher_return(cypher_parsestate *cpstate, query->rteperminfos = pstate->p_rteperminfos; query->jointree = makeFromExpr(pstate->p_joinlist, NULL); query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; assign_query_collations(pstate, query); @@ -2370,7 +2438,6 @@ static Query *transform_cypher_clause_with_where(cypher_parsestate *cpstate, { ParseState *pstate = (ParseState *)cpstate; Query *query; - Node *self = clause->self; Node *where_qual = NULL; if (where) @@ -2383,10 +2450,8 @@ static Query *transform_cypher_clause_with_where(cypher_parsestate *cpstate, pnsi = transform_cypher_clause_as_subquery(cpstate, transform, clause, NULL, true); - Assert(pnsi != NULL); rtindex = list_length(pstate->p_rtable); - /* rte is the only RangeTblEntry in pstate */ if (rtindex != 1) { @@ -2401,79 +2466,15 @@ static Query *transform_cypher_clause_with_where(cypher_parsestate *cpstate, * next clause */ query->targetList = expandNSItemAttrs(pstate, pnsi, 0, true, -1); - markTargetListOrigins(pstate, query->targetList); - query->rtable = pstate->p_rtable; query->rteperminfos = pstate->p_rteperminfos; where_qual = transform_cypher_expr(cpstate, where, 
EXPR_KIND_WHERE); - where_qual = coerce_to_boolean(pstate, where_qual, "WHERE"); - /* check if we have a list comprehension in the where clause */ - if (cpstate->p_list_comp && - has_a_cypher_list_comprehension_node(where)) - { - List *groupClause = NIL; - ListCell *li; - bool has_a_star; - - has_a_star = false; - query->jointree = makeFromExpr(pstate->p_joinlist, NULL); - query->havingQual = where_qual; - - foreach (li, ((cypher_return *)self)->items) - { - ResTarget *item = lfirst(li); - ColumnRef *cref; - - /* - * We need to handle the case where the item is a A_star. In this - * case we will need to build group by using targetList. - */ - if (IsA(item->val, ColumnRef)) - { - cref = (ColumnRef *)item->val; - if (IsA(linitial(cref->fields), A_Star)) - { - has_a_star = true; - continue; - } - } - - groupClause = lappend(groupClause, item->val); - } - - /* - * If there is A_star flag, build the group by clause - * using the targetList. - */ - if (has_a_star) - { - ListCell *lc; - foreach (lc, query->targetList) - { - TargetEntry *te = lfirst(lc); - ColumnRef *cref = makeNode(ColumnRef); - - cref->fields = list_make1(makeString(te->resname)); - cref->location = exprLocation((Node *)te->expr); - - groupClause = lappend(groupClause, cref); - } - } - query->groupClause = transform_group_clause(cpstate, groupClause, - &query->groupingSets, - &query->targetList, - query->sortClause, - EXPR_KIND_GROUP_BY); - - } - else - { - query->jointree = makeFromExpr(pstate->p_joinlist, where_qual); - } + query->jointree = makeFromExpr(pstate->p_joinlist, where_qual); + assign_query_collations(pstate, query); } else { @@ -2484,8 +2485,6 @@ static Query *transform_cypher_clause_with_where(cypher_parsestate *cpstate, query->hasTargetSRFs = pstate->p_hasTargetSRFs; query->hasAggs = pstate->p_hasAggs; - assign_query_collations(pstate, query); - return query; } @@ -2493,6 +2492,7 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, cypher_clause *clause) { cypher_match 
*match_self = (cypher_match*) clause->self; + Node *where = match_self->where; if(!match_check_valid_label(match_self, cpstate)) { @@ -2510,9 +2510,39 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, (Node *)r, -1); } + if (has_list_comp_or_subquery((Node *)match_self->where, NULL)) + { + match_self->where = NULL; + return transform_cypher_clause_with_where(cpstate, + transform_cypher_match_pattern, clause, where); + } + return transform_cypher_match_pattern(cpstate, clause); } +/* + * Function that checks if an expr has a cypher_sub_query or + * cypher_list_comprehension. + */ +static bool has_list_comp_or_subquery(Node *expr, void *context) +{ + if (expr == NULL) + { + return false; + } + + if (IsA(expr, ExtensibleNode)) + { + if (is_ag_node(expr, cypher_sub_query) || + is_ag_node(expr, cypher_list_comprehension)) + { + return true; + } + } + + return cypher_raw_expr_tree_walker(expr, has_list_comp_or_subquery, context); +} + /* * Transform the clause into a subquery. This subquery will be used * in a join so setup the namespace item and the created the rtr @@ -2709,18 +2739,6 @@ static Query *transform_cypher_match_pattern(cypher_parsestate *cpstate, { cypher_clause *next = clause->next; - /* - * check if optional match has a subquery node-- it could still - * be following a match - */ - if(is_ag_node(next->self, cypher_with)) - { - cypher_with *next_with = (cypher_with *)next->self; - if (next_with->subquery_intermediate == true) - { - next = next->next; - } - } if (is_ag_node(next->self, cypher_match)) { cypher_match *next_self = (cypher_match *)next->self; @@ -2983,9 +3001,9 @@ static Query *transform_cypher_sub_query(cypher_parsestate *cpstate, * will transform the VLE function, depending on type. Currently, only * RangeFunctions are supported. But, others may be in the future. 
*/ -static Node *transform_VLE_Function(cypher_parsestate *cpstate, Node *n, - RangeTblEntry **top_rte, int *top_rti, - List **namespace) +static Node *transform_from_clause_item(cypher_parsestate *cpstate, Node *n, + RangeTblEntry **top_rte, int *top_rti, + List **namespace) { ParseState *pstate = &cpstate->pstate; @@ -3053,7 +3071,7 @@ static ParseNamespaceItem *append_VLE_Func_to_FromClause(cypher_parsestate *cpst * Following PG's FROM clause logic, just in case we need to expand it in * the future, we process the items in another function. */ - n = transform_VLE_Function(cpstate, n, &rte, &rtindex, &namespace); + n = transform_from_clause_item(cpstate, n, &rte, &rtindex, &namespace); /* this should not happen */ Assert(n != NULL); @@ -3246,36 +3264,7 @@ static void transform_match_pattern(cypher_parsestate *cpstate, Query *query, query->rtable = cpstate->pstate.p_rtable; query->rteperminfos = cpstate->pstate.p_rteperminfos; - - if (cpstate->p_list_comp) - { - List *groupList = NIL; - - query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, NULL); - query->havingQual = (Node *)expr; - - foreach (lc, query->targetList) - { - TargetEntry *te = lfirst(lc); - ColumnRef *cref = makeNode(ColumnRef); - - cref->fields = list_make1(makeString(te->resname)); - cref->location = exprLocation((Node *)te->expr); - - groupList = lappend(groupList, cref); - } - - query->groupClause = transform_group_clause(cpstate, groupList, - &query->groupingSets, - &query->targetList, - query->sortClause, - EXPR_KIND_GROUP_BY); - } - else - { - query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, - (Node *)expr); - } + query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, (Node *)expr); } /* @@ -5645,6 +5634,7 @@ static Query *transform_cypher_create(cypher_parsestate *cpstate, query->rteperminfos = pstate->p_rteperminfos; query->jointree = makeFromExpr(pstate->p_joinlist, NULL); query->hasAggs = pstate->p_hasAggs; + query->hasSubLinks = pstate->p_hasSubLinks; return query; } 
@@ -6280,7 +6270,7 @@ static Expr *cypher_create_properties(cypher_parsestate *cpstate, * This function is similar to transformFromClause() that is called with a * single RangeSubselect. */ -ParseNamespaceItem * +static ParseNamespaceItem * transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, transform_method transform, cypher_clause *clause, @@ -6302,8 +6292,7 @@ transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, pstate->p_expr_kind == EXPR_KIND_OTHER || pstate->p_expr_kind == EXPR_KIND_WHERE || pstate->p_expr_kind == EXPR_KIND_SELECT_TARGET || - pstate->p_expr_kind == EXPR_KIND_FROM_SUBSELECT || - pstate->p_expr_kind == EXPR_KIND_INSERT_TARGET); + pstate->p_expr_kind == EXPR_KIND_FROM_SUBSELECT); /* * As these are all sub queries, if this is just of type NONE, note it as a @@ -6507,6 +6496,12 @@ Query *cypher_parse_sub_analyze(Node *parseTree, cypher_clause *clause; Query *query; + if (IsA(parseTree, Query)) + { + /* Already transformed, just return it */ + return (Query *)parseTree; + } + pstate->p_parent_cte = parentCTE; pstate->p_locked_from_parent = locked_from_parent; pstate->p_resolve_unknowns = resolve_unknowns; @@ -6657,7 +6652,6 @@ static Query *transform_cypher_merge(cypher_parsestate *cpstate, query->rteperminfos = pstate->p_rteperminfos; query->jointree = makeFromExpr(pstate->p_joinlist, NULL); query->hasAggs = pstate->p_hasAggs; - query->hasSubLinks = pstate->p_hasSubLinks; assign_query_collations(pstate, query); diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 16d6e3572..19bc71d42 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -98,8 +98,6 @@ static Node *transform_WholeRowRef(ParseState *pstate, ParseNamespaceItem *pnsi, static ArrayExpr *make_agtype_array_expr(List *args); static Node *transform_column_ref_for_indirection(cypher_parsestate *cpstate, ColumnRef *cr); -static Node *transform_cypher_list_comprehension(cypher_parsestate *cpstate, - 
cypher_unwind *expr); static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, FuncCall *fn, List *targs, Form_pg_proc procform, @@ -242,12 +240,6 @@ static Node *transform_cypher_expr_recurse(cypher_parsestate *cpstate, return transform_cypher_comparison_boolexpr(cpstate, (cypher_comparison_boolexpr *)expr); } - if (is_ag_node(expr, cypher_unwind)) - { - return transform_cypher_list_comprehension(cpstate, - (cypher_unwind *) expr); - } - ereport(ERROR, (errmsg_internal("unrecognized ExtensibleNode: %s", ((ExtensibleNode *)expr)->extnodename))); @@ -258,7 +250,9 @@ static Node *transform_cypher_expr_recurse(cypher_parsestate *cpstate, return transform_FuncCall(cpstate, (FuncCall *)expr); case T_SubLink: return transform_SubLink(cpstate, (SubLink *)expr); - break; + case T_Const: + /* Already transformed */ + return expr; default: ereport(ERROR, (errmsg_internal("unrecognized node type: %d", nodeTag(expr)))); @@ -391,26 +385,8 @@ static Node *transform_ColumnRef(cypher_parsestate *cpstate, ColumnRef *cref) Assert(IsA(field1, String)); colname = strVal(field1); - if (cpstate->p_list_comp && - (pstate->p_expr_kind == EXPR_KIND_WHERE || - pstate->p_expr_kind == EXPR_KIND_SELECT_TARGET) && - list_length(pstate->p_namespace) > 0) - { - /* - * Just scan through the last pnsi(that is for list comp) - * to find the column. 
- */ - node = scanNSItemForColumn(pstate, - llast(pstate->p_namespace), - 0, colname, cref->location); - } - else - { - /* Try to identify as an unqualified column */ - node = colNameToVar(pstate, colname, false, - cref->location); - } - + /* Try to identify as an unqualified column */ + node = colNameToVar(pstate, colname, false, cref->location); if (node != NULL) { break; @@ -1322,7 +1298,7 @@ static Node *transform_column_ref_for_indirection(cypher_parsestate *cpstate, } /* find the properties column of the NSI and return a var for it */ - node = scanNSItemForColumn(pstate, pnsi, levels_up, "properties", + node = scanNSItemForColumn(pstate, pnsi, levels_up, "properties", cr->location); /* @@ -2373,6 +2349,7 @@ static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink) case EXPR_KIND_SELECT_TARGET: case EXPR_KIND_FROM_SUBSELECT: case EXPR_KIND_WHERE: + case EXPR_KIND_INSERT_TARGET: /* okay */ break; default: @@ -2412,7 +2389,7 @@ static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink) sublink->operName = NIL; } else if (sublink->subLinkType == EXPR_SUBLINK || - sublink->subLinkType == ARRAY_SUBLINK) + sublink->subLinkType == ARRAY_SUBLINK) { /* * Make sure the subselect delivers a single column (ignoring resjunk @@ -2444,31 +2421,3 @@ static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink) return result; } - -static Node *transform_cypher_list_comprehension(cypher_parsestate *cpstate, - cypher_unwind *unwind) -{ - cypher_clause cc; - Node* expr; - ParseNamespaceItem *pnsi; - ParseState *pstate = (ParseState *)cpstate; - - cpstate->p_list_comp = true; - pstate->p_lateral_active = true; - - cc.prev = NULL; - cc.next = NULL; - cc.self = (Node *)unwind; - - pnsi = transform_cypher_clause_as_subquery(cpstate, - transform_cypher_clause, - &cc, NULL, true); - - expr = transform_cypher_expr(cpstate, unwind->collect, - EXPR_KIND_SELECT_TARGET); - - pnsi->p_cols_visible = false; - pstate->p_lateral_active = false; 
- - return expr; -} diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 6cb15e505..a4d8f0f33 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -21,6 +21,7 @@ #include "postgres.h" #include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" #include "parser/parser.h" #include "parser/cypher_gram.h" @@ -263,15 +264,9 @@ static Node *build_comparison_expression(Node *left_grammar_node, char *opr_name, int location); /* list_comprehension */ -static Node *verify_rule_as_list_comprehension(Node *expr, Node *expr2, - Node *where, Node *mapping_expr, - int var_loc, int expr_loc, - int where_loc, int mapping_loc); - -static Node *build_list_comprehension_node(ColumnRef *var_name, Node *expr, +static Node *build_list_comprehension_node(Node *var, Node *expr, Node *where, Node *mapping_expr, - int var_loc, int expr_loc, - int where_loc,int mapping_loc); + int location); %} %% @@ -1062,8 +1057,6 @@ unwind: n = make_ag_node(cypher_unwind); n->target = res; - n->where = NULL; - n->collect = NULL; $$ = (Node *) n; } @@ -2067,6 +2060,7 @@ expr_literal: | map | map_projection | list + | list_comprehension ; map: @@ -2186,7 +2180,6 @@ list: $$ = (Node *)n; } - | list_comprehension ; /* @@ -2197,40 +2190,30 @@ list: list_comprehension: '[' expr IN expr ']' { - Node *n = $2; - Node *result = NULL; - /* - * If the first expr is a ColumnRef(variable), then the rule - * should evaluate as a list comprehension. Otherwise, it should - * evaluate as an IN operator. + * If the first expr is not a ColumnRef(variable), then the rule + * is an IN operator. Otherwise, it is a list comprehension.
*/ - if (nodeTag(n) == T_ColumnRef) + if (!IsA($2, ColumnRef)) { - ColumnRef *cref = (ColumnRef *)n; - result = build_list_comprehension_node(cref, $4, NULL, NULL, - @2, @4, 0, 0); + $$ = (Node *)makeSimpleA_Expr(AEXPR_IN, "=", $2, $4, @3); } - else - { - result = (Node *)makeSimpleA_Expr(AEXPR_IN, "=", n, $4, @3); - } - $$ = result; + else + { + $$ = build_list_comprehension_node($2, $4, NULL, NULL, @1); + } } | '[' expr IN expr WHERE expr ']' { - $$ = verify_rule_as_list_comprehension($2, $4, $6, NULL, - @2, @4, @6, 0); + $$ = build_list_comprehension_node($2, $4, $6, NULL, @1); } | '[' expr IN expr '|' expr ']' { - $$ = verify_rule_as_list_comprehension($2, $4, NULL, $6, - @2, @4, 0, @6); + $$ = build_list_comprehension_node($2, $4, NULL, $6, @1); } | '[' expr IN expr WHERE expr '|' expr ']' { - $$ = verify_rule_as_list_comprehension($2, $4, $6, $8, - @2, @4, @6, @8); + $$ = build_list_comprehension_node($2, $4, $6, $8, @1); } ; @@ -2881,6 +2864,10 @@ static FuncCall *node_to_agtype(Node * fnode, char *type, int location) { funcname = lappend(funcname, makeString("bool_to_agtype")); } + else if (pg_strcasecmp(type, "agtype[]") == 0) + { + funcname = lappend(funcname, makeString("agtype_array_to_agtype")); + } else { ereport(ERROR, @@ -3275,90 +3262,51 @@ static cypher_relationship *build_VLE_relation(List *left_arg, return cr; } -/* Helper function to verify that the rule is a list comprehension */ -static Node *verify_rule_as_list_comprehension(Node *expr, Node *expr2, - Node *where, Node *mapping_expr, - int var_loc, int expr_loc, - int where_loc, int mapping_loc) +/* helper function to build a list_comprehension grammar node */ +static Node *build_list_comprehension_node(Node *var, Node *expr, + Node *where, Node *mapping_expr, + int location) { - Node *result = NULL; + SubLink *sub; + String *val; + ColumnRef *cref = NULL; + cypher_list_comprehension *list_comp = NULL; - /* - * If the first expression is a ColumnRef, then we can build a - * list_comprehension node.
- * Else its an invalid use of IN operator. - */ - if (nodeTag(expr) == T_ColumnRef) - { - ColumnRef *cref = (ColumnRef *)expr; - result = build_list_comprehension_node(cref, expr2, where, - mapping_expr, var_loc, - expr_loc, where_loc, - mapping_loc); - } - else + if (!IsA(var, ColumnRef)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("Syntax error at or near IN"))); } - return result; -} -/* helper function to build a list_comprehension grammar node */ -static Node *build_list_comprehension_node(ColumnRef *cref, Node *expr, - Node *where, Node *mapping_expr, - int var_loc, int expr_loc, - int where_loc, int mapping_loc) -{ - ResTarget *res = NULL; - cypher_unwind *unwind = NULL; - char *var_name = NULL; - String *val; - - /* Extract name from cref */ + cref = (ColumnRef *)var; val = linitial(cref->fields); - if (!IsA(val, String)) { ereport(ERROR, - (errmsg_internal("unexpected Node for cypher_clause"))); + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("Invalid list comprehension variable name"))); } - var_name = val->sval; + /* build the list comprehension node */ + list_comp = make_ag_node(cypher_list_comprehension); + list_comp->varname = val->sval; + list_comp->expr = expr; + list_comp->where = where; + list_comp->mapping_expr = (mapping_expr != NULL) ? mapping_expr : + (Node *) cref; /* - * Build the ResTarget node for the UNWIND variable var_name attached to - * expr. + * Build an ARRAY sublink and attach list_comp as sub-select, + * it will be transformed in to query tree by us and reattached for + * pg to process. 
*/ - res = makeNode(ResTarget); - res->name = var_name; - res->val = (Node *)expr; - res->location = expr_loc; - - /* build the UNWIND node */ - unwind = make_ag_node(cypher_unwind); - unwind->target = res; - unwind->where = where; - - /* if there is a mapping function, add its arg to collect */ - if (mapping_expr != NULL) - { - unwind->collect = make_function_expr(list_make1(makeString("collect")), - list_make1(mapping_expr), - mapping_loc); - } - /* - * Otherwise, we need to add in the ColumnRef of the variable var_name as - * the arg to collect instead. This implies that the RETURN variable is - * var_name. - */ - else - { - unwind->collect = make_function_expr(list_make1(makeString("collect")), - list_make1(cref), mapping_loc); - } - - /* return the UNWIND node */ - return (Node *)unwind; + sub = makeNode(SubLink); + sub->subLinkType = ARRAY_SUBLINK; + sub->subLinkId = 0; + sub->testexpr = NULL; + sub->subselect = (Node *)list_comp; + sub->location = location; + + return (Node *) node_to_agtype((Node *)sub, "agtype[]", location); } diff --git a/src/backend/parser/cypher_item.c b/src/backend/parser/cypher_item.c index c32b46c46..c2feb2720 100644 --- a/src/backend/parser/cypher_item.c +++ b/src/backend/parser/cypher_item.c @@ -27,17 +27,14 @@ #include "nodes/makefuncs.h" #include "parser/parse_relation.h" #include "parser/parse_target.h" -#include "miscadmin.h" #include "parser/cypher_expr.h" #include "parser/cypher_item.h" -#include "parser/cypher_clause.h" static List *ExpandAllTables(ParseState *pstate, int location); static List *expand_pnsi_attrs(ParseState *pstate, ParseNamespaceItem *pnsi, int sublevels_up, bool require_col_privs, int location); -bool has_a_cypher_list_comprehension_node(Node *expr); /* see transformTargetEntry() */ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node, @@ -45,17 +42,10 @@ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node, char *colname, bool resjunk) { ParseState *pstate = 
(ParseState *)cpstate; - bool old_p_lateral_active = pstate->p_lateral_active; - - /* we want to see lateral variables */ - pstate->p_lateral_active = true; if (!expr) expr = transform_cypher_expr(cpstate, node, expr_kind); - /* set lateral back to what it was */ - pstate->p_lateral_active = old_p_lateral_active; - if (!colname && !resjunk) colname = FigureColname(node); @@ -63,143 +53,6 @@ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node, colname, resjunk); } -/* - * Helper function to determine if the passed node has a list_comprehension - * node embedded in it. - */ -bool has_a_cypher_list_comprehension_node(Node *expr) -{ - /* return false on NULL input */ - if (expr == NULL) - { - return false; - } - - /* since this function recurses, it could be driven to stack overflow */ - check_stack_depth(); - - switch (nodeTag(expr)) - { - case T_A_Expr: - { - /* - * We need to recurse into the left and right nodes - * to check if there is an unwind node in there - */ - A_Expr *a_expr = (A_Expr *)expr; - - return (has_a_cypher_list_comprehension_node(a_expr->lexpr) || - has_a_cypher_list_comprehension_node(a_expr->rexpr)); - } - case T_BoolExpr: - { - BoolExpr *bexpr = (BoolExpr *)expr; - ListCell *lc; - - /* is any of the boolean expression argument a list comprehension? 
*/ - foreach(lc, bexpr->args) - { - Node *arg = lfirst(lc); - - if (has_a_cypher_list_comprehension_node(arg)) - { - return true; - } - } - break; - } - case T_A_Indirection: - { - /* set expr to the object of the indirection */ - expr = ((A_Indirection *)expr)->arg; - - /* check the object of the indirection */ - return has_a_cypher_list_comprehension_node(expr); - } - case T_ExtensibleNode: - { - if (is_ag_node(expr, cypher_unwind)) - { - cypher_unwind *cu = (cypher_unwind *)expr; - - /* it is a list comprehension if it has a collect node */ - return cu->collect != NULL; - } - else if (is_ag_node(expr, cypher_map)) - { - cypher_map *map; - int i; - - map = (cypher_map *)expr; - - if (map->keyvals == NULL || map->keyvals->length == 0) - { - return false; - } - - /* check each key and value for a list comprehension */ - for (i = 0; i < map->keyvals->length; i += 2) - { - Node *val; - - /* get the value */ - val = (Node *)map->keyvals->elements[i + 1].ptr_value; - - /* check the value */ - if (has_a_cypher_list_comprehension_node(val)) - { - return true; - } - } - } - else if (is_ag_node(expr, cypher_string_match)) - { - cypher_string_match *csm_match = (cypher_string_match *)expr; - - /* is lhs or rhs of the string match a list comprehension? */ - return (has_a_cypher_list_comprehension_node(csm_match->lhs) || - has_a_cypher_list_comprehension_node(csm_match->rhs)); - } - else if (is_ag_node(expr, cypher_typecast)) - { - cypher_typecast *ctypecast = (cypher_typecast *)expr; - - /* is expr being typecasted a list comprehension? */ - return has_a_cypher_list_comprehension_node(ctypecast->expr); - } - else if (is_ag_node(expr, cypher_comparison_aexpr)) - { - cypher_comparison_aexpr *aexpr = (cypher_comparison_aexpr *)expr; - - /* is left or right argument a list comprehension? 
*/ - return (has_a_cypher_list_comprehension_node(aexpr->lexpr) || - has_a_cypher_list_comprehension_node(aexpr->rexpr)); - } - else if (is_ag_node(expr, cypher_comparison_boolexpr)) - { - cypher_comparison_boolexpr *bexpr = (cypher_comparison_boolexpr *)expr; - ListCell *lc; - - /* is any of the boolean expression argument a list comprehension? */ - foreach(lc, bexpr->args) - { - Node *arg = lfirst(lc); - - if (has_a_cypher_list_comprehension_node(arg)) - { - return true; - } - } - } - break; - } - default: - break; - } - /* otherwise, return false */ - return false; -} - /* see transformTargetList() */ List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list, List **groupClause, ParseExprKind expr_kind) @@ -216,7 +69,6 @@ List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list, { ResTarget *item = lfirst(li); TargetEntry *te; - bool has_list_comp = false; if (expand_star) { @@ -244,48 +96,14 @@ List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list, } } } - - /* Check if we have a list comprehension */ - has_list_comp = has_a_cypher_list_comprehension_node(item->val); - /* Clear the exprHasAgg flag to check transform for an aggregate */ cpstate->exprHasAgg = false; - if (has_list_comp && item_list->length > 1) - { - /* - * Create a subquery for the list comprehension and transform it - * as a subquery. Then expand the target list of the subquery. - * This is to avoid multiple unnest functions in the same query - * level and collect not able to distinguish correctly. 
- */ - ParseNamespaceItem *pnsi; - cypher_return *cr; - cypher_clause cc; - - cr = make_ag_node(cypher_return); - cr->items = list_make1(item); - - cc.prev = NULL; - cc.next = NULL; - cc.self = (Node *)cr; - - pnsi = transform_cypher_clause_as_subquery(cpstate, - transform_cypher_clause, - &cc, NULL, true); - - target_list = list_concat(target_list, - expandNSItemAttrs(&cpstate->pstate, pnsi, - 0, true, -1)); - } - else - { - /* transform the item */ - te = transform_cypher_item(cpstate, item->val, NULL, expr_kind, - item->name, false); + /* transform the item */ + te = transform_cypher_item(cpstate, item->val, NULL, expr_kind, + item->name, false); - target_list = lappend(target_list, te); - } + target_list = lappend(target_list, te); /* * Did the transformed item contain an aggregate function? If it didn't, @@ -300,58 +118,6 @@ List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list, { hasAgg = true; } - - /* - * This is for a special case with list comprehension, which is embedded - * in a cypher_unwind node. We need to group the results but not expose - * the grouping expression. - */ - if (has_list_comp) - { - ParseState *pstate = &cpstate->pstate; - ParseNamespaceItem *nsitem = NULL; - RangeTblEntry *rte = NULL; - - /* - * There should be at least 2 entries in p_namespace. One for the - * variable in the reading clause and one for the variable in the - * list_comprehension expression. Otherwise, there is nothing to - * group with. - */ - if (list_length(pstate->p_namespace) > 1) - { - /* - * Get the first namespace item which should be the first - * variable from the reading clause. - */ - nsitem = lfirst(list_head(pstate->p_namespace)); - /* extract the rte */ - rte = nsitem->p_rte; - - /* - * If we have a non-null column name make a ColumnRef to it. - * Otherwise, there wasn't a variable specified in the reading - * clause. If that is the case don't. Because there isn't - * anything to group with. 
- */ - if (rte->eref->colnames != NULL && nsitem->p_cols_visible) - { - ColumnRef *cref = NULL; - char *colname = NULL; - - /* get the name of the column (varname) */ - colname = strVal(lfirst(list_head(rte->eref->colnames))); - - /* create the ColumnRef */ - cref = makeNode(ColumnRef); - cref->fields = list_make1(makeString(colname)); - cref->location = -1; - - /* add the expression for grouping */ - group_clause = lappend(group_clause, cref); - } - } - } } /* diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 090b1789e..86f41f23a 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -1838,6 +1838,19 @@ static void array_to_agtype_internal(Datum array, agtype_in_state *result) pfree_if_not_null(nulls); } +PG_FUNCTION_INFO_V1(agtype_array_to_agtype); +Datum agtype_array_to_agtype(PG_FUNCTION_ARGS) +{ + agtype_in_state result; + + result.parse_state = NULL; + result.res = NULL; + + array_to_agtype_internal(PG_GETARG_DATUM(0), &result); + + PG_RETURN_POINTER(agtype_value_to_agtype(result.res)); +} + /* * Turn a composite / record into agtype. 
*/ @@ -11916,7 +11929,6 @@ PG_FUNCTION_INFO_V1(age_unnest); Datum age_unnest(PG_FUNCTION_ARGS) { agtype *agtype_arg = NULL; - bool list_comprehension = false; ReturnSetInfo *rsi; Tuplestorestate *tuple_store; TupleDesc tupdesc; @@ -11927,35 +11939,13 @@ Datum age_unnest(PG_FUNCTION_ARGS) agtype_value v; agtype_iterator_token r; - /* verify that we have the correct number of args */ - if (PG_NARGS() != 2) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid number of arguments to unnest"))); - } - - /* verify that our flags are not null */ - if (PG_ARGISNULL(1)) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid unnest boolean flags passed"))); - } - /* check for a NULL expr */ if (PG_ARGISNULL(0)) { PG_RETURN_NULL(); } - /* get our flags */ - list_comprehension = PG_GETARG_BOOL(1); - - /* get the input expression */ agtype_arg = AG_GET_ARG_AGTYPE_P(0); - - /* verify that it resolves to an array */ if (!AGT_ROOT_IS_ARRAY(agtype_arg)) { ereport(ERROR, @@ -12012,25 +12002,6 @@ Datum age_unnest(PG_FUNCTION_ARGS) } } - /* - * If this is for list_comprehension, we need to add a NULL as the last row. - * This NULL will allow empty lists (either filtered out by where, creating - * an empty list, or just a generic empty list) to be preserved. 
- */ - if (list_comprehension) - { - Datum values[1] = {0}; - bool nulls[1] = {true}; - - old_cxt = MemoryContextSwitchTo(tmp_cxt); - - tuplestore_puttuple(tuple_store, - heap_form_tuple(ret_tdesc, values, nulls)); - - MemoryContextSwitchTo(old_cxt); - MemoryContextReset(tmp_cxt); - } - MemoryContextDelete(tmp_cxt); rsi->setResult = tuple_store; diff --git a/src/include/nodes/ag_nodes.h b/src/include/nodes/ag_nodes.h index fe9c9663e..f0cc22043 100644 --- a/src/include/nodes/ag_nodes.h +++ b/src/include/nodes/ag_nodes.h @@ -50,6 +50,7 @@ typedef enum ag_node_tag cypher_map_projection_t, cypher_map_projection_element_t, cypher_list_t, + cypher_list_comprehension_t, /* comparison expression */ cypher_comparison_aexpr_t, cypher_comparison_boolexpr_t, @@ -106,6 +107,5 @@ static inline bool _is_ag_node(Node *node, const char *extnodename) } #define is_ag_node(node, type) _is_ag_node((Node *)(node), CppAsString(type)) -#define get_ag_node_tag(node) ((ag_node_tag)(((ExtensibleNode *)(node))->extnodename)) #endif diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index fc9a9748c..f2527019a 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -68,7 +68,6 @@ typedef struct cypher_with { ExtensibleNode extensible; bool distinct; - bool subquery_intermediate; /* flag that denotes a subquery node */ List *items; /* a list of ResTarget's */ List *order_by; Node *skip; @@ -119,10 +118,6 @@ typedef struct cypher_unwind { ExtensibleNode extensible; ResTarget *target; - - /* for list comprehension */ - Node *where; - Node *collect; } cypher_unwind; typedef struct cypher_merge @@ -220,6 +215,15 @@ typedef struct cypher_map_projection int location; } cypher_map_projection; + typedef struct cypher_list_comprehension + { + ExtensibleNode extensible; + char *varname; + Node *expr; + Node *where; + Node *mapping_expr; + } cypher_list_comprehension; + typedef enum cypher_map_projection_element_type { PROPERTY_SELECTOR = 0, /* 
map_var { .key } */ diff --git a/src/include/nodes/cypher_outfuncs.h b/src/include/nodes/cypher_outfuncs.h index 4a04cf94f..418d35f4e 100644 --- a/src/include/nodes/cypher_outfuncs.h +++ b/src/include/nodes/cypher_outfuncs.h @@ -48,6 +48,7 @@ void out_cypher_param(StringInfo str, const ExtensibleNode *node); void out_cypher_map(StringInfo str, const ExtensibleNode *node); void out_cypher_map_projection(StringInfo str, const ExtensibleNode *node); void out_cypher_list(StringInfo str, const ExtensibleNode *node); +void out_cypher_list_comprehension(StringInfo str, const ExtensibleNode *node); /* comparison expression */ void out_cypher_comparison_aexpr(StringInfo str, const ExtensibleNode *node); diff --git a/src/include/parser/cypher_analyze.h b/src/include/parser/cypher_analyze.h index 97616e549..b45a46832 100644 --- a/src/include/parser/cypher_analyze.h +++ b/src/include/parser/cypher_analyze.h @@ -21,16 +21,22 @@ #define AG_CYPHER_ANALYZE_H #include "parser/cypher_clause.h" +#include "nodes/nodeFuncs.h" -typedef bool (*cypher_expression_condition)( Node *expr); +#define cypher_expr_tree_walker(n, w, c) \ + cypher_expr_tree_walker_impl(n, (tree_walker_callback) (w), c) +#define cypher_raw_expr_tree_walker(n, w, c) \ + cypher_raw_expr_tree_walker_impl(n, (tree_walker_callback) (w), c) void post_parse_analyze_init(void); void post_parse_analyze_fini(void); -cypher_clause *build_subquery_node(cypher_clause *next); - -/*expr tree walker */ -bool expr_contains_node(cypher_expression_condition is_expr, Node *expr); -bool expr_has_subquery(Node * expr); +/* expr tree walker */ +bool cypher_expr_tree_walker_impl(Node *node, + bool (*walker)(Node *node, void *context), + void *context); +bool cypher_raw_expr_tree_walker_impl(Node *node, + bool (*walker)(Node *node, void *context), + void *context); #endif diff --git a/src/include/parser/cypher_clause.h b/src/include/parser/cypher_clause.h index 51ba5cb47..461a664ca 100644 --- a/src/include/parser/cypher_clause.h +++ 
b/src/include/parser/cypher_clause.h @@ -39,13 +39,4 @@ Query *cypher_parse_sub_analyze(Node *parseTree, CommonTableExpr *parentCTE, bool locked_from_parent, bool resolve_unknowns); - -typedef Query *(*transform_method)(cypher_parsestate *cpstate, - cypher_clause *clause); - -ParseNamespaceItem *transform_cypher_clause_as_subquery(cypher_parsestate *cpstate, - transform_method transform, - cypher_clause *clause, - Alias *alias, - bool add_rte_to_query); #endif diff --git a/src/include/parser/cypher_item.h b/src/include/parser/cypher_item.h index ba4e7e9af..92b6c95f4 100644 --- a/src/include/parser/cypher_item.h +++ b/src/include/parser/cypher_item.h @@ -26,6 +26,4 @@ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node, List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list, List **groupClause, ParseExprKind expr_kind); -bool has_a_cypher_list_comprehension_node(Node *expr); - #endif diff --git a/src/include/parser/cypher_parse_node.h b/src/include/parser/cypher_parse_node.h index f7169380e..263ea197b 100644 --- a/src/include/parser/cypher_parse_node.h +++ b/src/include/parser/cypher_parse_node.h @@ -50,7 +50,6 @@ typedef struct cypher_parsestate */ bool exprHasAgg; bool p_opt_match; - bool p_list_comp; } cypher_parsestate; typedef struct errpos_ecb_state From d9c16ab63f3fcd792b2df34ff086b1c9852d1add Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 9 Jul 2025 20:17:51 +0500 Subject: [PATCH 7/9] Add support for operators in cypher query (#2172) - Fixed some operator signatures in .sql - Added support for PG operators in cypher. Some hardcoded operators are removed, since they are now covered by the general operator handling. - Added full typecast syntax that allows for type modifiers. - These changes also improve interoperability with other extensions, as reflected in the regression tests. - Added a new function to check if graph_oid exists. 
--- age--1.5.0--y.y.y.sql | 118 ++++++- regress/expected/cypher_match.out | 6 +- regress/expected/cypher_vle.out | 6 +- regress/expected/pgvector.out | 508 +++++++++++++++++++++++++--- regress/expected/scan.out | 8 +- regress/sql/pgvector.sql | 252 ++++++++++++-- regress/sql/scan.sql | 2 +- sql/agtype_coercions.sql | 2 +- sql/agtype_exists.sql | 26 +- sql/agtype_operators.sql | 16 +- sql/agtype_string.sql | 6 + src/backend/catalog/ag_graph.c | 13 + src/backend/catalog/ag_label.c | 15 + src/backend/nodes/cypher_outfuncs.c | 2 +- src/backend/parser/ag_scanner.l | 217 ++++++++---- src/backend/parser/cypher_analyze.c | 9 +- src/backend/parser/cypher_expr.c | 251 +++++++------- src/backend/parser/cypher_gram.y | 168 ++++++--- src/backend/parser/cypher_parser.c | 19 +- src/backend/utils/adt/agtype.c | 12 +- src/include/catalog/ag_graph.h | 1 + src/include/nodes/cypher_nodes.h | 2 +- src/include/parser/ag_scanner.h | 10 +- src/include/parser/cypher_kwlist.h | 1 + 24 files changed, 1297 insertions(+), 373 deletions(-) diff --git a/age--1.5.0--y.y.y.sql b/age--1.5.0--y.y.y.sql index 6b7560aa2..d04a6a24b 100644 --- a/age--1.5.0--y.y.y.sql +++ b/age--1.5.0--y.y.y.sql @@ -41,8 +41,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -58,17 +58,114 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); +/* + * We have to drop and recreate the operators, because + * commutator is not modifiable using ALTER OPERATOR. + */ +ALTER EXTENSION age + DROP OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ? 
(agtype, text); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, agtype); +ALTER EXTENSION age + DROP OPERATOR ?& (agtype, text[]); + +DROP OPERATOR ? (agtype, agtype), ? (agtype, text), + ?| (agtype, agtype), ?| (agtype, text[]), + ?& (agtype, agtype), ?& (agtype, text[]); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ? ( + LEFTARG = agtype, + RIGHTARG = text, + FUNCTION = ag_catalog.agtype_exists, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_any_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?| ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_any, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.agtype_exists_all_agtype, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +CREATE OPERATOR ?& ( + LEFTARG = agtype, + RIGHTARG = text[], + FUNCTION = ag_catalog.agtype_exists_all, + RESTRICT = matchingsel, + JOIN = matchingjoinsel +); + +ALTER EXTENSION age + ADD OPERATOR ? (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ? 
(agtype, text); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ?| (agtype, text[]); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, agtype); +ALTER EXTENSION age + ADD OPERATOR ?& (agtype, text[]); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR @> (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +ALTER OPERATOR <@ (agtype, agtype) + SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + /* * Since there is no option to add or drop operator from class, * we have to drop and recreate the whole operator class. * Reference: https://www.postgresql.org/docs/current/sql-alteropclass.html */ -DROP OPERATOR CLASS ag_catalog.gin_agtype_ops; +ALTER EXTENSION age + DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + +DROP OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; CREATE OPERATOR CLASS ag_catalog.gin_agtype_ops DEFAULT FOR TYPE agtype USING gin AS @@ -89,6 +186,9 @@ DEFAULT FOR TYPE agtype USING gin AS internal, internal, internal), STORAGE text; +ALTER EXTENSION age + ADD OPERATOR CLASS ag_catalog.gin_agtype_ops USING gin; + -- this function went from variadic "any" to just "any" type CREATE OR REPLACE FUNCTION ag_catalog.age_tostring("any") RETURNS agtype @@ -148,4 +248,10 @@ PARALLEL SAFE AS 'MODULE_PATHNAME'; CREATE CAST (agtype[] AS agtype) - WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); \ No newline at end of file + WITH FUNCTION ag_catalog.agtype_array_to_agtype(agtype[]); + +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e25584788..e83ba3b93 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -2407,22 +2407,22 @@ SELECT * FROM 
cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 
281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": 
{"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ CREATE () WITH * MATCH (x{n0:x.n1}) RETURN 0 $$) as (a agtype); diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index b3cada60a..9cbb3420c 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -726,8 +726,8 @@ SELECT prepend_node('list01', 'b'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (2 rows) -- prepend a node 'c' @@ -741,9 +741,9 @@ SELECT prepend_node('list01', 'c'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex - {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex {"id": 1407374883553283, "label": "node", "properties": {"content": "c"}}::vertex + {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex (3 rows) DROP FUNCTION show_list_use_vle; diff --git a/regress/expected/pgvector.out b/regress/expected/pgvector.out index f1bd53ed4..bbc558349 100644 --- 
a/regress/expected/pgvector.out +++ b/regress/expected/pgvector.out @@ -61,6 +61,58 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n spa {1:1.22,2:2.22,3:3.33}/3 (1 row) +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3)); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); +ERROR: expected 2 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +ERROR: expected 4 dimensions, not 3 +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); +ERROR: expected 4 dimensions, not 3 +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN 
l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); n ----- @@ -121,32 +173,186 @@ SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $ [2, 3, 4, 5] (1 row) -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); n ----- 111 (1 row) +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. 
+-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); + n +--------- + [2,4,7] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); + n +---------- + [0,0,-1] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); + n +---------- + [1,4,12] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); + n +--------------- + [1,2,3,1,2,4] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector 
OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); + n +------- + -17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie 
{title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his 
dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the 
fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 
1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); result -------- @@ -201,7 +407,20 @@ SELECT * 
FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ------------------------------ @@ -213,7 +432,20 @@ $$) AS (title agtype); -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 $$) AS (title agtype); title ---------------------------- @@ -224,27 +456,27 @@ $$) AS (title agtype); (4 rows) -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); - title | embedding -------------------------------+---------------------------------------------------- - "The Matrix" | 
"[-0.22980669,0.12351139,0.89543957,-0.36072403]" - "The Matrix Reloaded" | "[0.58534974,-0.44225806,0.6790991,0.024453051]" - "The Matrix Revolutions" | "[0.26431033,-0.6424414,0.4884408,0.528048]" - "The Matrix Resurrections" | "[0.72151977,-0.28723562,0.60715157,0.16814256]" - "Inception" | "[0.08159459,0.81693435,-0.43519026,-0.3695538]" - "Interstellar" | "[-0.5290723,-0.71527255,-0.4223914,-0.17335857]" - "Avatar" | "[-0.84023285,0.039085682,-0.18687363,-0.507503]" - "Blade Runner" | "[0.81074023,-0.44075987,-0.29708475,-0.2452992]" - "Blade Runner 2049" | "[0.8134027,-0.28314334,-0.05024454,0.50564945]" - "Minority Report" | "[0.39031598,0.7308651,-0.5024533,0.24704295]" - "Total Recall" | "[-0.54291505,0.4566574,-0.19491677,-0.67728484]" - "Elysium" | "[-0.517338,0.12022049,0.17122844,0.82981277]" - "Gattaca" | "[-0.35853538,0.51576865,0.13764863,0.765825]" - "The Fifth Element" | "[-0.5788842,-0.10483904,-0.36918527,0.7194471]" - "The Terminator" | "[0.84599304,0.45333964,-0.02701552,-0.27937278]" - "Terminator 2: Judgment Day" | "[0.8501332,0.15776564,0.15269388,-0.4786106]" - "Jurassic Park" | "[0.037194606,-0.23696794,-0.9705615,-0.02175219]" - "The Avengers" | "[0.009587915,-0.6492101,-0.7475897,0.13979948]" +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); + title | embedding +------------------------------+----------------------------------------------------- + "The Matrix" | [-0.22980669, 0.12351139, 0.89543957, -0.36072403] + "The Matrix Reloaded" | [0.58534974, -0.44225806, 0.6790991, 0.024453051] + "The Matrix Revolutions" | [0.26431033, -0.6424414, 0.4884408, 0.528048] + "The Matrix Resurrections" | [0.72151977, -0.28723562, 0.60715157, 0.16814256] + "Inception" | [0.08159459, 0.81693435, -0.43519026, -0.3695538] + "Interstellar" | [-0.5290723, -0.71527255, -0.4223914, -0.17335857] + "Avatar" | [-0.84023285, 0.039085682, 
-0.18687363, -0.507503] + "Blade Runner" | [0.81074023, -0.44075987, -0.29708475, -0.2452992] + "Blade Runner 2049" | [0.8134027, -0.28314334, -0.05024454, 0.50564945] + "Minority Report" | [0.39031598, 0.7308651, -0.5024533, 0.24704295] + "Total Recall" | [-0.54291505, 0.4566574, -0.19491677, -0.67728484] + "Elysium" | [-0.517338, 0.12022049, 0.17122844, 0.82981277] + "Gattaca" | [-0.35853538, 0.51576865, 0.13764863, 0.765825] + "The Fifth Element" | [-0.5788842, -0.10483904, -0.36918527, 0.7194471] + "The Terminator" | [0.84599304, 0.45333964, -0.02701552, -0.27937278] + "Terminator 2: Judgment Day" | [0.8501332, 0.15776564, 0.15269388, -0.4786106] + "Jurassic Park" | [0.037194606, -0.23696794, -0.9705615, -0.02175219] + "The Avengers" | [0.009587915, -0.6492101, -0.7475897, 0.13979948] (18 rows) -- Get top 4 most similar movies to The Terminator using l2 distance @@ -259,6 +491,18 @@ $$) AS (title agtype); "Blade Runner" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 @@ -271,6 +515,186 @@ $$) AS (title agtype); "Total Recall" (4 rows) +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- +-- Test vector index +-- +-- This 
function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + 
ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); + plan_has_index_scan +--------------------- + t +(1 row) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + title 
+---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table graph._ag_label_vertex diff --git a/regress/expected/scan.out b/regress/expected/scan.out index d96d80049..d8105a053 100644 --- a/regress/expected/scan.out +++ b/regress/expected/scan.out @@ -52,7 +52,7 @@ RETURN 0 " LINE 2: /* unterminated /* comment ^ --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment -- @@ -208,9 +208,9 @@ $$) AS t(a agtype, b agtype); SELECT * FROM cypher('scan', $$ RETURN 0xF~ $$) AS t(a int); -ERROR: unexpected character at or near "~" -LINE 2: RETURN 0xF~ - ^ +ERROR: syntax error at end of input +LINE 3: $$) AS t(a int); + ^ -- an invalid character after the leading "0x" SELECT * FROM cypher('scan', $$ RETURN 0x~ diff --git a/regress/sql/pgvector.sql b/regress/sql/pgvector.sql index 816d6eb9f..677e78586 100644 --- a/regress/sql/pgvector.sql +++ b/regress/sql/pgvector.sql @@ -39,6 +39,26 @@ SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vec SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n 
sparsevec(3)); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2)); + +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4)); +SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4)); + +-- +-- Test functions +-- SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); @@ -49,28 +69,65 @@ SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); -SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3)); + +-- +-- Test operators +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN 
[1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype); +-- +-- Due to issues with pattern matching syntax, '-' is not allowed +-- as an operator character, so we have to use the OPERATOR syntax. +-- +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +-- Using OPERATOR () syntax +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype); + +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ 
RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype); + +-- -- An example usage +-- SELECT * FROM cypher('graph', $$ - CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), - (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), - (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), - (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), - (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), - (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), - (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he 
becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), - (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), - (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), - (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), - (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), - (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), - (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), - (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), - (:Movie {title: "The 
Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), - (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), - (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), - (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, 
-0.13820869, 0.29214213, 0.08090488]}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station 
while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]}) $$) AS (result agtype); SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); @@ -79,23 +136,174 @@ SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.e -- Get top 4 most similar movies to The Terminator using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The 
Terminator"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using cosine distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) - RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) + ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- l2 norm of the embedding -SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype); -- Get top 4 most similar movies to The Terminator using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + -- Get top 4 most similar movies to The Matrix using l2 distance SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) RETURN m.title ORDER BY 
l2_distance(m.embedding, search.embedding) ASC LIMIT 4 $$) AS (title agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector + ASC LIMIT 4 +$$) AS (title agtype); + +-- +-- Test vector index +-- + +-- This function will be used to check if index scan +-- is used successfully. We cannot simply have EXPLAIN +-- in the upcoming queries because it produces some +-- hardcoded oids in sort node, which may change in +-- future and break the tests. +CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text) +RETURNS boolean +LANGUAGE plpgsql AS +$$ +DECLARE + plan_lines text[]; + plan_text text; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text; + + -- Return true if 'Index Scan' appears anywhere + RETURN position('"Index Scan"' in plan_text) > 0; +END; +$$; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +-- The index expression below matches the expression +-- seen in the EXPLAIN plan of above query +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw ((( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::text + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM 
cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +DROP INDEX graph.movie_vector_idx; +SET enable_seqscan = on; + +-- Test a direct implicit cast +CREATE CAST (agtype AS vector) + WITH INOUT AS implicit; + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding <=> search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding + ASC LIMIT 4 +$$) AS (title agtype); + +DO $$ +DECLARE + graph_oid oid; +BEGIN + SELECT graphid INTO graph_oid + FROM ag_catalog.ag_graph + WHERE name = 'graph'; + + EXECUTE format($f$ + CREATE INDEX movie_vector_idx ON graph."Movie" + USING hnsw (( + agtype_access_operator( + VARIADIC ARRAY[ + _agtype_build_vertex(id, _label_name(%L::oid, id), properties), + '"embedding"'::agtype + ] + )::vector(4)) vector_cosine_ops); + $f$, graph_oid); +END; +$$; + +-- Disable seqscan just to test the index +SET enable_seqscan = off; +SELECT plan_has_index_scan($f$ + SELECT * FROM cypher('graph', $$ + MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 + $$) AS (title agtype); +$f$); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) + RETURN m.title + ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4) + ASC LIMIT 4 +$$) AS (title agtype); + +SET enable_seqscan = on; +-- +-- Clean up +-- +DROP FUNCTION plan_has_index_scan(text); +DROP CAST (agtype AS vector); SELECT drop_graph('graph', true); DROP EXTENSION vector CASCADE; \ No newline at end of file diff --git a/regress/sql/scan.sql b/regress/sql/scan.sql index 840a822f2..4d35fe0fe 100644 --- 
a/regress/sql/scan.sql +++ b/regress/sql/scan.sql @@ -41,7 +41,7 @@ SELECT * FROM cypher('scan', $$ /* unterminated /* comment RETURN 0 $$) AS t(a int); --- recover syntax highlighting */ +-- recover syntax highlighting */ */ -- -- single-line comment diff --git a/sql/agtype_coercions.sql b/sql/agtype_coercions.sql index c7895fabc..933375fc1 100644 --- a/sql/agtype_coercions.sql +++ b/sql/agtype_coercions.sql @@ -78,7 +78,7 @@ AS 'MODULE_PATHNAME'; CREATE CAST (float8 AS agtype) WITH FUNCTION ag_catalog.float8_to_agtype(float8); --- agtype -> float8 (exmplicit) +-- agtype -> float8 (explicit) CREATE FUNCTION ag_catalog.agtype_to_float8(agtype) RETURNS float8 LANGUAGE c diff --git a/sql/agtype_exists.sql b/sql/agtype_exists.sql index fe6150d69..441af1755 100644 --- a/sql/agtype_exists.sql +++ b/sql/agtype_exists.sql @@ -32,9 +32,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = text, FUNCTION = ag_catalog.agtype_exists, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_agtype(agtype, agtype) @@ -49,9 +48,8 @@ CREATE OPERATOR ? 
( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_agtype, - COMMUTATOR = '?', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any(agtype, text[]) @@ -66,8 +64,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_any, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any_agtype(agtype, agtype) @@ -82,8 +80,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_any_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all(agtype, text[]) @@ -98,8 +96,8 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_all, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all_agtype(agtype, agtype) @@ -114,6 +112,6 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_all_agtype, - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); diff --git a/sql/agtype_operators.sql b/sql/agtype_operators.sql index 3fbc52f33..36fedfe80 100644 --- a/sql/agtype_operators.sql +++ b/sql/agtype_operators.sql @@ -33,8 +33,8 @@ CREATE OPERATOR @> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains, COMMUTATOR = '<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by(agtype, agtype) @@ -50,8 +50,8 @@ CREATE OPERATOR <@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by, COMMUTATOR = '@>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION 
ag_catalog.agtype_contains_top_level(agtype, agtype) @@ -67,8 +67,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -84,6 +84,6 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = contsel, - JOIN = contjoinsel + RESTRICT = matchingsel, + JOIN = matchingjoinsel ); \ No newline at end of file diff --git a/sql/agtype_string.sql b/sql/agtype_string.sql index e7485769d..0d7b2013f 100644 --- a/sql/agtype_string.sql +++ b/sql/agtype_string.sql @@ -52,6 +52,12 @@ CREATE FUNCTION ag_catalog.age_eq_tilde(agtype, agtype) PARALLEL SAFE AS 'MODULE_PATHNAME'; +CREATE OPERATOR =~ ( + LEFTARG = agtype, + RIGHTARG = agtype, + FUNCTION = ag_catalog.age_eq_tilde +); + CREATE FUNCTION ag_catalog.age_is_valid_label_name(agtype) RETURNS boolean LANGUAGE c diff --git a/src/backend/catalog/ag_graph.c b/src/backend/catalog/ag_graph.c index 495c6520c..833cba252 100644 --- a/src/backend/catalog/ag_graph.c +++ b/src/backend/catalog/ag_graph.c @@ -173,3 +173,16 @@ char *get_graph_namespace_name(const char *graph_name) { return get_namespace_name(get_graph_namespace(graph_name)); } + +bool graph_namespace_exists(Oid graph_oid) +{ + graph_cache_data *cache_data; + + cache_data = search_graph_namespace_cache(graph_oid); + if (cache_data) + { + return true; + } + + return false; +} diff --git a/src/backend/catalog/ag_label.c b/src/backend/catalog/ag_label.c index 3c242a000..b6dcf77a3 100644 --- a/src/backend/catalog/ag_label.c +++ b/src/backend/catalog/ag_label.c @@ -186,6 +186,13 @@ Datum _label_name(PG_FUNCTION_ARGS) } graph = PG_GETARG_OID(0); + + /* Check if the graph OID is valid */ + if (!graph_namespace_exists(graph)) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + 
errmsg("graph with oid %u does not exist", graph))); + } label_id = (int32)(((uint64)AG_GETARG_GRAPHID(1)) >> ENTRY_ID_BITS); @@ -193,6 +200,14 @@ Datum _label_name(PG_FUNCTION_ARGS) label_name = NameStr(label_cache->name); + /* If label_name is not found, error out */ + if (label_name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("label with id %d does not exist in graph %u", + label_id, graph))); + } + if (IS_AG_DEFAULT_LABEL(label_name)) PG_RETURN_CSTRING(""); diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index 5bc824f12..4772621c9 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -320,7 +320,7 @@ void out_cypher_typecast(StringInfo str, const ExtensibleNode *node) DEFINE_AG_NODE(cypher_typecast); WRITE_NODE_FIELD(expr); - WRITE_STRING_FIELD(typecast); + WRITE_NODE_FIELD(typname); WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/parser/ag_scanner.l b/src/backend/parser/ag_scanner.l index 45ccdac3b..d5d72b926 100644 --- a/src/backend/parser/ag_scanner.l +++ b/src/backend/parser/ag_scanner.l @@ -124,7 +124,7 @@ whitespace [\t\n\v\f\r ]+ * Therefore, the rule has been modified so that it can match such comments. */ %x mlcomment -mlcstart "/*" +mlcstart \/\*{op_chars}* mlcchars [^*]+|\*+ mlcstop \*+\/ slcomment "//"[^\n\r]* @@ -228,20 +228,15 @@ param \${id} * These are tokens that are used as operators and language constructs in * Cypher, and some of them are structural characters in JSON. */ -left_contains "<@" -right_contains "@>" -any_exists "?|" -all_exists "?&" -concat "||" -access_path "#>" lt_gt "<>" lt_eq "<=" gt_eq ">=" dot_dot ".." plus_eq "+=" -eq_tilde "=~" typecast "::" -self [?%()*+,\-./:;<=>[\]^{|}] +self [%()*+,\-./:;<=>[\]^{|}] +op_chars [\!\@\#\^\&\|\~\?\+\*\/\%\<\>\=] +operator {op_chars}+ other . 
@@ -339,6 +334,11 @@ ag_token token; /* update location in case of unterminated comment */ update_location(); BEGIN(mlcomment); + yyless(2); +} + +{mlcstart} { + yyless(2); } {mlcchars} { @@ -649,54 +649,6 @@ ag_token token; return token; } -{concat} { - update_location(); - token.type = AG_TOKEN_CONCAT; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{access_path} { - update_location(); - token.type = AG_TOKEN_ACCESS_PATH; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{any_exists} { - update_location(); - token.type = AG_TOKEN_ANY_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{left_contains} { - update_location(); - token.type = AG_TOKEN_LEFT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{right_contains} { - update_location(); - token.type = AG_TOKEN_RIGHT_CONTAINS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - -{all_exists} { - update_location(); - token.type = AG_TOKEN_ALL_EXISTS; - token.value.s = yytext; - token.location = get_location(); - return token; -} - {lt_gt} { update_location(); token.type = AG_TOKEN_LT_GT; @@ -737,26 +689,163 @@ ag_token token; return token; } -{eq_tilde} { +{typecast} { update_location(); - token.type = AG_TOKEN_EQ_TILDE; + token.type = AG_TOKEN_TYPECAST; token.value.s = yytext; token.location = get_location(); return token; } -{typecast} { +{self} { update_location(); - token.type = AG_TOKEN_TYPECAST; + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; +} + +{op_chars} { + update_location(); + token.type = AG_TOKEN_OP; token.value.s = yytext; token.location = get_location(); return token; } -{self} { +{operator} { + /* Borrowed from PG and adjusted for our scanner */ + + /* + * Check for embedded slash-star or slash-slash; those + * are comment starts, so operator must stop 
there. + * Note that slash-star or slash-slash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr(yytext, "/*"); + char *slashslash = strstr(yytext, "//"); + + if (slashstar && slashslash) + { + /* if both appear, take the first one */ + if (slashstar > slashslash) + slashstar = slashslash; + } + else if (!slashstar) + slashstar = slashslash; + if (slashstar) + nchars = slashstar - yytext; + + /* + * For SQL compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL operators. + */ + if (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')) + { + int ic; + + for (ic = nchars - 2; ic >= 0; ic--) + { + char c = yytext[ic]; + if (c == '~' || c == '!' || c == '@' || + c == '#' || c == '^' || c == '&' || + c == '|' || c == '`' || c == '?' || + c == '%') + break; + } + if (ic < 0) + { + /* + * didn't find a qualifying character, so remove + * all trailing [+-] + */ + do { + nchars--; + } while (nchars > 1 && + (yytext[nchars - 1] == '+' || + yytext[nchars - 1] == '-')); + } + } + update_location(); - token.type = AG_TOKEN_CHAR; - token.value.c = yytext[0]; + + if (nchars < yyleng) + { + /* Strip the unwanted chars from the token */ + yyless(nchars); + /* + * If what we have left is only one char, and it's + * one of the characters matching "self", then + * return it as a character token the same way + * that the "self" rule would have. 
+ */ + if (nchars == 1 && + strchr("%()*+,-./:;<=>[\\]^{|}", yytext[0])) + { + token.type = AG_TOKEN_CHAR; + token.value.c = yytext[0]; + token.location = get_location(); + return token; + } + + /* + * Likewise, if what we have left is two chars, and + * those match the tokens ">=", "<=", "=>", "<>" or + * "!=", then we must return the appropriate token + * rather than the generic Op. + */ + if (nchars == 2) + { + if (yytext[0] == '>' && yytext[1] == '=') + token.type = AG_TOKEN_GT_EQ; + else if (yytext[0] == '<' && yytext[1] == '=') + token.type = AG_TOKEN_LT_EQ; + else if (yytext[0] == '<' && yytext[1] == '>') + token.type = AG_TOKEN_LT_GT; + else if (yytext[0] == '+' && yytext[1] == '=') + token.type = AG_TOKEN_PLUS_EQ; + /* + * These operators (!=, =>) are not allowed as user-defined + * operators in PG because they are reserved as valid tokens + * with predefined semantics. As a result, we also reject + * them here. However, if a specific use case arises, we + * could allow them with custom handling. + */ + else if ((yytext[0] == '!' && yytext[1] == '=') || + (yytext[0] == '=' && yytext[1] == '>')) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("unexpected character"), + scan_errposition())); + else + goto handle_generic_op; + + token.value.s = yytext; + token.location = get_location(); + return token; + } + } + +handle_generic_op: + /* + * Complain if operator is too long. Unlike the case + * for identifiers, we make this an error not a notice- + * and-truncate, because the odds are we are looking at + * a syntactic mistake anyway. 
+ */ + if (nchars >= NAMEDATALEN) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + scan_errmsg("operator too long"), + scan_errposition())); + + token.type = AG_TOKEN_OP; + token.value.s = yytext; token.location = get_location(); return token; } diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 78788649e..af3e83c87 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -1061,15 +1061,20 @@ static Query *analyze_cypher_and_coerce(List *stmt, RangeTblFunction *rtfunc, TargetEntry *te = lfirst(lt); Node *expr = (Node *)te->expr; Oid current_type; + int32 current_typmod; Oid target_type; + int32 target_typmod; Assert(!te->resjunk); current_type = exprType(expr); + current_typmod = exprTypmod(expr); target_type = lfirst_oid(lc2); - if (current_type != target_type) + target_typmod = lfirst_int(lc3); + + if ((current_type != target_type) || + (current_typmod != target_typmod)) { - int32 target_typmod = lfirst_int(lc3); Node *new_expr; /* diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index 19bc71d42..390bfb392 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -106,16 +106,16 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, Form_pg_proc procform, List *fargs, Oid *target_types); -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid); static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, FuncExpr *fexpr); static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found); static char *get_mapped_extension(Oid func_oid); static bool is_extension_external(char *extension); -static bool is_pgvector_datatype(char *typename); static char *construct_age_function_name(char *funcname); static bool function_exists(char *funcname, char *extension); +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid 
target_oid, + int32 t_typemod, bool error_out); /* transform a cypher expression */ Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr, @@ -1540,6 +1540,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, List *fname; FuncCall *fnode; ParseState *pstate; + TypeName *target_typ; /* verify input parameter */ Assert (cpstate != NULL); @@ -1548,98 +1549,137 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, /* create the qualified function name, schema first */ fname = list_make1(makeString("ag_catalog")); pstate = &cpstate->pstate; - - /* append the name of the requested typecast function */ - if (pg_strcasecmp(ctypecast->typecast, "edge") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); - } - else if (pg_strcasecmp(ctypecast->typecast, "path") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); - } - else if (pg_strcasecmp(ctypecast->typecast, "vertex") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); - } - else if (pg_strcasecmp(ctypecast->typecast, "numeric") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); - } - else if (pg_strcasecmp(ctypecast->typecast, "float") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "int") == 0 || - pg_strcasecmp(ctypecast->typecast, "integer") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_float8") == 0) + target_typ = ctypecast->typname; + + if (list_length(target_typ->names) == 1) { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_bigint") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); - } - else if ((pg_strcasecmp(ctypecast->typecast, "bool") == 0 || - pg_strcasecmp(ctypecast->typecast, "boolean") == 0)) - { - 
fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); - } - else if (pg_strcasecmp(ctypecast->typecast, "pg_text") == 0) - { - fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + char *typecast = strVal(linitial(target_typ->names)); + + /* append the name of the requested typecast function */ + if (pg_strcasecmp(typecast, "edge") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_EDGE)); + } + else if (pg_strcasecmp(typecast, "path") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PATH)); + } + else if (pg_strcasecmp(typecast, "vertex") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_VERTEX)); + } + else if (pg_strcasecmp(typecast, "numeric") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_NUMERIC)); + } + else if (pg_strcasecmp(typecast, "float") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_FLOAT)); + } + else if (pg_strcasecmp(typecast, "int") == 0 || + pg_strcasecmp(typecast, "integer") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_INT)); + } + else if (pg_strcasecmp(typecast, "pg_float8") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_FLOAT8)); + } + else if (pg_strcasecmp(typecast, "pg_bigint") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_BIGINT)); + } + else if ((pg_strcasecmp(typecast, "bool") == 0 || + pg_strcasecmp(typecast, "boolean") == 0)) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_BOOL)); + } + else if (pg_strcasecmp(typecast, "pg_text") == 0) + { + fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); + } + else + { + goto fallback_coercion; + } + + /* make a function call node */ + fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, + ctypecast->location); + + /* return the transformed function */ + return transform_FuncCall(cpstate, fnode); } - else if (is_pgvector_datatype(ctypecast->typecast)) + 
+fallback_coercion: { - TypeName *target_typname; Oid source_oid; Oid target_oid; + int32 t_typmod = -1; Node *expr; /* transform the expr before casting */ expr = transform_cypher_expr_recurse(cpstate, ctypecast->expr); - /* get the source and target oids */ - target_typname = makeTypeNameFromNameList(list_make1( - makeString(ctypecast->typecast))); - target_oid = typenameTypeId(pstate, target_typname); + typenameTypeIdAndMod(pstate, target_typ, &target_oid, &t_typmod); source_oid = exprType(expr); - if (source_oid == AGTYPEOID) - { - /* - * Cast to text and then to target type, since we cant - * directly cast agtype to pgvector datatypes. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(pstate, expr, source_oid, target_oid, + t_typmod, true); return expr; } - /* if none was found, error out */ - else +} + +/* + * Helper function to coerce an expression to the target type. If + * no direct cast exists, it attempts to cast through text if the + * source or target type is agtype. This improves interoperability + * with types from other extensions. 
+ */ +static Node *coerce_expr_flexible(ParseState *pstate, Node *expr, + Oid source_oid, Oid target_oid, + int32 t_typmod, bool error_out) +{ + const Oid text_oid = TEXTOID; + Node *result; + + if (expr == NULL) + return NULL; + + /* Try a direct cast */ + result = coerce_to_target_type(pstate, expr, source_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + + /* Try cast via TEXT if either side is AGTYPE */ + if (source_oid == AGTYPEOID || target_oid == AGTYPEOID) + { + Node *to_text = coerce_to_target_type(pstate, expr, source_oid, text_oid, + -1, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (to_text != NULL) + { + result = coerce_to_target_type(pstate, to_text, text_oid, target_oid, + t_typmod, COERCION_EXPLICIT, + COERCE_EXPLICIT_CAST, -1); + if (result != NULL) + return result; + } + } + + if (error_out) { ereport(ERROR, (errmsg_internal("typecast \'%s\' not supported", - ctypecast->typecast))); + format_type_be(target_oid)))); } - /* make a function call node */ - fnode = makeFuncCall(fname, list_make1(ctypecast->expr), COERCE_SQL_SYNTAX, - ctypecast->location); - - /* return the transformed function */ - return transform_FuncCall(cpstate, fnode); + return NULL; } static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, @@ -1704,7 +1744,6 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, char *funcname = NameStr(procform->proname); int nargs = procform->pronargs; ListCell *lc = NULL; - int i = 0; /* verify the length of args are same */ if (list_length(fargs) != nargs) @@ -1718,67 +1757,20 @@ static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, /* iterate through the function's args */ foreach (lc, fargs) { - char *target_typname; Node *expr = lfirst(lc); Oid source_oid = exprType(expr); - Oid target_oid = target_types[i]; - - /* get the typename from target_oid */ - target_typname = format_type_be(target_oid); + Oid 
target_oid = target_types[foreach_current_index(lc)]; - /* cast the agtype to the target type */ - if (source_oid == AGTYPEOID && is_pgvector_datatype(target_typname)) - { - /* - * There is no cast from agtype to vector, so we first - * cast agtype to text and then text to vector. - */ - expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); - expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); - } - /* additional casts can be added here for other types */ - else - { - /* try a direct cast, it will error out if not possible */ - expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); - } + /* errors out if cast not possible */ + expr = coerce_expr_flexible(&cpstate->pstate, expr, source_oid, + target_oid, -1, true); lfirst(lc) = expr; - i++; } return fargs; } -/* - * Cast an input type to an output type, error out if not possible. - * Thanks to Taha for this idea. - */ -static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid) -{ - ParseState *pstate = &cpstate->pstate; - - /* can we cast from source to target oid? */ - if (can_coerce_type(1, &source_oid, &target_oid, COERCION_EXPLICIT)) - { - /* coerce the source to the target */ - expr = coerce_type(pstate, expr, source_oid, target_oid, -1, - COERCION_EXPLICIT, COERCE_EXPLICIT_CAST, -1); - } - /* error out if we can't cast */ - else - { - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("cannot cast type %s to %s", format_type_be(source_oid), - format_type_be(target_oid)))); - } - - /* return the casted expression */ - return expr; -} - /* * Due to issues with creating a cast from text to agtype, we need to wrap a * function that outputs text with text_to_agtype. 
@@ -1912,13 +1904,6 @@ static bool is_extension_external(char *extension) (pg_strcasecmp(extension, "age") != 0)); } -static bool is_pgvector_datatype(char *typename) -{ - return (pg_strcasecmp(typename, "vector") || - pg_strcasecmp(typename, "halfvec") || - pg_strcasecmp(typename, "sparsevec")); -} - /* Returns age_ prefiexed lower case function name */ static char *construct_age_function_name(char *funcname) { diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index a4d8f0f33..0bafefe1f 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -69,11 +69,11 @@ %token IDENTIFIER %token PARAMETER %token BQIDENT +%token OP %token CHAR /* operators that have more than 1 character */ -%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ EQ_TILDE CONCAT -%token ACCESS_PATH LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS +%token NOT_EQ LT_EQ GT_EQ DOT_DOT TYPECAST PLUS_EQ /* keywords in alphabetical order */ %token ALL ANALYZE AND AS ASC ASCENDING @@ -86,7 +86,7 @@ LIMIT MATCH MERGE NOT NULL_P - OPTIONAL OR ORDER + OPERATOR OPTIONAL OR ORDER REMOVE RETURN SET SKIP STARTS THEN TRUE_P @@ -168,10 +168,18 @@ /* names */ %type property_key_name var_name var_name_opt label_name -%type symbolic_name schema_name +%type symbolic_name schema_name type_name %type reserved_keyword safe_keywords conflicted_keywords %type func_name +/* types */ +%type generic_type +%type opt_type_modifiers + +/* operator */ +%type all_op math_op +%type qual_op any_operator + /* precedence: lowest to highest */ %left UNION %left OR @@ -179,8 +187,8 @@ %left XOR %right NOT %left '=' NOT_EQ '<' LT_EQ '>' GT_EQ -%left '@' '|' '&' '?' 
LEFT_CONTAINS RIGHT_CONTAINS ANY_EXISTS ALL_EXISTS -%left '+' '-' CONCAT +%left '+' '-' +%left OP OPERATOR %left '*' '/' '%' %left '^' %nonassoc IN IS @@ -235,7 +243,7 @@ static Node *make_bool_const(bool b, int location); static Node *make_null_const(int location); /* typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location); +static Node *make_typecast_expr(Node *expr, Node *typname, int location); /* functions */ static Node *make_function_expr(List *func_name, List *exprs, int location); @@ -499,7 +507,6 @@ yield_item: } ; - semicolon_opt: /* empty */ | ';' @@ -1562,33 +1569,13 @@ expr: { $$ = build_comparison_expression($1, $3, ">=", @2); } - | expr LEFT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "<@", $1, $3, @2); - } - | expr RIGHT_CONTAINS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "@>", $1, $3, @2); - } - | expr '?' expr %prec '.' + | expr qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); } - | expr ANY_EXISTS expr + | qual_op expr %prec OP { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?|", $1, $3, @2); - } - | expr ALL_EXISTS expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "?&", $1, $3, @2); - } - | expr CONCAT expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "||", $1, $3, @2); - } - | expr ACCESS_PATH expr - { - $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "#>", $1, $3, @2); + $$ = (Node *) makeA_Expr(AEXPR_OP, $1, NULL, $2, @1); } | expr '+' expr { @@ -1680,11 +1667,6 @@ expr: $$ = (Node *)n; } - | expr EQ_TILDE expr - { - $$ = make_function_expr(list_make1(makeString("eq_tilde")), - list_make2($1, $3), @2); - } | expr '[' expr ']' { A_Indices *i; @@ -1775,6 +1757,31 @@ expr: $$ = append_indirection($1, (Node*)string); } + /* allow indirection with a typecast */ + else if ((IsA($1, ColumnRef) || IsA($1, A_Indirection)) && + (IsA($3, ExtensibleNode) && + is_ag_node($3, cypher_typecast))) + { + cypher_typecast *tc = 
(cypher_typecast *)$3; + + if (IsA(tc->expr, ColumnRef)) + { + ColumnRef *cr = (ColumnRef *)tc->expr; + List *fields = cr->fields; + String *string = linitial(fields); + + tc->expr = append_indirection($1, (Node *)string); + + $$ = (Node *)tc; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid indirection syntax"), + ag_scanner_errposition(@1, scanner))); + } + } else if (IsA($1, FuncCall) && IsA($3, A_Indirection)) { ereport(ERROR, @@ -1798,7 +1805,7 @@ expr: { $$ = (Node *)makeSimpleA_Expr(AEXPR_OP, "->", $1, $4, @2); } - | expr TYPECAST symbolic_name + | expr TYPECAST generic_type { $$ = make_typecast_expr($1, $3, @2); } @@ -2338,6 +2345,10 @@ label_name: schema_name ; +type_name: + schema_name + ; + symbolic_name: IDENTIFIER ; @@ -2356,6 +2367,82 @@ reserved_keyword: | conflicted_keywords ; +/* + * types + */ +generic_type: + type_name opt_type_modifiers + { + TypeName *typname; + + typname = makeTypeName($1); + typname->typmods = $2; + typname->location = @1; + + $$ = (Node *) typname; + } + ; + +opt_type_modifiers: + '(' expr_list ')' + { + $$ = $2; + } + | /* empty */ + { + $$ = NIL; + } + ; + +/* + * operators + */ +any_operator: + all_op + { + $$ = list_make1(makeString($1)); + } + | symbolic_name + { + $$ = list_make1(makeString($1)); + } + | schema_name '.' any_operator + { + $$ = lcons(makeString($1), $3); + } + ; + +all_op: + OP + | math_op + ; + +math_op: + '+' { $$ = "+"; } + | '-' { $$ = "-"; } + | '*' { $$ = "*"; } + | '/' { $$ = "/"; } + | '%' { $$ = "%"; } + | '^' { $$ = "^"; } + | '<' { $$ = "<"; } + | '>' { $$ = ">"; } + | '=' { $$ = "="; } + | LT_EQ { $$ = "<="; } + | GT_EQ { $$ = ">="; } + | NOT_EQ { $$ = "<>"; } + ; + +qual_op: + OP + { + $$ = list_make1(makeString($1)); + } + | OPERATOR '(' any_operator ')' + { + $$ = $3; + } + ; + /* * All keywords need to be copied and properly terminated with a null before * using them, pnstrdup effectively does this for us. 
@@ -2390,6 +2477,7 @@ safe_keywords: | MATCH { $$ = pnstrdup($1, 6); } | MERGE { $$ = pnstrdup($1, 6); } | NOT { $$ = pnstrdup($1, 3); } + | OPERATOR { $$ = pnstrdup($1, 8); } | OPTIONAL { $$ = pnstrdup($1, 8); } | OR { $$ = pnstrdup($1, 2); } | ORDER { $$ = pnstrdup($1, 5); } @@ -2658,13 +2746,13 @@ static Node *make_null_const(int location) /* * typecast */ -static Node *make_typecast_expr(Node *expr, char *typecast, int location) +static Node *make_typecast_expr(Node *expr, Node *typname, int location) { cypher_typecast *node; node = make_ag_node(cypher_typecast); node->expr = expr; - node->typecast = typecast; + node->typname = (TypeName *) typname; node->location = location; return (Node *)node; diff --git a/src/backend/parser/cypher_parser.c b/src/backend/parser/cypher_parser.c index d2b64ffef..ebf46b44d 100644 --- a/src/backend/parser/cypher_parser.c +++ b/src/backend/parser/cypher_parser.c @@ -44,15 +44,9 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) DOT_DOT, TYPECAST, PLUS_EQ, - EQ_TILDE, - LEFT_CONTAINS, - RIGHT_CONTAINS, - ACCESS_PATH, - ANY_EXISTS, - ALL_EXISTS, - CONCAT, CHAR, - BQIDENT + BQIDENT, + OP }; ag_token token; @@ -68,6 +62,7 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) break; case AG_TOKEN_DECIMAL: case AG_TOKEN_STRING: + case AG_TOKEN_OP: lvalp->string = pstrdup(token.value.s); break; case AG_TOKEN_IDENTIFIER: @@ -115,14 +110,6 @@ int cypher_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, ag_scanner_t scanner) case AG_TOKEN_GT_EQ: case AG_TOKEN_DOT_DOT: case AG_TOKEN_PLUS_EQ: - case AG_TOKEN_EQ_TILDE: - case AG_TOKEN_ACCESS_PATH: - case AG_TOKEN_ALL_EXISTS: - case AG_TOKEN_ANY_EXISTS: - case AG_TOKEN_LEFT_CONTAINS: - case AG_TOKEN_RIGHT_CONTAINS: - case AG_TOKEN_CONCAT: - break; case AG_TOKEN_TYPECAST: break; case AG_TOKEN_CHAR: diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 86f41f23a..d26929d33 100644 --- a/src/backend/utils/adt/agtype.c +++ 
b/src/backend/utils/adt/agtype.c @@ -1116,7 +1116,10 @@ char *agtype_to_cstring_indent(StringInfo out, agtype_container *in, } /* - * common worker for above two functions + * Common worker for above two functions. + * If extend is set to true, the function will append + * ::vertex, ::edge or ::path based on the type of + * container. */ static char *agtype_to_cstring_worker(StringInfo out, agtype_container *in, int estimated_len, bool indent, @@ -3207,9 +3210,10 @@ Datum agtype_to_text(PG_FUNCTION_ARGS) /* check that we have a scalar value */ if (!AGT_ROOT_IS_SCALAR(arg_agt)) { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("agtype argument must resolve to a scalar value"))); + char *out; + out = agtype_to_cstring(NULL, &arg_agt->root, VARSIZE(arg_agt)); + PG_FREE_IF_COPY(arg_agt, 0); + return CStringGetTextDatum(out); } /* get the arg parameter */ diff --git a/src/include/catalog/ag_graph.h b/src/include/catalog/ag_graph.h index 432954561..f86efd885 100644 --- a/src/include/catalog/ag_graph.h +++ b/src/include/catalog/ag_graph.h @@ -39,6 +39,7 @@ void update_graph_name(const Name graph_name, const Name new_name); uint32 get_graph_oid(const char *graph_name); char *get_graph_namespace_name(const char *graph_name); +bool graph_namespace_exists(Oid graph_oid); List *get_graphnames(void); void drop_graphs(List *graphnames); diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index f2527019a..db47eb313 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -484,7 +484,7 @@ typedef struct cypher_typecast { ExtensibleNode extensible; Node *expr; - char *typecast; + TypeName *typname; int location; } cypher_typecast; diff --git a/src/include/parser/ag_scanner.h b/src/include/parser/ag_scanner.h index 3dd89abd3..4cef63288 100644 --- a/src/include/parser/ag_scanner.h +++ b/src/include/parser/ag_scanner.h @@ -45,15 +45,9 @@ typedef enum ag_token_type AG_TOKEN_DOT_DOT, AG_TOKEN_TYPECAST, 
AG_TOKEN_PLUS_EQ, - AG_TOKEN_EQ_TILDE, - AG_TOKEN_LEFT_CONTAINS, - AG_TOKEN_RIGHT_CONTAINS, - AG_TOKEN_ACCESS_PATH, - AG_TOKEN_ANY_EXISTS, - AG_TOKEN_ALL_EXISTS, - AG_TOKEN_CONCAT, AG_TOKEN_CHAR, - AG_TOKEN_BQIDENT + AG_TOKEN_BQIDENT, + AG_TOKEN_OP } ag_token_type; /* diff --git a/src/include/parser/cypher_kwlist.h b/src/include/parser/cypher_kwlist.h index ce48f2887..e4c4437ba 100644 --- a/src/include/parser/cypher_kwlist.h +++ b/src/include/parser/cypher_kwlist.h @@ -29,6 +29,7 @@ PG_KEYWORD("match", MATCH, RESERVED_KEYWORD) PG_KEYWORD("merge", MERGE, RESERVED_KEYWORD) PG_KEYWORD("not", NOT, RESERVED_KEYWORD) PG_KEYWORD("null", NULL_P, RESERVED_KEYWORD) +PG_KEYWORD("operator", OPERATOR, RESERVED_KEYWORD) PG_KEYWORD("optional", OPTIONAL, RESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) PG_KEYWORD("order", ORDER, RESERVED_KEYWORD) From 4ce18f31bf38924ced0c266354ffed1aab8519a7 Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Thu, 8 May 2025 21:31:28 +0500 Subject: [PATCH 8/9] Prevent object access hook from accessing not installed namespace (#2161) Currently we cannot install Age to shared_preload_libraries if pg_cron is installed. To prevent the following error we must bail out early. 
postgres=# set backtrace_functions to 'get_namespace_oid'; SET postgres=# create extension pg_cron ; 2025-04-15 16:59:49.867 +05 [30402] ERROR: schema "ag_catalog" does not exist 2025-04-15 16:59:49.867 +05 [30402] BACKTRACE: 2 postgres 0x0000000102401ab0 get_namespace_oid + 204 3 age.so 0x0000000103285cd0 ag_catalog_namespace_id + 28 4 age.so 0x00000001032846fc ag_relation_id + 32 5 age.so 0x00000001032efe9c search_label_relation_cache_miss + 84 6 age.so 0x00000001032efe30 search_label_relation_cache + 100 7 age.so 0x00000001032842f4 object_access + 384 8 postgres 0x000000010240a7a0 RunObjectDropHook + 136 9 postgres 0x00000001023ee85c deleteOneObject + 108 10 postgres 0x00000001023eb860 deleteObjectsInList + 476 11 postgres 0x00000001023eba14 performMultipleDeletions + 316 12 postgres 0x0000000102560244 ATPostAlterTypeCleanup + 2144 13 postgres 0x0000000102559fb4 ATRewriteCatalogs + 516 14 postgres 0x00000001025543a8 ATController + 284 15 postgres 0x00000001025541bc AlterTable + 96 16 postgres 0x00000001028b8240 ProcessUtilitySlow + 1812 17 postgres 0x00000001028b600c standard_ProcessUtility + 3684 18 age.so 0x00000001032844f8 ag_ProcessUtility_hook + 200 19 postgres 0x00000001028b516c ProcessUtility + 392 20 postgres 0x000000010250e5b4 execute_sql_string + 812 21 postgres 0x000000010250d438 execute_extension_script + 2264 22 postgres 0x000000010250b330 ApplyExtensionUpdates + 1320 23 postgres 0x0000000102507954 CreateExtensionInternal + 1896 24 postgres 0x0000000102506ea4 CreateExtension + 1152 25 postgres 0x00000001028b8ed4 ProcessUtilitySlow + 5032 26 postgres 0x00000001028b600c standard_ProcessUtility + 3684 27 age.so 0x00000001032844f8 ag_ProcessUtility_hook + 200 28 postgres 0x00000001028b516c ProcessUtility + 392 29 postgres 0x00000001028b4768 PortalRunUtility + 232 30 postgres 0x00000001028b3660 PortalRunMulti + 756 31 postgres 0x00000001028b2abc PortalRun + 1008 32 postgres 0x00000001028ad870 exec_simple_query + 1436 33 postgres 0x00000001028ac990 
PostgresMain + 2472 34 postgres 0x00000001027a49ac report_fork_failure_to_client + 0 35 postgres 0x00000001027a3e54 BackendStartup + 520 36 postgres 0x00000001027a29f0 ServerLoop + 812 37 postgres 0x000000010279fe0c PostmasterMain + 6484 38 postgres 0x000000010266acd0 startup_hacks + 0 39 dyld 0x000000018a3ab154 start + 2476 --- src/backend/catalog/ag_catalog.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/catalog/ag_catalog.c b/src/backend/catalog/ag_catalog.c index c0032d8d2..f4887f445 100644 --- a/src/backend/catalog/ag_catalog.c +++ b/src/backend/catalog/ag_catalog.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "catalog/dependency.h" +#include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/pg_class_d.h" #include "catalog/pg_namespace_d.h" @@ -174,6 +175,13 @@ static void object_access(ObjectAccessType access, Oid class_id, Oid object_id, if (access != OAT_DROP) return; + /* + * Age might be installed into shared_preload_libraries before extension is + * created. In this case we must bail out from this hook. + */ + if (!OidIsValid(get_namespace_oid("ag_catalog", true))) + return; + drop_arg = arg; /* From d38b2bdb7c2b054aa1476b87f8b417ff7a1bc330 Mon Sep 17 00:00:00 2001 From: Moontasir Mahmood <53787290+Munmud@users.noreply.github.com> Date: Sun, 18 May 2025 15:56:39 +0600 Subject: [PATCH 9/9] Fix CSV import for edge with one property (#2175) - start_index marks where property fields begin. For edges, it's 4. If start_index >= total_fields, create empty properties; otherwise, parse the properties. 
--- src/backend/utils/load/age_load.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 815a53bac..41233ff18 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -157,7 +157,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields, agtype_in_state result; size_t i; - if (start_index + 1 == fields_len) + if (start_index >= fields_len) { return create_empty_agtype(); }